nexaai 1.0.16rc13__cp310-cp310-macosx_13_0_x86_64.whl → 1.0.17__cp310-cp310-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of nexaai might be problematic.
Binary file
nexaai/_version.py CHANGED
@@ -1,4 +1,4 @@
  # This file is generated by CMake from _version.py.in
  # Do not modify this file manually - it will be overwritten

- __version__ = "1.0.16-rc13"
+ __version__ = "1.0.17"
Binary file
nexaai/mlx_backend/vlm/generate_qwen3_vl.py CHANGED
@@ -41,7 +41,12 @@ except ImportError:
  from ml import ChatMessage
  from dataclasses import dataclass
  from typing import Any, Generator, List, Optional, Sequence, Tuple, Union
- from .generate import GenerationResult
+ from .generate import GenerationResult
+
+ # Custom exception for context length exceeded
+ class ContextLengthExceededError(Exception):
+ """Raised when input context length exceeds model's maximum context size"""
+ pass

  @dataclass
  class Qwen3VLBundledModel:
@@ -67,6 +72,7 @@ def load_qwen3_vl(

  Parameters are aligned with .generate.load for compatibility.
  """
+
  model_path = Path(path_or_repo)
  if not model_path.exists():
  if "/" in path_or_repo:
@@ -154,7 +160,6 @@ def load_qwen3_vl(
  if quantization_bits in [4, 8]:
  nn.quantize(llm_model, bits=quantization_bits, group_size=64,
  class_predicate=quant_predicate)
- # For f32 (32-bit), no quantization needed

  llm_model.load_weights(str(llm_weights_path), strict=True)

@@ -166,11 +171,15 @@ def load_qwen3_vl(

  def apply_chat_template_qwen3_vl(messages: Sequence[ChatMessage], num_images: int = 0, num_audios: int = 0, tools: Optional[str] = None, enable_thinking: bool = False) -> str:
  """Apply chat template: serialize messages with content as a list of typed items."""
+
  messages_dict = []
- for msg in messages:
+ for i, msg in enumerate(messages):
  content_items = [{"type": "text", "text": msg.content}]
  messages_dict.append({"role": msg.role, "content": content_items})
- return json.dumps(messages_dict)
+
+ result = json.dumps(messages_dict)
+
+ return result


  def stream_generate_qwen3_vl(
@@ -184,15 +193,22 @@ def stream_generate_qwen3_vl(

  ) -> Generator[Any, None, None]:
  """Stream generation yielding .generate.GenerationResult-compatible chunks."""
- messages = json.loads(prompt)
+
+ try:
+ messages = json.loads(prompt)
+ except json.JSONDecodeError as e:
+ raise
+
  if image is not None:
  image_list = image if isinstance(image, list) else [image]
  pil_images = []
- for p in image_list:
+ for i, p in enumerate(image_list):
  try:
- pil_images.append(Image.open(p))
- except Exception:
+ img = Image.open(p)
+ pil_images.append(img)
+ except Exception as e:
  continue
+
  contents = [{"type": "image", "image": img} for img in pil_images]
  if messages:
  if "content" not in messages[-1] or not isinstance(messages[-1]["content"], list):
@@ -201,6 +217,7 @@ def stream_generate_qwen3_vl(

  raw_text, processed_images = processor.messages_to_text(
  messages, add_generation_prompt=True)
+

  inputs = processor.text_to_input_ids(
  raw_text, images=processed_images, return_tensors="mlx")
@@ -208,10 +225,18 @@ def stream_generate_qwen3_vl(
  input_ids = inputs["input_ids"]
  pixel_values = inputs.get("pixel_values")
  image_grid_thw = inputs.get("image_grid_thw")
+
+
+ # Check if input context exceeds KV cache size and raise error
+ max_kv_size = 4096 # This should match the max_kv_size used in make_prompt_cache and nexa_generate_step
+ if input_ids.size > max_kv_size:
+ error_msg = f"Input context length ({input_ids.size} tokens) exceeds maximum supported context size ({max_kv_size} tokens). Please reduce the input length."
+ raise ContextLengthExceededError(error_msg)

  inputs_embeds, deepstack_visual_embeds, visual_pos_masks, cos, sin, rope_deltas = handle_multimodal_embeds(
  model.vision_model, model.llm_model, input_ids, pixel_values, image_grid_thw
  )
+

  prompt_cache = make_prompt_cache(model.llm_model, max_kv_size=4096)
  tokenizer = processor.tokenizer
@@ -222,37 +247,45 @@ def stream_generate_qwen3_vl(

  gen_count = 0
  tic = time.perf_counter()
+

- for token, logprobs in nexa_generate_step(
- model=model.llm_model,
- prompt=None,
- input_embeddings=inputs_embeds,
- max_tokens=max_tokens,
- max_kv_size=4096,
- prompt_cache=prompt_cache,
- visual_pos_masks=visual_pos_masks,
- deepstack_visual_embeds=deepstack_visual_embeds,
- cos=cos,
- sin=sin,
- rope_deltas=rope_deltas,
- ):
- if token == tokenizer.eos_token_id:
- break
-
- text_piece = tokenizer.decode([token])
- gen_count += 1
-
- yield GenerationResult(
- text=text_piece,
- token=token,
- logprobs=logprobs,
- prompt_tokens=int(input_ids.size),
- generation_tokens=gen_count,
- prompt_tps=float(prompt_tps),
- generation_tps=float(
- gen_count / max(1e-6, (time.perf_counter() - tic))),
- peak_memory=float(mx.get_peak_memory() / 1e9),
- )
+ try:
+ for token, logprobs in nexa_generate_step(
+ model=model.llm_model,
+ prompt=None,
+ input_embeddings=inputs_embeds,
+ max_tokens=max_tokens,
+ max_kv_size=4096,
+ prompt_cache=prompt_cache,
+ visual_pos_masks=visual_pos_masks,
+ deepstack_visual_embeds=deepstack_visual_embeds,
+ cos=cos,
+ sin=sin,
+ rope_deltas=rope_deltas,
+ ):
+ if token == tokenizer.eos_token_id:
+ break
+
+ text_piece = tokenizer.decode([token])
+ gen_count += 1
+
+ current_tps = gen_count / max(1e-6, (time.perf_counter() - tic))
+
+ yield GenerationResult(
+ text=text_piece,
+ token=token,
+ logprobs=logprobs,
+ prompt_tokens=int(input_ids.size),
+ generation_tokens=gen_count,
+ prompt_tps=float(prompt_tps),
+ generation_tps=float(current_tps),
+ peak_memory=float(mx.get_peak_memory() / 1e9),
+ )
+ except Exception as e:
+ import traceback
+ traceback.print_exc()
+ raise
+

  def quant_predicate(path: str, mod: nn.Module) -> bool:
  """Quantization predicate to exclude certain layers from quantization."""
nexaai/mlx_backend/vlm/interface.py CHANGED
@@ -25,7 +25,7 @@ from profiling import ProfilingMixin, ProfilingData, StopReason

  # Import from the actual mlx_vlm structure
  from .generate import generate, stream_generate, load
- from .generate_qwen3_vl import apply_chat_template_qwen3_vl, stream_generate_qwen3_vl, load_qwen3_vl
+ from .generate_qwen3_vl import apply_chat_template_qwen3_vl, stream_generate_qwen3_vl, load_qwen3_vl, ContextLengthExceededError

  from .modeling.prompt_utils import apply_chat_template

@@ -80,6 +80,9 @@ class VLM(ProfilingMixin):

  # Init deafutl sampler config with defualt.
  self.sampler_config = SamplerConfig()
+
+ # Track global character position for incremental processing
+ self.global_n_past_chars = 0

  def destroy(self) -> None:
  """Destroy the model and free resources."""
@@ -89,6 +92,7 @@ class VLM(ProfilingMixin):
  def reset(self) -> None:
  """Reset the model state."""
  self._reset_cache()
+ self.global_n_past_chars = 0

  def _reset_cache(self) -> None:
  """Reset the KV cache."""
@@ -120,7 +124,7 @@ class VLM(ProfilingMixin):
  prompt: str,
  config: Optional[GenerationConfig] = None,
  ) -> GenerationResult:
- """Generate text from prompt."""
+ """Generate text from prompt."""
  # Start profiling
  self._start_profiling()

@@ -141,6 +145,19 @@ class VLM(ProfilingMixin):
  image_list = [str(path) for path in image_paths] if image_paths else None
  audio_list = [str(path) for path in audio_paths] if audio_paths else None

+ # Extract incremental portion of the prompt (similar to llama.cpp VLM)
+ full_prompt_len = len(prompt)
+ incremental_prompt = prompt
+
+ # Apply incremental processing only for non-qwen3vl models
+ # qwen3vl requires complete JSON conversation structure
+ if self.model_name != "qwen3vl":
+ if self.global_n_past_chars < full_prompt_len:
+ incremental_prompt = prompt[self.global_n_past_chars:]
+ else:
+ # No new text to process
+ incremental_prompt = ""
+
  # End prompt processing, start decode
  self._prompt_end()
  self._decode_start()
@@ -152,7 +169,7 @@ class VLM(ProfilingMixin):
  text, stats = generate(
  self.model,
  self.processor,
- prompt,
+ incremental_prompt, # Use incremental prompt instead of full prompt
  image=image_list,
  audio=audio_list,
  **gen_kwargs,
@@ -181,10 +198,16 @@ class VLM(ProfilingMixin):
  self._update_prompt_tokens(prompt_tokens)
  self._update_generated_tokens(generated_tokens)
  self._set_stop_reason(StopReason.ML_STOP_REASON_COMPLETED)
+
+ # Update global character position (not needed for qwen3vl JSON processing)
+ if self.model_name != "qwen3vl":
+ old_pos = self.global_n_past_chars
+ self.global_n_past_chars = full_prompt_len + len(text)
+
  self._decode_end()
  self._end_profiling()

- return GenerationResult(
+ result = GenerationResult(
  text=text,
  prompt_tokens=prompt_tokens,
  generation_tokens=generated_tokens,
@@ -193,7 +216,18 @@ class VLM(ProfilingMixin):
  generation_tps=stats.get("generation_tps", 0.0),
  peak_memory=stats.get("peak_memory", 0.0),
  )
+
+ return result
+
+ except ContextLengthExceededError as e:
+ self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
+ self._decode_end()
+ self._end_profiling()
+ # Re-raise the original exception without wrapping it
+ raise e
  except Exception as e:
+ import traceback
+ traceback.print_exc()
  self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
  self._decode_end()
  self._end_profiling()
@@ -206,6 +240,7 @@ class VLM(ProfilingMixin):
  on_token: Optional[TokenCallback],
  ) -> GenerationResult:
  """Generate text with streaming callback. Unified method for both text and multimodal generation."""
+
  # Start profiling
  self._start_profiling()

@@ -218,6 +253,7 @@ class VLM(ProfilingMixin):
  if self.sampler_config is not None:
  gen_kwargs.update(self.sampler_config.__dict__)

+
  # Get image and audio paths from config
  image_paths = config.image_paths if config else None
  audio_paths = config.audio_paths if config else None
@@ -226,6 +262,21 @@ class VLM(ProfilingMixin):
  image_list = [str(path) for path in image_paths] if image_paths else None
  audio_list = [str(path) for path in audio_paths] if audio_paths else None

+
+ # Extract incremental portion of the prompt (similar to llama.cpp VLM)
+ full_prompt_len = len(prompt)
+ incremental_prompt = prompt
+
+
+ # Apply incremental processing only for non-qwen3vl models
+ # qwen3vl requires complete JSON conversation structure
+ if self.model_name != "qwen3vl":
+ if self.global_n_past_chars < full_prompt_len:
+ incremental_prompt = prompt[self.global_n_past_chars:]
+ else:
+ # No new text to process
+ incremental_prompt = ""
+
  # End prompt processing, start decode
  self._prompt_end()
  self._decode_start()
@@ -236,15 +287,19 @@ class VLM(ProfilingMixin):
  stream_generate_impl = stream_generate_qwen3_vl if self.model_name == "qwen3vl" else stream_generate

  try:
+ token_count = 0
+
  for result in stream_generate_impl(
  self.model,
  self.processor,
- prompt,
+ incremental_prompt, # Use incremental prompt instead of full prompt
  image=image_list,
  audio=audio_list,
  **gen_kwargs,
  ):
- # Record TTFT on first token
+ token_count += 1
+
+ # Record TTFT on first token
  if first_token:
  self._record_ttft()
  first_token = False
@@ -257,6 +312,7 @@ class VLM(ProfilingMixin):
  text += result.text
  last_result = result

+
  # Set stop reason if not user stop
  if self._profiling_context.stop_reason != StopReason.ML_STOP_REASON_USER:
  self._set_stop_reason(StopReason.ML_STOP_REASON_EOS)
@@ -266,10 +322,15 @@ class VLM(ProfilingMixin):
  self._update_prompt_tokens(last_result.prompt_tokens)
  self._update_generated_tokens(last_result.generation_tokens)

+ # Update global character position (not needed for qwen3vl JSON processing)
+ if self.model_name != "qwen3vl":
+ old_pos = self.global_n_past_chars
+ self.global_n_past_chars = full_prompt_len + len(text)
+
  self._decode_end()
  self._end_profiling()

- return GenerationResult(
+ result = GenerationResult(
  text=text,
  token=last_result.token if last_result else None,
  logprobs=last_result.logprobs if last_result else None,
@@ -280,7 +341,18 @@ class VLM(ProfilingMixin):
  generation_tps=last_result.generation_tps if last_result else 0.0,
  peak_memory=last_result.peak_memory if last_result else 0.0,
  )
+
+ return result
+
+ except ContextLengthExceededError as e:
+ self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
+ self._decode_end()
+ self._end_profiling()
+ # Re-raise the original exception without wrapping it
+ raise e
  except Exception as e:
+ import traceback
+ traceback.print_exc()
  self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
  self._decode_end()
  self._end_profiling()
nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py CHANGED
@@ -232,7 +232,7 @@ def generate_step(
  prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
  prompt_processed_tokens += prefill_step_size
  y = y[prefill_step_size:]
- mx.metal.clear_cache()
+ mx.clear_cache()

  y, logprobs = _step(y)

@@ -249,7 +249,7 @@ def generate_step(
  break
  yield y.item(), logprobs
  if n % 256 == 0:
- mx.metal.clear_cache()
+ mx.clear_cache()
  y, logprobs = next_y, next_logprobs
  n += 1

@@ -371,7 +371,7 @@ def nexa_generate_step(
  prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
  prompt_processed_tokens += prefill_step_size
  y = y[prefill_step_size:]
- mx.metal.clear_cache()
+ mx.clear_cache()

  y, logprobs = _step(y)

@@ -388,7 +388,7 @@ def nexa_generate_step(
  break
  yield y.item(), logprobs
  if n % 256 == 0:
- mx.metal.clear_cache()
+ mx.clear_cache()
  y, logprobs = next_y, next_logprobs
  n += 1

@@ -507,7 +507,7 @@ def nexa_multimodal_generate_step(
  prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
  prompt_processed_tokens += prefill_step_size
  y = y[prefill_step_size:]
- mx.metal.clear_cache()
+ mx.clear_cache()

  y, logprobs = _step(y)

@@ -524,7 +524,7 @@ def nexa_multimodal_generate_step(
  break
  yield y.item(), logprobs
  if n % 256 == 0:
- mx.metal.clear_cache()
+ mx.clear_cache()
  y, logprobs = next_y, next_logprobs
  n += 1

@@ -632,7 +632,7 @@ def speculative_generate_step(
  quantize_cache_fn(cache)
  mx.eval([c.state for c in cache])
  y = y[prefill_step_size:]
- mx.metal.clear_cache()
+ mx.clear_cache()
  return y

  def _rewind_cache(num_draft, num_accept):
nexaai/utils/manifest_utils.py CHANGED
@@ -35,15 +35,8 @@ def process_manifest_metadata(manifest: Dict[str, Any], repo_id: str) -> Dict[st
  # Handle download_time - keep as null if missing
  download_time = manifest.get('download_time')

- # Handle avatar_url - fetch on-the-fly if missing/null
+ # Handle avatar_url - leave it null if missing/null
  avatar_url = manifest.get('avatar_url')
- if not avatar_url:
- try:
- from .avatar_fetcher import get_avatar_url_for_repo
- avatar_url = get_avatar_url_for_repo(repo_id)
- except Exception:
- # If fetching fails, leave as None
- avatar_url = None

  # Return processed metadata
  processed_manifest = manifest.copy()
@@ -171,9 +164,9 @@ def create_gguf_manifest(repo_id: str, files: List[str], directory_path: str, ol
  },
  "ExtraFiles": None,
  # Preserve old metadata fields
- "pipeline_tag": old_metadata.get('pipeline_tag'),
- "download_time": old_metadata.get('download_time'),
- "avatar_url": old_metadata.get('avatar_url')
+ "pipeline_tag": old_metadata.get('pipeline_tag') if old_metadata.get('pipeline_tag') else existing_manifest.get('pipeline_tag'),
+ "download_time": old_metadata.get('download_time') if old_metadata.get('download_time') else existing_manifest.get('download_time'),
+ "avatar_url": old_metadata.get('avatar_url') if old_metadata.get('avatar_url') else existing_manifest.get('avatar_url')
  }

  return manifest
@@ -182,6 +175,9 @@ def create_gguf_manifest(repo_id: str, files: List[str], directory_path: str, ol
  def create_mlx_manifest(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any], is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None) -> Dict[str, Any]:
  """Create MLX format manifest."""

+ # Load existing manifest to merge MLX files if it exists
+ existing_manifest = load_nexa_manifest(directory_path)
+
  model_files = {}
  extra_files = []

@@ -250,9 +246,9 @@ def create_mlx_manifest(repo_id: str, files: List[str], directory_path: str, old
  },
  "ExtraFiles": extra_files if extra_files else None,
  # Preserve old metadata fields
- "pipeline_tag": old_metadata.get('pipeline_tag'),
- "download_time": old_metadata.get('download_time'),
- "avatar_url": old_metadata.get('avatar_url')
+ "pipeline_tag": old_metadata.get('pipeline_tag') if old_metadata.get('pipeline_tag') else existing_manifest.get('pipeline_tag'),
+ "download_time": old_metadata.get('download_time') if old_metadata.get('download_time') else existing_manifest.get('download_time'),
+ "avatar_url": old_metadata.get('avatar_url') if old_metadata.get('avatar_url') else existing_manifest.get('avatar_url')
  }

  return manifest
nexaai/utils/model_manager.py CHANGED
@@ -11,7 +11,6 @@ from huggingface_hub import HfApi
  from huggingface_hub.utils import HfHubHTTPError, RepositoryNotFoundError

  from .progress_tracker import CustomProgressTqdm, DownloadProgressTracker
- from .avatar_fetcher import get_avatar_url_for_repo
  from .manifest_utils import (
  load_download_metadata,
  save_download_metadata,
@@ -790,7 +789,7 @@ class HuggingFaceDownloader:
  # If no expected size, just check that file is not empty
  return os.path.getsize(file_path) > 0

- def _fetch_and_save_metadata(self, repo_id: str, local_dir: str, is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None) -> None:
+ def _fetch_and_save_metadata(self, repo_id: str, local_dir: str, is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None, **kwargs) -> None:
  """Fetch model info and save metadata after successful download."""
  # Initialize metadata with defaults to ensure manifest is always created
  old_metadata = {
@@ -809,14 +808,9 @@ class HuggingFaceDownloader:
  # Log the error but continue with manifest creation
  print(f"Warning: Could not fetch model info for {repo_id}: {e}")

- try:
- # Get avatar URL
- avatar_url = get_avatar_url_for_repo(repo_id, custom_endpoint=self.endpoint)
- if avatar_url:
- old_metadata['avatar_url'] = avatar_url
- except Exception as e:
- # Log the error but continue with manifest creation
- print(f"Warning: Could not fetch avatar URL for {repo_id}: {e}")
+ # Use input avater url if provided
+ old_metadata['avatar_url'] = kwargs.get('avatar_url')
+

  # CRITICAL: Always create the manifest file, regardless of metadata fetch failures
  try:
@@ -850,7 +844,8 @@ class HuggingFaceDownloader:
  file_name: str,
  local_dir: str,
  progress_tracker: Optional[DownloadProgressTracker],
- force_download: bool = False
+ force_download: bool = False,
+ **kwargs
  ) -> str:
  """Download a single file from the repository using HuggingFace Hub API."""
  # Create repo-specific directory for the single file
@@ -882,7 +877,7 @@ class HuggingFaceDownloader:
  progress_tracker.stop_tracking()

  # Save metadata after successful download
- self._fetch_and_save_metadata(repo_id, file_local_dir, self._current_is_mmproj, self._current_file_name)
+ self._fetch_and_save_metadata(repo_id, file_local_dir, self._current_is_mmproj, self._current_file_name, **kwargs)

  return downloaded_path

@@ -901,7 +896,8 @@ class HuggingFaceDownloader:
  repo_id: str,
  local_dir: str,
  progress_tracker: Optional[DownloadProgressTracker],
- force_download: bool = False
+ force_download: bool = False,
+ **kwargs
  ) -> str:
  """Download the entire repository."""
  # Create a subdirectory for this specific repo
@@ -927,7 +923,7 @@ class HuggingFaceDownloader:
  progress_tracker.stop_tracking()

  # Save metadata after successful download
- self._fetch_and_save_metadata(repo_id, repo_local_dir, self._current_is_mmproj, self._current_file_name)
+ self._fetch_and_save_metadata(repo_id, repo_local_dir, self._current_is_mmproj, self._current_file_name, **kwargs)

  return downloaded_path

@@ -944,7 +940,8 @@ class HuggingFaceDownloader:
  file_names: List[str],
  local_dir: str,
  progress_tracker: Optional[DownloadProgressTracker],
- force_download: bool = False
+ force_download: bool = False,
+ **kwargs
  ) -> str:
  """Download multiple specific files from HuggingFace Hub."""
  # Create repo-specific directory
@@ -989,7 +986,7 @@ class HuggingFaceDownloader:
  progress_tracker.stop_tracking()

  # Save metadata after successful download
- self._fetch_and_save_metadata(repo_id, repo_local_dir, self._current_is_mmproj, self._current_file_name)
+ self._fetch_and_save_metadata(repo_id, repo_local_dir, self._current_is_mmproj, self._current_file_name, **kwargs)

  return repo_local_dir

@@ -1015,7 +1012,8 @@ class HuggingFaceDownloader:
  progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
  show_progress: bool = True,
  force_download: bool = False,
- is_mmproj: bool = False
+ is_mmproj: bool = False,
+ **kwargs
  ) -> str:
  """
  Main download method that handles all download scenarios.
@@ -1062,13 +1060,13 @@ class HuggingFaceDownloader:
  if file_name is None:
  # Download entire repository
  return self._download_entire_repository(
- repo_id, local_dir, progress_tracker, force_download
+ repo_id, local_dir, progress_tracker, force_download, **kwargs
  )
  elif isinstance(file_name, str):
  # Download specific single file
  self._validate_file_exists_in_repo(file_name, info, repo_id, progress_tracker)
  return self._download_single_file(
- repo_id, file_name, local_dir, progress_tracker, force_download
+ repo_id, file_name, local_dir, progress_tracker, force_download, **kwargs
  )
  else: # file_name is a list
  # Download multiple specific files
@@ -1077,7 +1075,7 @@ class HuggingFaceDownloader:
  self._validate_file_exists_in_repo(fname, info, repo_id, progress_tracker)

  return self._download_multiple_files_from_hf(
- repo_id, file_name, local_dir, progress_tracker, force_download
+ repo_id, file_name, local_dir, progress_tracker, force_download, **kwargs
  )

  except Exception as e:
@@ -1107,7 +1105,8 @@ def download_from_huggingface(
  token: Union[bool, str, None] = None,
  custom_endpoint: Optional[str] = None,
  force_download: bool = False,
- is_mmproj: Optional[bool] = None
+ is_mmproj: Optional[bool] = None,
+ **kwargs
  ) -> str:
  """
  Download models or files from HuggingFace Hub or custom mirror endpoints.
@@ -1197,7 +1196,8 @@ def download_from_huggingface(
  progress_callback=progress_callback,
  show_progress=show_progress,
  force_download=force_download,
- is_mmproj=is_mmproj
+ is_mmproj=is_mmproj,
+ **kwargs
  )


@@ -1211,7 +1211,8 @@ def _download_model_if_needed(
  param_name: str,
  progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
  token: Union[bool, str, None] = None,
- is_mmproj: bool = False
+ is_mmproj: bool = False,
+ **kwargs
  ) -> str:
  """
  Helper function to download a model from HuggingFace if it doesn't exist locally.
@@ -1247,7 +1248,8 @@ def _download_model_if_needed(
  progress_callback=progress_callback,
  show_progress=True,
  token=token,
- is_mmproj=is_mmproj
+ is_mmproj=is_mmproj,
+ **kwargs
  )

  return downloaded_path
@@ -1320,7 +1322,7 @@ def auto_download_model(func: Callable) -> Callable:
  if name_or_path is not None:
  try:
  downloaded_name_path = _download_model_if_needed(
- name_or_path, 'name_or_path', progress_callback, token
+ name_or_path, 'name_or_path', progress_callback, token, **kwargs
  )

  # Replace name_or_path with downloaded path
@@ -1338,7 +1340,7 @@ def auto_download_model(func: Callable) -> Callable:
  if mmproj_path is not None:
  try:
  downloaded_mmproj_path = _download_model_if_needed(
- mmproj_path, 'mmproj_path', progress_callback, token, is_mmproj=True
+ mmproj_path, 'mmproj_path', progress_callback, token, is_mmproj=True, **kwargs
  )

  # Replace mmproj_path with downloaded path
nexaai-1.0.16rc13.dist-info/METADATA → nexaai-1.0.17.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nexaai
- Version: 1.0.16rc13
+ Version: 1.0.17
  Summary: Python bindings for NexaSDK C-lib backend
  Author-email: "Nexa AI, Inc." <dev@nexa.ai>
  Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
nexaai-1.0.16rc13.dist-info/RECORD → nexaai-1.0.17.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
  nexaai/__init__.py,sha256=L8oB7GFZZMGnUpCg0PecDbI_ycKuQak-ZEJ4Y12_QIw,2184
- nexaai/_stub.cpython-310-darwin.so,sha256=Bd-r6O9pG8m0SL3rkS3PQF8Z1ie_WD7uqaFPRyitb9E,49832
- nexaai/_version.py,sha256=w1I23pLkLt0xrD0hMhWK5fW9rqbLqnW1ii4yYp9UCTo,144
+ nexaai/_stub.cpython-310-darwin.so,sha256=HjqUYc8SyajzyySZk1eBJdO7Rc_db2F-kS3KdPSPB5o,49832
+ nexaai/_version.py,sha256=eaXF_gF6uNVz9AglXCAwIyseTDCCAGEhr3CCnSfr3tY,139
  nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
  nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
  nexaai/common.py,sha256=Y0NJNLTi4Nq4x1WL6PQsSvGUto0eGmWhjpsC6jcekfA,3444
@@ -17,9 +17,9 @@ nexaai/asr_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/asr_impl/mlx_asr_impl.py,sha256=eosd8-TIWAOwV0HltmoFrLwzXHcU4jyxtncvuZE9pgA,3257
  nexaai/asr_impl/pybind_asr_impl.py,sha256=pE9Hb_hMi5yAc4MF83bLVOb8zDtreCkB3_u7XED9YpA,1516
  nexaai/binds/__init__.py,sha256=eYuay_8DDXeOUWz2_R9HFSabohxs6hvZn391t2L0Po0,104
- nexaai/binds/common_bind.cpython-310-darwin.so,sha256=km1TU5WOJHVjvyM4l5mgAkS_omxuKt8pM92E9Wv0VqM,235488
+ nexaai/binds/common_bind.cpython-310-darwin.so,sha256=BoXByRlNGDaNS1YyZyCF-s7h0vXP9NLPlJMQQ5pqusU,235488
  nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=b2NoXFAJvPLi_P1X7lXLKmAUU0v2HJI3Zwa10gfqHdw,202032
- nexaai/binds/libnexa_bridge.dylib,sha256=SLP_DHAJeSl5gJMSs2fZtPLv-VgNyojZTK0auqDXSpo,250408
+ nexaai/binds/libnexa_bridge.dylib,sha256=e6uFx8ENEdCWk8whKyoVvX-e9-Bk_35kqIDV3kRDuXU,250408
  nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=p1ZTGMolEkWywkmwzOUjTr3RpSEH21BHZAggVzo89Ks,183088
  nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=LGd-tykePnQFfGca25HnPIBfXsfrMzbwyx6d5Ld3xps,183000
  nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=GyOkHOM-5uHp7NUZ4Sr9BWak6BYpcc9aqI9A-zPnQp4,629528
@@ -246,8 +246,8 @@ nexaai/mlx_backend/tts/__init__.py,sha256=fuT_9_xpYJ28m4yjly5L2jChUrzlSQz-b_S7nu
  nexaai/mlx_backend/tts/interface.py,sha256=0FvZbIyOvg8jERZEQ6bygbv7v02O9xHO4-TPUlar0b4,9568
  nexaai/mlx_backend/vlm/__init__.py,sha256=_25kvMEviX16Hg3bro8Ws70V0eeIEqYKV8ZDXqYzKew,73
  nexaai/mlx_backend/vlm/generate.py,sha256=DqHFEAuqk-nko8ho6U9GAXTDAWz4d8GTe_hCt-XFyCw,19071
- nexaai/mlx_backend/vlm/generate_qwen3_vl.py,sha256=undjso1mfxqpd6FMTksSA5qagRttxAGbOBj1x7cqI1s,9211
- nexaai/mlx_backend/vlm/interface.py,sha256=vFTzJCbqq55ybv_tbDBC9NVn1_sXgCfqXdsV-3ia8vo,16177
+ nexaai/mlx_backend/vlm/generate_qwen3_vl.py,sha256=eeizW18u6dHPZOOnJtQUJkiqMAIIpOSS-IOjacXGsz4,10240
+ nexaai/mlx_backend/vlm/interface.py,sha256=HOPzWNMs6QaHO6x0Z83kW1xkRRmb8_xo6xQLKsOWqAo,19013
  nexaai/mlx_backend/vlm/main.py,sha256=nPcg25jupeDD74uvRoxpWp3Dsulw7WddI7vll6zejak,10664
  nexaai/mlx_backend/vlm/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/mlx_backend/vlm/modeling/convert.py,sha256=ia5i9cgTufFGmKyhkYUaW0nfNqT_bMo8i-Hg_zy5JC4,1863
@@ -362,7 +362,7 @@ nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py,sha256=LArnNtI98B_GJO
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/base.py,sha256=4RlZwgz8YX2ngmJNaymxFFpw9hJu-0EMw9xwXpngW9o,3496
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/cache.py,sha256=NMOB6x-RT6svF4H-Ymo5WqnP7ptAal3aaKjWZXWGMsM,17671
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py,sha256=Mw7Btz0_t7erQOrfWzCXT-ktEwZl61OODcmDMIo3VS0,26719
+ nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py,sha256=bchCpnlewysWQss5TQKxdKPXYd5VA7ySUDfRt8Xj_H4,26677
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py,sha256=ty0dA3SsEUFtFbHo16tKdnKymrNKKsUO3KMYapMajbY,8704
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py,sha256=8SEeVwgjuvaYy-4ALAU0RHQMuRr2k7EkXba_csxk498,10673
  nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py,sha256=Gqanx4hBDcon_k5ClhUsS4YpMbZNiee8jvImGS9h43s,13229
@@ -378,17 +378,16 @@ nexaai/rerank_impl/pybind_rerank_impl.py,sha256=CtwkG7YrW58GPMDERJSnISGTVCXWNju5
  nexaai/tts_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/tts_impl/mlx_tts_impl.py,sha256=i_uNPdvlXYtL3e01oKjDlP9jgkWCRt1bBHsExaaiJi8,3101
  nexaai/tts_impl/pybind_tts_impl.py,sha256=mpn44r6pfYLIl-NrEy2dXHjGtWtNCmM7HRyxiANxUI4,1444
- nexaai/utils/avatar_fetcher.py,sha256=bWy8ujgbOiTHFCjFxTwkn3uXbZ84PgEGUkXkR3MH4bI,3821
  nexaai/utils/decode.py,sha256=61n4Zf6c5QLyqGoctEitlI9BX3tPlP2a5aaKNHbw3T4,404
- nexaai/utils/manifest_utils.py,sha256=sR9Nme4GbD3Cb3fMd55yLvGZpqxb71vd6b2XZTsrIGM,12328
- nexaai/utils/model_manager.py,sha256=p2kJKK63Zk-rEUucFsgY0T5PyXi_IvJY0gKewUVcAV4,56081
+ nexaai/utils/manifest_utils.py,sha256=PA84obFP7W1dlneURlIHIzJjWIF5dbDHGdNeHouUy68,12659
+ nexaai/utils/model_manager.py,sha256=_WKJP7YVk7q587OoOWwDNWVR-8tbKZkmHKjcCZN8Q4M,55979
  nexaai/utils/model_types.py,sha256=-DER8L4lAUR_iLS99F0r57avwqWtuN21ug5pX2p24_E,1369
  nexaai/utils/progress_tracker.py,sha256=jdUqtmPqyhwC9uSKvQcJEYETwSt-OhP4oitdJ94614o,15394
  nexaai/utils/quantization_utils.py,sha256=FYcNSAKGlBqFDUTx3jSKOr2lnq4nyiyC0ZG8oSxFwiU,7825
  nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nexaai/vlm_impl/mlx_vlm_impl.py,sha256=pLtWm_ckz8a0U-AtAOMVseFDO4OVPvHyYO2KlfBaGYk,10833
  nexaai/vlm_impl/pybind_vlm_impl.py,sha256=FAbhpRJzHgI78r0mUvKybO97R1szvNhH0aTn_I52oT4,8597
- nexaai-1.0.16rc13.dist-info/METADATA,sha256=eqPLK_7JBryWiB7qvdppmdEoHd42jZohyBHi0j1Lges,1202
- nexaai-1.0.16rc13.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
- nexaai-1.0.16rc13.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
- nexaai-1.0.16rc13.dist-info/RECORD,,
+ nexaai-1.0.17.dist-info/METADATA,sha256=BMYxa8SkZYJx_zRraC8kS32fkBpXFsrKthZBJxISykc,1198
+ nexaai-1.0.17.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
+ nexaai-1.0.17.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
+ nexaai-1.0.17.dist-info/RECORD,,
nexaai/utils/avatar_fetcher.py DELETED
@@ -1,104 +0,0 @@
- """Utility for fetching avatar URLs from HuggingFace."""
-
- import logging
- from typing import Dict, Optional
- import httpx
-
- logger = logging.getLogger(__name__)
-
-
- def fetch_avatar_urls_from_hf_api(query: str, custom_endpoint: Optional[str] = None) -> Dict[str, str]:
- """
- Fetch avatar URLs from HuggingFace models-json endpoint.
-
- Args:
- query: Search query to fetch models for
- custom_endpoint: Optional custom HuggingFace endpoint
-
- Returns:
- Dictionary mapping author names to avatar URLs
- """
- avatar_map = {}
- try:
- # Use the base URL from the configured endpoint
- base_url = custom_endpoint if custom_endpoint else "https://huggingface.co"
-
- # Build the URL with query parameter
- url = f"{base_url}/models-json?sort=trending&search={query}&withCount=true"
-
- # Make the HTTP request with a timeout
- with httpx.Client(timeout=2.0) as client:
- response = client.get(url)
-
- if response.status_code == 200:
- data = response.json()
- models = data.get("models", [])
-
- # Build a map of author names to avatar URLs
- for model in models:
- author = model.get("author")
- author_data = model.get("authorData", {})
- avatar_url = author_data.get("avatarUrl")
-
- if author and avatar_url:
- # Handle relative URLs by prepending appropriate base URL
- if avatar_url.startswith("/"):
- avatar_url = f"{base_url}{avatar_url}"
- avatar_map[author] = avatar_url
-
- logger.debug(f"Fetched {len(avatar_map)} avatar URLs from HuggingFace API")
- else:
- logger.warning(f"Failed to fetch avatar URLs: HTTP {response.status_code}")
-
- except Exception as e:
- logger.warning(f"Error fetching avatar URLs from HuggingFace API: {e}")
- # Return empty map on error - we'll fall back to default behavior
-
- return avatar_map
-
-
- def get_avatar_url_for_repo(repo_id: str, search_query: Optional[str] = None,
- custom_endpoint: Optional[str] = None) -> Optional[str]:
- """
- Get avatar URL for a repository ID.
-
- This method tries multiple strategies:
- 1. If search_query is provided, fetch from HuggingFace API with that query
- 2. Try fetching with the full repo_id as query
- 3. Try fetching with just the organization name as query
- 4. Fall back to CDN URL pattern
-
- Args:
- repo_id: Repository ID in format "owner/repo"
- search_query: Optional search query to use for fetching avatars
- custom_endpoint: Optional custom HuggingFace endpoint
-
- Returns:
- Avatar URL or None if not found
- """
- if "/" not in repo_id:
- return None
-
- org_name = repo_id.split("/")[0]
-
- # Try with search query if provided
- if search_query:
- avatar_map = fetch_avatar_urls_from_hf_api(search_query, custom_endpoint)
- avatar_url = avatar_map.get(org_name)
- if avatar_url:
- return avatar_url
-
- # Try with full repo_id
- avatar_map = fetch_avatar_urls_from_hf_api(repo_id, custom_endpoint)
- avatar_url = avatar_map.get(org_name)
- if avatar_url:
- return avatar_url
-
- # Try with just organization name
- avatar_map = fetch_avatar_urls_from_hf_api(org_name, custom_endpoint)
- avatar_url = avatar_map.get(org_name)
- if avatar_url:
- return avatar_url
-
- # Fallback to CDN URL pattern
- return f"https://cdn-thumbnails.huggingface.co/social-thumbnails/{org_name}.png"