nexaai 1.0.16rc13__cp310-cp310-macosx_13_0_x86_64.whl → 1.0.17__cp310-cp310-macosx_13_0_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nexaai might be problematic.
- nexaai/_stub.cpython-310-darwin.so +0 -0
- nexaai/_version.py +1 -1
- nexaai/binds/common_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/libnexa_bridge.dylib +0 -0
- nexaai/mlx_backend/vlm/generate_qwen3_vl.py +71 -38
- nexaai/mlx_backend/vlm/interface.py +79 -7
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py +7 -7
- nexaai/utils/manifest_utils.py +10 -14
- nexaai/utils/model_manager.py +28 -26
- {nexaai-1.0.16rc13.dist-info → nexaai-1.0.17.dist-info}/METADATA +1 -1
- {nexaai-1.0.16rc13.dist-info → nexaai-1.0.17.dist-info}/RECORD +13 -14
- nexaai/utils/avatar_fetcher.py +0 -104
- {nexaai-1.0.16rc13.dist-info → nexaai-1.0.17.dist-info}/WHEEL +0 -0
- {nexaai-1.0.16rc13.dist-info → nexaai-1.0.17.dist-info}/top_level.txt +0 -0
nexaai/_stub.cpython-310-darwin.so
Binary file

nexaai/_version.py
CHANGED

nexaai/binds/common_bind.cpython-310-darwin.so
Binary file

nexaai/binds/libnexa_bridge.dylib
Binary file

nexaai/mlx_backend/vlm/generate_qwen3_vl.py
CHANGED
@@ -41,7 +41,12 @@ except ImportError:
 from ml import ChatMessage
 from dataclasses import dataclass
 from typing import Any, Generator, List, Optional, Sequence, Tuple, Union
-from .generate import GenerationResult
+from .generate import GenerationResult
+
+# Custom exception for context length exceeded
+class ContextLengthExceededError(Exception):
+    """Raised when input context length exceeds model's maximum context size"""
+    pass

 @dataclass
 class Qwen3VLBundledModel:
@@ -67,6 +72,7 @@ def load_qwen3_vl(

     Parameters are aligned with .generate.load for compatibility.
     """
+
     model_path = Path(path_or_repo)
     if not model_path.exists():
         if "/" in path_or_repo:
@@ -154,7 +160,6 @@ def load_qwen3_vl(
     if quantization_bits in [4, 8]:
         nn.quantize(llm_model, bits=quantization_bits, group_size=64,
                     class_predicate=quant_predicate)
-    # For f32 (32-bit), no quantization needed

     llm_model.load_weights(str(llm_weights_path), strict=True)

@@ -166,11 +171,15 @@ def load_qwen3_vl(

 def apply_chat_template_qwen3_vl(messages: Sequence[ChatMessage], num_images: int = 0, num_audios: int = 0, tools: Optional[str] = None, enable_thinking: bool = False) -> str:
     """Apply chat template: serialize messages with content as a list of typed items."""
+
     messages_dict = []
-    for msg in messages:
+    for i, msg in enumerate(messages):
         content_items = [{"type": "text", "text": msg.content}]
         messages_dict.append({"role": msg.role, "content": content_items})
-
+
+    result = json.dumps(messages_dict)
+
+    return result


 def stream_generate_qwen3_vl(
@@ -184,15 +193,22 @@ def stream_generate_qwen3_vl(

 ) -> Generator[Any, None, None]:
     """Stream generation yielding .generate.GenerationResult-compatible chunks."""
-
+
+    try:
+        messages = json.loads(prompt)
+    except json.JSONDecodeError as e:
+        raise
+
     if image is not None:
         image_list = image if isinstance(image, list) else [image]
         pil_images = []
-        for p in image_list:
+        for i, p in enumerate(image_list):
             try:
-
-
+                img = Image.open(p)
+                pil_images.append(img)
+            except Exception as e:
                 continue
+
         contents = [{"type": "image", "image": img} for img in pil_images]
         if messages:
             if "content" not in messages[-1] or not isinstance(messages[-1]["content"], list):
@@ -201,6 +217,7 @@ def stream_generate_qwen3_vl(

     raw_text, processed_images = processor.messages_to_text(
         messages, add_generation_prompt=True)
+

     inputs = processor.text_to_input_ids(
         raw_text, images=processed_images, return_tensors="mlx")
@@ -208,10 +225,18 @@
     input_ids = inputs["input_ids"]
     pixel_values = inputs.get("pixel_values")
     image_grid_thw = inputs.get("image_grid_thw")
+
+
+    # Check if input context exceeds KV cache size and raise error
+    max_kv_size = 4096  # This should match the max_kv_size used in make_prompt_cache and nexa_generate_step
+    if input_ids.size > max_kv_size:
+        error_msg = f"Input context length ({input_ids.size} tokens) exceeds maximum supported context size ({max_kv_size} tokens). Please reduce the input length."
+        raise ContextLengthExceededError(error_msg)

     inputs_embeds, deepstack_visual_embeds, visual_pos_masks, cos, sin, rope_deltas = handle_multimodal_embeds(
         model.vision_model, model.llm_model, input_ids, pixel_values, image_grid_thw
     )
+

     prompt_cache = make_prompt_cache(model.llm_model, max_kv_size=4096)
     tokenizer = processor.tokenizer
@@ -222,37 +247,45 @@

     gen_count = 0
     tic = time.perf_counter()
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            gen_count
-
-
+    try:
+        for token, logprobs in nexa_generate_step(
+            model=model.llm_model,
+            prompt=None,
+            input_embeddings=inputs_embeds,
+            max_tokens=max_tokens,
+            max_kv_size=4096,
+            prompt_cache=prompt_cache,
+            visual_pos_masks=visual_pos_masks,
+            deepstack_visual_embeds=deepstack_visual_embeds,
+            cos=cos,
+            sin=sin,
+            rope_deltas=rope_deltas,
+        ):
+            if token == tokenizer.eos_token_id:
+                break
+
+            text_piece = tokenizer.decode([token])
+            gen_count += 1
+
+            current_tps = gen_count / max(1e-6, (time.perf_counter() - tic))
+
+            yield GenerationResult(
+                text=text_piece,
+                token=token,
+                logprobs=logprobs,
+                prompt_tokens=int(input_ids.size),
+                generation_tokens=gen_count,
+                prompt_tps=float(prompt_tps),
+                generation_tps=float(current_tps),
+                peak_memory=float(mx.get_peak_memory() / 1e9),
+            )
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        raise
+

 def quant_predicate(path: str, mod: nn.Module) -> bool:
     """Quantization predicate to exclude certain layers from quantization."""
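Note on the new guard in generate_qwen3_vl.py: stream_generate_qwen3_vl now raises ContextLengthExceededError before prefill whenever the tokenized input exceeds the 4096-token KV cache. A minimal caller sketch, assuming only the names visible in this diff; the model/processor/prompt objects and keyword arguments shown here are illustrative:

# Hedged sketch: catch the new context-length guard when consuming the stream.
from nexaai.mlx_backend.vlm.generate_qwen3_vl import (
    ContextLengthExceededError,
    stream_generate_qwen3_vl,
)

def run_stream(model, processor, prompt_json, images=None, max_tokens=256):
    try:
        for chunk in stream_generate_qwen3_vl(
            model, processor, prompt_json, image=images, max_tokens=max_tokens
        ):
            print(chunk.text, end="", flush=True)
    except ContextLengthExceededError as err:
        # Prompt was longer than the 4096-token KV cache; shorten it and retry.
        print(f"\nPrompt too long: {err}")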
nexaai/mlx_backend/vlm/interface.py
CHANGED

@@ -25,7 +25,7 @@ from profiling import ProfilingMixin, ProfilingData, StopReason

 # Import from the actual mlx_vlm structure
 from .generate import generate, stream_generate, load
-from .generate_qwen3_vl import apply_chat_template_qwen3_vl, stream_generate_qwen3_vl, load_qwen3_vl
+from .generate_qwen3_vl import apply_chat_template_qwen3_vl, stream_generate_qwen3_vl, load_qwen3_vl, ContextLengthExceededError

 from .modeling.prompt_utils import apply_chat_template

@@ -80,6 +80,9 @@ class VLM(ProfilingMixin):

         # Init deafutl sampler config with defualt.
         self.sampler_config = SamplerConfig()
+
+        # Track global character position for incremental processing
+        self.global_n_past_chars = 0

     def destroy(self) -> None:
         """Destroy the model and free resources."""
@@ -89,6 +92,7 @@ class VLM(ProfilingMixin):
     def reset(self) -> None:
         """Reset the model state."""
         self._reset_cache()
+        self.global_n_past_chars = 0

     def _reset_cache(self) -> None:
         """Reset the KV cache."""
@@ -120,7 +124,7 @@ class VLM(ProfilingMixin):
         prompt: str,
         config: Optional[GenerationConfig] = None,
     ) -> GenerationResult:
-        """Generate text from prompt."""
+        """Generate text from prompt."""
         # Start profiling
         self._start_profiling()

@@ -141,6 +145,19 @@ class VLM(ProfilingMixin):
         image_list = [str(path) for path in image_paths] if image_paths else None
         audio_list = [str(path) for path in audio_paths] if audio_paths else None

+        # Extract incremental portion of the prompt (similar to llama.cpp VLM)
+        full_prompt_len = len(prompt)
+        incremental_prompt = prompt
+
+        # Apply incremental processing only for non-qwen3vl models
+        # qwen3vl requires complete JSON conversation structure
+        if self.model_name != "qwen3vl":
+            if self.global_n_past_chars < full_prompt_len:
+                incremental_prompt = prompt[self.global_n_past_chars:]
+            else:
+                # No new text to process
+                incremental_prompt = ""
+
         # End prompt processing, start decode
         self._prompt_end()
         self._decode_start()
@@ -152,7 +169,7 @@ class VLM(ProfilingMixin):
             text, stats = generate(
                 self.model,
                 self.processor,
-                prompt,
+                incremental_prompt,  # Use incremental prompt instead of full prompt
                 image=image_list,
                 audio=audio_list,
                 **gen_kwargs,
@@ -181,10 +198,16 @@ class VLM(ProfilingMixin):
             self._update_prompt_tokens(prompt_tokens)
             self._update_generated_tokens(generated_tokens)
             self._set_stop_reason(StopReason.ML_STOP_REASON_COMPLETED)
+
+            # Update global character position (not needed for qwen3vl JSON processing)
+            if self.model_name != "qwen3vl":
+                old_pos = self.global_n_past_chars
+                self.global_n_past_chars = full_prompt_len + len(text)
+
             self._decode_end()
             self._end_profiling()

-
+            result = GenerationResult(
                 text=text,
                 prompt_tokens=prompt_tokens,
                 generation_tokens=generated_tokens,
@@ -193,7 +216,18 @@ class VLM(ProfilingMixin):
                 generation_tps=stats.get("generation_tps", 0.0),
                 peak_memory=stats.get("peak_memory", 0.0),
             )
+
+            return result
+
+        except ContextLengthExceededError as e:
+            self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
+            self._decode_end()
+            self._end_profiling()
+            # Re-raise the original exception without wrapping it
+            raise e
         except Exception as e:
+            import traceback
+            traceback.print_exc()
             self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
             self._decode_end()
             self._end_profiling()
@@ -206,6 +240,7 @@ class VLM(ProfilingMixin):
         on_token: Optional[TokenCallback],
     ) -> GenerationResult:
         """Generate text with streaming callback. Unified method for both text and multimodal generation."""
+
         # Start profiling
         self._start_profiling()

@@ -218,6 +253,7 @@ class VLM(ProfilingMixin):
         if self.sampler_config is not None:
             gen_kwargs.update(self.sampler_config.__dict__)

+
         # Get image and audio paths from config
         image_paths = config.image_paths if config else None
         audio_paths = config.audio_paths if config else None
@@ -226,6 +262,21 @@ class VLM(ProfilingMixin):
         image_list = [str(path) for path in image_paths] if image_paths else None
         audio_list = [str(path) for path in audio_paths] if audio_paths else None

+
+        # Extract incremental portion of the prompt (similar to llama.cpp VLM)
+        full_prompt_len = len(prompt)
+        incremental_prompt = prompt
+
+
+        # Apply incremental processing only for non-qwen3vl models
+        # qwen3vl requires complete JSON conversation structure
+        if self.model_name != "qwen3vl":
+            if self.global_n_past_chars < full_prompt_len:
+                incremental_prompt = prompt[self.global_n_past_chars:]
+            else:
+                # No new text to process
+                incremental_prompt = ""
+
         # End prompt processing, start decode
         self._prompt_end()
         self._decode_start()
@@ -236,15 +287,19 @@ class VLM(ProfilingMixin):
         stream_generate_impl = stream_generate_qwen3_vl if self.model_name == "qwen3vl" else stream_generate

         try:
+            token_count = 0
+
             for result in stream_generate_impl(
                 self.model,
                 self.processor,
-                prompt,
+                incremental_prompt,  # Use incremental prompt instead of full prompt
                 image=image_list,
                 audio=audio_list,
                 **gen_kwargs,
             ):
-
+                token_count += 1
+
+                # Record TTFT on first token
                 if first_token:
                     self._record_ttft()
                     first_token = False
@@ -257,6 +312,7 @@ class VLM(ProfilingMixin):
                 text += result.text
                 last_result = result

+
             # Set stop reason if not user stop
             if self._profiling_context.stop_reason != StopReason.ML_STOP_REASON_USER:
                 self._set_stop_reason(StopReason.ML_STOP_REASON_EOS)
@@ -266,10 +322,15 @@ class VLM(ProfilingMixin):
             self._update_prompt_tokens(last_result.prompt_tokens)
             self._update_generated_tokens(last_result.generation_tokens)

+            # Update global character position (not needed for qwen3vl JSON processing)
+            if self.model_name != "qwen3vl":
+                old_pos = self.global_n_past_chars
+                self.global_n_past_chars = full_prompt_len + len(text)
+
             self._decode_end()
             self._end_profiling()

-
+            result = GenerationResult(
                 text=text,
                 token=last_result.token if last_result else None,
                 logprobs=last_result.logprobs if last_result else None,
@@ -280,7 +341,18 @@ class VLM(ProfilingMixin):
                 generation_tps=last_result.generation_tps if last_result else 0.0,
                 peak_memory=last_result.peak_memory if last_result else 0.0,
             )
+
+            return result
+
+        except ContextLengthExceededError as e:
+            self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
+            self._decode_end()
+            self._end_profiling()
+            # Re-raise the original exception without wrapping it
+            raise e
         except Exception as e:
+            import traceback
+            traceback.print_exc()
             self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
             self._decode_end()
             self._end_profiling()
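Note on interface.py: the VLM wrapper now keeps a global_n_past_chars counter so that, for non-qwen3vl models, only the newly appended portion of the accumulated chat prompt is passed to the backend, while qwen3vl still receives the complete JSON conversation. A standalone sketch of the slicing rule with names mirroring the diff; this is not the actual class, only an illustration:

# Standalone illustration of the incremental-prompt rule used in generate()/_generate_stream().
def split_incremental(prompt: str, global_n_past_chars: int, model_name: str) -> str:
    if model_name == "qwen3vl":
        # qwen3vl requires the complete JSON conversation structure every call.
        return prompt
    if global_n_past_chars < len(prompt):
        return prompt[global_n_past_chars:]  # only the text not yet processed
    return ""  # no new text to process

# After a successful turn the tracker advances past prompt and reply:
# global_n_past_chars = len(prompt) + len(generated_text); reset() sets it back to 0.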
nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py
CHANGED

@@ -232,7 +232,7 @@ def generate_step(
         prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
         prompt_processed_tokens += prefill_step_size
         y = y[prefill_step_size:]
-        mx.
+        mx.clear_cache()

     y, logprobs = _step(y)

@@ -249,7 +249,7 @@ def generate_step(
             break
         yield y.item(), logprobs
         if n % 256 == 0:
-            mx.
+            mx.clear_cache()
         y, logprobs = next_y, next_logprobs
         n += 1

@@ -371,7 +371,7 @@ def nexa_generate_step(
         prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
         prompt_processed_tokens += prefill_step_size
         y = y[prefill_step_size:]
-        mx.
+        mx.clear_cache()

     y, logprobs = _step(y)

@@ -388,7 +388,7 @@ def nexa_generate_step(
             break
         yield y.item(), logprobs
         if n % 256 == 0:
-            mx.
+            mx.clear_cache()
         y, logprobs = next_y, next_logprobs
         n += 1

@@ -507,7 +507,7 @@ def nexa_multimodal_generate_step(
         prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
         prompt_processed_tokens += prefill_step_size
         y = y[prefill_step_size:]
-        mx.
+        mx.clear_cache()

     y, logprobs = _step(y)

@@ -524,7 +524,7 @@ def nexa_multimodal_generate_step(
             break
         yield y.item(), logprobs
         if n % 256 == 0:
-            mx.
+            mx.clear_cache()
         y, logprobs = next_y, next_logprobs
         n += 1

@@ -632,7 +632,7 @@ def speculative_generate_step(
         quantize_cache_fn(cache)
         mx.eval([c.state for c in cache])
         y = y[prefill_step_size:]
-        mx.
+        mx.clear_cache()
         return y

     def _rewind_cache(num_draft, num_accept):
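Note on llm_common/generate.py: the periodic cache release now calls mx.clear_cache() after each prefill chunk and every 256 decoded tokens. A small sketch of the pattern, assuming mlx.core exposes clear_cache() as used in the diff; the step function here is a placeholder, not part of the package:

import mlx.core as mx

def decode_loop(step_fn, max_tokens: int):
    # step_fn is a placeholder for the model's single-token step.
    # Mirrors the `if n % 256 == 0: mx.clear_cache()` pattern, which periodically
    # releases MLX's cached buffers to bound memory growth during long decodes.
    tokens = []
    for n in range(max_tokens):
        tokens.append(step_fn())
        if n % 256 == 0:
            mx.clear_cache()
    return tokens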
nexaai/utils/manifest_utils.py
CHANGED
@@ -35,15 +35,8 @@ def process_manifest_metadata(manifest: Dict[str, Any], repo_id: str) -> Dict[st
     # Handle download_time - keep as null if missing
     download_time = manifest.get('download_time')

-    # Handle avatar_url -
+    # Handle avatar_url - leave it null if missing/null
     avatar_url = manifest.get('avatar_url')
-    if not avatar_url:
-        try:
-            from .avatar_fetcher import get_avatar_url_for_repo
-            avatar_url = get_avatar_url_for_repo(repo_id)
-        except Exception:
-            # If fetching fails, leave as None
-            avatar_url = None

     # Return processed metadata
     processed_manifest = manifest.copy()
@@ -171,9 +164,9 @@ def create_gguf_manifest(repo_id: str, files: List[str], directory_path: str, ol
         },
         "ExtraFiles": None,
         # Preserve old metadata fields
-        "pipeline_tag": old_metadata.get('pipeline_tag'),
-        "download_time": old_metadata.get('download_time'),
-        "avatar_url": old_metadata.get('avatar_url')
+        "pipeline_tag": old_metadata.get('pipeline_tag') if old_metadata.get('pipeline_tag') else existing_manifest.get('pipeline_tag'),
+        "download_time": old_metadata.get('download_time') if old_metadata.get('download_time') else existing_manifest.get('download_time'),
+        "avatar_url": old_metadata.get('avatar_url') if old_metadata.get('avatar_url') else existing_manifest.get('avatar_url')
     }

     return manifest
@@ -182,6 +175,9 @@ def create_gguf_manifest(repo_id: str, files: List[str], directory_path: str, old
 def create_mlx_manifest(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any], is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None) -> Dict[str, Any]:
     """Create MLX format manifest."""

+    # Load existing manifest to merge MLX files if it exists
+    existing_manifest = load_nexa_manifest(directory_path)
+
     model_files = {}
     extra_files = []

@@ -250,9 +246,9 @@ def create_mlx_manifest(repo_id: str, files: List[str], directory_path: str, old
         },
         "ExtraFiles": extra_files if extra_files else None,
         # Preserve old metadata fields
-        "pipeline_tag": old_metadata.get('pipeline_tag'),
-        "download_time": old_metadata.get('download_time'),
-        "avatar_url": old_metadata.get('avatar_url')
+        "pipeline_tag": old_metadata.get('pipeline_tag') if old_metadata.get('pipeline_tag') else existing_manifest.get('pipeline_tag'),
+        "download_time": old_metadata.get('download_time') if old_metadata.get('download_time') else existing_manifest.get('download_time'),
+        "avatar_url": old_metadata.get('avatar_url') if old_metadata.get('avatar_url') else existing_manifest.get('avatar_url')
     }

     return manifest
nexaai/utils/model_manager.py
CHANGED
@@ -11,7 +11,6 @@ from huggingface_hub import HfApi
 from huggingface_hub.utils import HfHubHTTPError, RepositoryNotFoundError

 from .progress_tracker import CustomProgressTqdm, DownloadProgressTracker
-from .avatar_fetcher import get_avatar_url_for_repo
 from .manifest_utils import (
     load_download_metadata,
     save_download_metadata,
@@ -790,7 +789,7 @@ class HuggingFaceDownloader:
         # If no expected size, just check that file is not empty
         return os.path.getsize(file_path) > 0

-    def _fetch_and_save_metadata(self, repo_id: str, local_dir: str, is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None) -> None:
+    def _fetch_and_save_metadata(self, repo_id: str, local_dir: str, is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None, **kwargs) -> None:
         """Fetch model info and save metadata after successful download."""
         # Initialize metadata with defaults to ensure manifest is always created
         old_metadata = {
@@ -809,14 +808,9 @@ class HuggingFaceDownloader:
             # Log the error but continue with manifest creation
             print(f"Warning: Could not fetch model info for {repo_id}: {e}")

-
-
-
-            if avatar_url:
-                old_metadata['avatar_url'] = avatar_url
-        except Exception as e:
-            # Log the error but continue with manifest creation
-            print(f"Warning: Could not fetch avatar URL for {repo_id}: {e}")
+        # Use input avater url if provided
+        old_metadata['avatar_url'] = kwargs.get('avatar_url')
+

         # CRITICAL: Always create the manifest file, regardless of metadata fetch failures
         try:
@@ -850,7 +844,8 @@ class HuggingFaceDownloader:
         file_name: str,
         local_dir: str,
         progress_tracker: Optional[DownloadProgressTracker],
-        force_download: bool = False
+        force_download: bool = False,
+        **kwargs
     ) -> str:
         """Download a single file from the repository using HuggingFace Hub API."""
         # Create repo-specific directory for the single file
@@ -882,7 +877,7 @@ class HuggingFaceDownloader:
             progress_tracker.stop_tracking()

         # Save metadata after successful download
-        self._fetch_and_save_metadata(repo_id, file_local_dir, self._current_is_mmproj, self._current_file_name)
+        self._fetch_and_save_metadata(repo_id, file_local_dir, self._current_is_mmproj, self._current_file_name, **kwargs)

         return downloaded_path

@@ -901,7 +896,8 @@ class HuggingFaceDownloader:
         repo_id: str,
         local_dir: str,
         progress_tracker: Optional[DownloadProgressTracker],
-        force_download: bool = False
+        force_download: bool = False,
+        **kwargs
     ) -> str:
         """Download the entire repository."""
         # Create a subdirectory for this specific repo
@@ -927,7 +923,7 @@ class HuggingFaceDownloader:
             progress_tracker.stop_tracking()

         # Save metadata after successful download
-        self._fetch_and_save_metadata(repo_id, repo_local_dir, self._current_is_mmproj, self._current_file_name)
+        self._fetch_and_save_metadata(repo_id, repo_local_dir, self._current_is_mmproj, self._current_file_name, **kwargs)

         return downloaded_path

@@ -944,7 +940,8 @@ class HuggingFaceDownloader:
         file_names: List[str],
         local_dir: str,
         progress_tracker: Optional[DownloadProgressTracker],
-        force_download: bool = False
+        force_download: bool = False,
+        **kwargs
     ) -> str:
         """Download multiple specific files from HuggingFace Hub."""
         # Create repo-specific directory
@@ -989,7 +986,7 @@ class HuggingFaceDownloader:
             progress_tracker.stop_tracking()

         # Save metadata after successful download
-        self._fetch_and_save_metadata(repo_id, repo_local_dir, self._current_is_mmproj, self._current_file_name)
+        self._fetch_and_save_metadata(repo_id, repo_local_dir, self._current_is_mmproj, self._current_file_name, **kwargs)

         return repo_local_dir

@@ -1015,7 +1012,8 @@ class HuggingFaceDownloader:
         progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
         show_progress: bool = True,
         force_download: bool = False,
-        is_mmproj: bool = False
+        is_mmproj: bool = False,
+        **kwargs
     ) -> str:
         """
         Main download method that handles all download scenarios.
@@ -1062,13 +1060,13 @@
             if file_name is None:
                 # Download entire repository
                 return self._download_entire_repository(
-                    repo_id, local_dir, progress_tracker, force_download
+                    repo_id, local_dir, progress_tracker, force_download, **kwargs
                 )
             elif isinstance(file_name, str):
                 # Download specific single file
                 self._validate_file_exists_in_repo(file_name, info, repo_id, progress_tracker)
                 return self._download_single_file(
-                    repo_id, file_name, local_dir, progress_tracker, force_download
+                    repo_id, file_name, local_dir, progress_tracker, force_download, **kwargs
                 )
             else: # file_name is a list
                 # Download multiple specific files
@@ -1077,7 +1075,7 @@
                     self._validate_file_exists_in_repo(fname, info, repo_id, progress_tracker)

                 return self._download_multiple_files_from_hf(
-                    repo_id, file_name, local_dir, progress_tracker, force_download
+                    repo_id, file_name, local_dir, progress_tracker, force_download, **kwargs
                 )

         except Exception as e:
@@ -1107,7 +1105,8 @@ def download_from_huggingface(
     token: Union[bool, str, None] = None,
     custom_endpoint: Optional[str] = None,
     force_download: bool = False,
-    is_mmproj: Optional[bool] = None
+    is_mmproj: Optional[bool] = None,
+    **kwargs
 ) -> str:
     """
     Download models or files from HuggingFace Hub or custom mirror endpoints.
@@ -1197,7 +1196,8 @@
         progress_callback=progress_callback,
         show_progress=show_progress,
         force_download=force_download,
-        is_mmproj=is_mmproj
+        is_mmproj=is_mmproj,
+        **kwargs
     )


@@ -1211,7 +1211,8 @@ def _download_model_if_needed(
     param_name: str,
     progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
     token: Union[bool, str, None] = None,
-    is_mmproj: bool = False
+    is_mmproj: bool = False,
+    **kwargs
 ) -> str:
     """
     Helper function to download a model from HuggingFace if it doesn't exist locally.
@@ -1247,7 +1248,8 @@
         progress_callback=progress_callback,
         show_progress=True,
         token=token,
-        is_mmproj=is_mmproj
+        is_mmproj=is_mmproj,
+        **kwargs
     )

     return downloaded_path
@@ -1320,7 +1322,7 @@ def auto_download_model(func: Callable) -> Callable:
         if name_or_path is not None:
             try:
                 downloaded_name_path = _download_model_if_needed(
-                    name_or_path, 'name_or_path', progress_callback, token
+                    name_or_path, 'name_or_path', progress_callback, token, **kwargs
                 )

                 # Replace name_or_path with downloaded path
@@ -1338,7 +1340,7 @@
         if mmproj_path is not None:
             try:
                 downloaded_mmproj_path = _download_model_if_needed(
-                    mmproj_path, 'mmproj_path', progress_callback, token, is_mmproj=True
+                    mmproj_path, 'mmproj_path', progress_callback, token, is_mmproj=True, **kwargs
                 )

                 # Replace mmproj_path with downloaded path
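Note on model_manager.py: arbitrary keyword arguments (notably avatar_url) are now threaded from the public download entry points down to _fetch_and_save_metadata, replacing the deleted avatar_fetcher lookup. A hedged usage sketch; the repo id and every argument name other than avatar_url are illustrative assumptions, not confirmed by the diff:

# Hypothetical call: the caller supplies the avatar URL directly; it travels
# through **kwargs and is read via kwargs.get('avatar_url') when the manifest is written.
from nexaai.utils.model_manager import download_from_huggingface

local_path = download_from_huggingface(
    "org/some-model",                                  # illustrative repo id
    avatar_url="https://example.com/org-avatar.png",   # forwarded through **kwargs
)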
{nexaai-1.0.16rc13.dist-info → nexaai-1.0.17.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
 nexaai/__init__.py,sha256=L8oB7GFZZMGnUpCg0PecDbI_ycKuQak-ZEJ4Y12_QIw,2184
-nexaai/_stub.cpython-310-darwin.so,sha256=
-nexaai/_version.py,sha256=
+nexaai/_stub.cpython-310-darwin.so,sha256=HjqUYc8SyajzyySZk1eBJdO7Rc_db2F-kS3KdPSPB5o,49832
+nexaai/_version.py,sha256=eaXF_gF6uNVz9AglXCAwIyseTDCCAGEhr3CCnSfr3tY,139
 nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
 nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
 nexaai/common.py,sha256=Y0NJNLTi4Nq4x1WL6PQsSvGUto0eGmWhjpsC6jcekfA,3444
@@ -17,9 +17,9 @@ nexaai/asr_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/asr_impl/mlx_asr_impl.py,sha256=eosd8-TIWAOwV0HltmoFrLwzXHcU4jyxtncvuZE9pgA,3257
 nexaai/asr_impl/pybind_asr_impl.py,sha256=pE9Hb_hMi5yAc4MF83bLVOb8zDtreCkB3_u7XED9YpA,1516
 nexaai/binds/__init__.py,sha256=eYuay_8DDXeOUWz2_R9HFSabohxs6hvZn391t2L0Po0,104
-nexaai/binds/common_bind.cpython-310-darwin.so,sha256=
+nexaai/binds/common_bind.cpython-310-darwin.so,sha256=BoXByRlNGDaNS1YyZyCF-s7h0vXP9NLPlJMQQ5pqusU,235488
 nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=b2NoXFAJvPLi_P1X7lXLKmAUU0v2HJI3Zwa10gfqHdw,202032
-nexaai/binds/libnexa_bridge.dylib,sha256=
+nexaai/binds/libnexa_bridge.dylib,sha256=e6uFx8ENEdCWk8whKyoVvX-e9-Bk_35kqIDV3kRDuXU,250408
 nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=p1ZTGMolEkWywkmwzOUjTr3RpSEH21BHZAggVzo89Ks,183088
 nexaai/binds/vlm_bind.cpython-310-darwin.so,sha256=LGd-tykePnQFfGca25HnPIBfXsfrMzbwyx6d5Ld3xps,183000
 nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=GyOkHOM-5uHp7NUZ4Sr9BWak6BYpcc9aqI9A-zPnQp4,629528
@@ -246,8 +246,8 @@ nexaai/mlx_backend/tts/__init__.py,sha256=fuT_9_xpYJ28m4yjly5L2jChUrzlSQz-b_S7nu
 nexaai/mlx_backend/tts/interface.py,sha256=0FvZbIyOvg8jERZEQ6bygbv7v02O9xHO4-TPUlar0b4,9568
 nexaai/mlx_backend/vlm/__init__.py,sha256=_25kvMEviX16Hg3bro8Ws70V0eeIEqYKV8ZDXqYzKew,73
 nexaai/mlx_backend/vlm/generate.py,sha256=DqHFEAuqk-nko8ho6U9GAXTDAWz4d8GTe_hCt-XFyCw,19071
-nexaai/mlx_backend/vlm/generate_qwen3_vl.py,sha256=
-nexaai/mlx_backend/vlm/interface.py,sha256=
+nexaai/mlx_backend/vlm/generate_qwen3_vl.py,sha256=eeizW18u6dHPZOOnJtQUJkiqMAIIpOSS-IOjacXGsz4,10240
+nexaai/mlx_backend/vlm/interface.py,sha256=HOPzWNMs6QaHO6x0Z83kW1xkRRmb8_xo6xQLKsOWqAo,19013
 nexaai/mlx_backend/vlm/main.py,sha256=nPcg25jupeDD74uvRoxpWp3Dsulw7WddI7vll6zejak,10664
 nexaai/mlx_backend/vlm/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/mlx_backend/vlm/modeling/convert.py,sha256=ia5i9cgTufFGmKyhkYUaW0nfNqT_bMo8i-Hg_zy5JC4,1863
@@ -362,7 +362,7 @@ nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py,sha256=LArnNtI98B_GJO
 nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/base.py,sha256=4RlZwgz8YX2ngmJNaymxFFpw9hJu-0EMw9xwXpngW9o,3496
 nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/cache.py,sha256=NMOB6x-RT6svF4H-Ymo5WqnP7ptAal3aaKjWZXWGMsM,17671
-nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py,sha256=
+nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py,sha256=bchCpnlewysWQss5TQKxdKPXYd5VA7ySUDfRt8Xj_H4,26677
 nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py,sha256=ty0dA3SsEUFtFbHo16tKdnKymrNKKsUO3KMYapMajbY,8704
 nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py,sha256=8SEeVwgjuvaYy-4ALAU0RHQMuRr2k7EkXba_csxk498,10673
 nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py,sha256=Gqanx4hBDcon_k5ClhUsS4YpMbZNiee8jvImGS9h43s,13229
@@ -378,17 +378,16 @@ nexaai/rerank_impl/pybind_rerank_impl.py,sha256=CtwkG7YrW58GPMDERJSnISGTVCXWNju5
 nexaai/tts_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/tts_impl/mlx_tts_impl.py,sha256=i_uNPdvlXYtL3e01oKjDlP9jgkWCRt1bBHsExaaiJi8,3101
 nexaai/tts_impl/pybind_tts_impl.py,sha256=mpn44r6pfYLIl-NrEy2dXHjGtWtNCmM7HRyxiANxUI4,1444
-nexaai/utils/avatar_fetcher.py,sha256=bWy8ujgbOiTHFCjFxTwkn3uXbZ84PgEGUkXkR3MH4bI,3821
 nexaai/utils/decode.py,sha256=61n4Zf6c5QLyqGoctEitlI9BX3tPlP2a5aaKNHbw3T4,404
-nexaai/utils/manifest_utils.py,sha256=
-nexaai/utils/model_manager.py,sha256=
+nexaai/utils/manifest_utils.py,sha256=PA84obFP7W1dlneURlIHIzJjWIF5dbDHGdNeHouUy68,12659
+nexaai/utils/model_manager.py,sha256=_WKJP7YVk7q587OoOWwDNWVR-8tbKZkmHKjcCZN8Q4M,55979
 nexaai/utils/model_types.py,sha256=-DER8L4lAUR_iLS99F0r57avwqWtuN21ug5pX2p24_E,1369
 nexaai/utils/progress_tracker.py,sha256=jdUqtmPqyhwC9uSKvQcJEYETwSt-OhP4oitdJ94614o,15394
 nexaai/utils/quantization_utils.py,sha256=FYcNSAKGlBqFDUTx3jSKOr2lnq4nyiyC0ZG8oSxFwiU,7825
 nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/vlm_impl/mlx_vlm_impl.py,sha256=pLtWm_ckz8a0U-AtAOMVseFDO4OVPvHyYO2KlfBaGYk,10833
 nexaai/vlm_impl/pybind_vlm_impl.py,sha256=FAbhpRJzHgI78r0mUvKybO97R1szvNhH0aTn_I52oT4,8597
-nexaai-1.0.
-nexaai-1.0.
-nexaai-1.0.
-nexaai-1.0.
+nexaai-1.0.17.dist-info/METADATA,sha256=BMYxa8SkZYJx_zRraC8kS32fkBpXFsrKthZBJxISykc,1198
+nexaai-1.0.17.dist-info/WHEEL,sha256=0KYp5feZ1CMUhsfFXKpSQTbSmQbXy4mv6yPPVBXg2EM,110
+nexaai-1.0.17.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
+nexaai-1.0.17.dist-info/RECORD,,
nexaai/utils/avatar_fetcher.py
DELETED
@@ -1,104 +0,0 @@
-"""Utility for fetching avatar URLs from HuggingFace."""
-
-import logging
-from typing import Dict, Optional
-import httpx
-
-logger = logging.getLogger(__name__)
-
-
-def fetch_avatar_urls_from_hf_api(query: str, custom_endpoint: Optional[str] = None) -> Dict[str, str]:
-    """
-    Fetch avatar URLs from HuggingFace models-json endpoint.
-
-    Args:
-        query: Search query to fetch models for
-        custom_endpoint: Optional custom HuggingFace endpoint
-
-    Returns:
-        Dictionary mapping author names to avatar URLs
-    """
-    avatar_map = {}
-    try:
-        # Use the base URL from the configured endpoint
-        base_url = custom_endpoint if custom_endpoint else "https://huggingface.co"
-
-        # Build the URL with query parameter
-        url = f"{base_url}/models-json?sort=trending&search={query}&withCount=true"
-
-        # Make the HTTP request with a timeout
-        with httpx.Client(timeout=2.0) as client:
-            response = client.get(url)
-
-            if response.status_code == 200:
-                data = response.json()
-                models = data.get("models", [])
-
-                # Build a map of author names to avatar URLs
-                for model in models:
-                    author = model.get("author")
-                    author_data = model.get("authorData", {})
-                    avatar_url = author_data.get("avatarUrl")
-
-                    if author and avatar_url:
-                        # Handle relative URLs by prepending appropriate base URL
-                        if avatar_url.startswith("/"):
-                            avatar_url = f"{base_url}{avatar_url}"
-                        avatar_map[author] = avatar_url
-
-                logger.debug(f"Fetched {len(avatar_map)} avatar URLs from HuggingFace API")
-            else:
-                logger.warning(f"Failed to fetch avatar URLs: HTTP {response.status_code}")
-
-    except Exception as e:
-        logger.warning(f"Error fetching avatar URLs from HuggingFace API: {e}")
-        # Return empty map on error - we'll fall back to default behavior
-
-    return avatar_map
-
-
-def get_avatar_url_for_repo(repo_id: str, search_query: Optional[str] = None,
-                            custom_endpoint: Optional[str] = None) -> Optional[str]:
-    """
-    Get avatar URL for a repository ID.
-
-    This method tries multiple strategies:
-    1. If search_query is provided, fetch from HuggingFace API with that query
-    2. Try fetching with the full repo_id as query
-    3. Try fetching with just the organization name as query
-    4. Fall back to CDN URL pattern
-
-    Args:
-        repo_id: Repository ID in format "owner/repo"
-        search_query: Optional search query to use for fetching avatars
-        custom_endpoint: Optional custom HuggingFace endpoint
-
-    Returns:
-        Avatar URL or None if not found
-    """
-    if "/" not in repo_id:
-        return None
-
-    org_name = repo_id.split("/")[0]
-
-    # Try with search query if provided
-    if search_query:
-        avatar_map = fetch_avatar_urls_from_hf_api(search_query, custom_endpoint)
-        avatar_url = avatar_map.get(org_name)
-        if avatar_url:
-            return avatar_url
-
-    # Try with full repo_id
-    avatar_map = fetch_avatar_urls_from_hf_api(repo_id, custom_endpoint)
-    avatar_url = avatar_map.get(org_name)
-    if avatar_url:
-        return avatar_url
-
-    # Try with just organization name
-    avatar_map = fetch_avatar_urls_from_hf_api(org_name, custom_endpoint)
-    avatar_url = avatar_map.get(org_name)
-    if avatar_url:
-        return avatar_url
-
-    # Fallback to CDN URL pattern
-    return f"https://cdn-thumbnails.huggingface.co/social-thumbnails/{org_name}.png"
{nexaai-1.0.16rc13.dist-info → nexaai-1.0.17.dist-info}/WHEEL
File without changes

{nexaai-1.0.16rc13.dist-info → nexaai-1.0.17.dist-info}/top_level.txt
File without changes