nexaai 1.0.11rc2__cp310-cp310-win_amd64.whl → 1.0.12__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of nexaai might be problematic.
- nexaai/_stub.cp310-win_amd64.pyd +0 -0
- nexaai/_version.py +1 -1
- nexaai/binds/common_bind.cp310-win_amd64.pyd +0 -0
- nexaai/binds/embedder_bind.cp310-win_amd64.pyd +0 -0
- nexaai/binds/llm_bind.cp310-win_amd64.pyd +0 -0
- nexaai/binds/nexa_bridge.dll +0 -0
- nexaai/binds/nexa_llama_cpp/ggml-base.dll +0 -0
- nexaai/binds/nexa_llama_cpp/ggml-cpu.dll +0 -0
- nexaai/binds/nexa_llama_cpp/ggml-cuda.dll +0 -0
- nexaai/binds/nexa_llama_cpp/ggml-vulkan.dll +0 -0
- nexaai/binds/nexa_llama_cpp/ggml.dll +0 -0
- nexaai/binds/nexa_llama_cpp/llama.dll +0 -0
- nexaai/binds/nexa_llama_cpp/mtmd.dll +0 -0
- nexaai/binds/nexa_llama_cpp/nexa_plugin.dll +0 -0
- nexaai/embedder_impl/mlx_embedder_impl.py +6 -5
- nexaai/utils/manifest_utils.py +280 -0
- nexaai/utils/model_manager.py +103 -87
- nexaai/utils/model_types.py +47 -0
- nexaai/utils/progress_tracker.py +12 -8
- nexaai/utils/quantization_utils.py +239 -0
- {nexaai-1.0.11rc2.dist-info → nexaai-1.0.12.dist-info}/METADATA +2 -1
- {nexaai-1.0.11rc2.dist-info → nexaai-1.0.12.dist-info}/RECORD +24 -21
- {nexaai-1.0.11rc2.dist-info → nexaai-1.0.12.dist-info}/WHEEL +0 -0
- {nexaai-1.0.11rc2.dist-info → nexaai-1.0.12.dist-info}/top_level.txt +0 -0
nexaai/_stub.cp310-win_amd64.pyd
CHANGED
Binary file

nexaai/_version.py
CHANGED
(1 line changed; diff body lost in extraction — the version string moves from 1.0.11rc2 to 1.0.12)

nexaai/binds/common_bind.cp310-win_amd64.pyd
CHANGED
Binary file

nexaai/binds/embedder_bind.cp310-win_amd64.pyd
CHANGED
Binary file

nexaai/binds/llm_bind.cp310-win_amd64.pyd
CHANGED
Binary file

nexaai/binds/nexa_bridge.dll
CHANGED
Binary file

nexaai/binds/nexa_llama_cpp/ggml-base.dll
CHANGED
Binary file

nexaai/binds/nexa_llama_cpp/ggml-cpu.dll
CHANGED
Binary file

nexaai/binds/nexa_llama_cpp/ggml-cuda.dll
CHANGED
Binary file

nexaai/binds/nexa_llama_cpp/ggml-vulkan.dll
CHANGED
Binary file

nexaai/binds/nexa_llama_cpp/ggml.dll
CHANGED
Binary file

nexaai/binds/nexa_llama_cpp/llama.dll
CHANGED
Binary file

nexaai/binds/nexa_llama_cpp/mtmd.dll
CHANGED
Binary file

nexaai/binds/nexa_llama_cpp/nexa_plugin.dll
CHANGED
Binary file
nexaai/embedder_impl/mlx_embedder_impl.py
CHANGED

@@ -3,7 +3,7 @@ import numpy as np
 
 from nexaai.common import PluginID
 from nexaai.embedder import Embedder, EmbeddingConfig
-from nexaai.mlx_backend.embedding.interface import [rest of line lost in extraction]
+from nexaai.mlx_backend.embedding.interface import create_embedder
 from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
 
 

@@ -27,11 +27,12 @@ class MLXEmbedderImpl(Embedder):
             MLXEmbedderImpl instance
         """
         try:
-            #
-
-            # Create instance and load MLX embedder
+            # Create instance
             instance = cls()
-            [line lost in extraction]
+
+            # Use the factory function to create the appropriate embedder based on model type
+            # This will automatically detect if it's JinaV2 or generic model and route correctly
+            instance._mlx_embedder = create_embedder(
                 model_path=model_path,
                 tokenizer_path=tokenizer_file
             )
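The comments added above describe create_embedder as a factory that inspects the model and routes JinaV2 checkpoints to a dedicated implementation. That interface module is not shown in this diff, so the following is only an illustrative sketch of such a routing factory; the class names and the config-key check are assumptions, not the package's actual code.

# Hypothetical sketch of a model-type-routing factory like the
# create_embedder this diff imports; names here are assumptions.
import json
import os


class JinaV2Embedder:
    def __init__(self, model_path, tokenizer_path):
        self.model_path, self.tokenizer_path = model_path, tokenizer_path


class GenericMLXEmbedder(JinaV2Embedder):
    pass


def create_embedder(model_path: str, tokenizer_path: str):
    """Route to a model-specific embedder based on config.json's model_type."""
    config_path = os.path.join(os.path.dirname(model_path), "config.json")
    model_type = ""
    if os.path.exists(config_path):
        with open(config_path, "r", encoding="utf-8") as f:
            model_type = json.load(f).get("model_type", "")
    if "jina" in model_type.lower():          # JinaV2-style checkpoints
        return JinaV2Embedder(model_path, tokenizer_path)
    return GenericMLXEmbedder(model_path, tokenizer_path)  # default path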
nexaai/utils/manifest_utils.py
ADDED

@@ -0,0 +1,280 @@
"""
Manifest and metadata utilities for handling nexa.manifest files and model metadata.

This module provides utilities to:
- Load and save nexa.manifest files
- Create GGUF and MLX manifests
- Process manifest metadata (handle null fields, fetch avatars, etc.)
- Manage backward compatibility with old download_metadata.json files
"""

import os
import json
from datetime import datetime
from typing import Dict, Any, List, Optional

from .quantization_utils import (
    extract_quantization_from_filename,
    detect_quantization_for_mlx
)
from .model_types import (
    PIPELINE_TO_MODEL_TYPE,
    MODEL_TYPE_TO_PIPELINE
)


def process_manifest_metadata(manifest: Dict[str, Any], repo_id: str) -> Dict[str, Any]:
    """Process manifest metadata to handle null/missing fields."""
    # Handle pipeline_tag
    pipeline_tag = manifest.get('pipeline_tag')
    if not pipeline_tag:
        # Reverse map from ModelType if available
        model_type = manifest.get('ModelType')
        pipeline_tag = MODEL_TYPE_TO_PIPELINE.get(model_type) if model_type else None

    # Handle download_time - keep as null if missing
    download_time = manifest.get('download_time')

    # Handle avatar_url - fetch on-the-fly if missing/null
    avatar_url = manifest.get('avatar_url')
    if not avatar_url:
        try:
            from .avatar_fetcher import get_avatar_url_for_repo
            avatar_url = get_avatar_url_for_repo(repo_id)
        except Exception:
            # If fetching fails, leave as None
            avatar_url = None

    # Return processed metadata
    processed_manifest = manifest.copy()
    processed_manifest.update({
        'pipeline_tag': pipeline_tag,
        'download_time': download_time,
        'avatar_url': avatar_url
    })

    return processed_manifest


def load_nexa_manifest(directory_path: str) -> Dict[str, Any]:
    """Load manifest from nexa.manifest if it exists."""
    manifest_path = os.path.join(directory_path, 'nexa.manifest')
    if os.path.exists(manifest_path):
        try:
            with open(manifest_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (json.JSONDecodeError, IOError):
            pass
    return {}


def load_download_metadata(directory_path: str, repo_id: Optional[str] = None) -> Dict[str, Any]:
    """Load download metadata from nexa.manifest if it exists, fallback to old format."""
    # First try to load from new manifest format
    manifest = load_nexa_manifest(directory_path)
    if manifest and repo_id:
        # Process the manifest to handle null/missing fields
        return process_manifest_metadata(manifest, repo_id)
    elif manifest:
        # Return manifest as-is if no repo_id provided (for backward compatibility)
        return manifest

    # Fallback to old format for backward compatibility
    old_metadata_path = os.path.join(directory_path, 'download_metadata.json')
    if os.path.exists(old_metadata_path):
        try:
            with open(old_metadata_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (json.JSONDecodeError, IOError):
            pass
    return {}


def save_download_metadata(directory_path: str, metadata: Dict[str, Any]) -> None:
    """Save download metadata to nexa.manifest in the new format."""
    manifest_path = os.path.join(directory_path, 'nexa.manifest')
    try:
        with open(manifest_path, 'w', encoding='utf-8') as f:
            json.dump(metadata, f, indent=2)
    except IOError:
        # If we can't save metadata, don't fail the download
        pass


def create_gguf_manifest(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any]) -> Dict[str, Any]:
    """Create GGUF format manifest."""

    # Load existing manifest to merge GGUF files if it exists
    existing_manifest = load_nexa_manifest(directory_path)

    model_files = {}
    if existing_manifest and "ModelFile" in existing_manifest:
        model_files = existing_manifest["ModelFile"].copy()

    # Process GGUF files
    for file_name in files:
        if file_name.endswith('.gguf'):
            # Use the new enum-based quantization extraction
            quantization_type = extract_quantization_from_filename(file_name)
            quant_level = quantization_type.value if quantization_type else "UNKNOWN"

            file_path = os.path.join(directory_path, file_name)
            file_size = 0
            if os.path.exists(file_path):
                try:
                    file_size = os.path.getsize(file_path)
                except (OSError, IOError):
                    pass

            model_files[quant_level] = {
                "Name": file_name,
                "Downloaded": True,
                "Size": file_size
            }

    manifest = {
        "Name": repo_id,
        "ModelType": PIPELINE_TO_MODEL_TYPE.get(old_metadata.get('pipeline_tag'), "other"),
        "PluginId": "llama_cpp",
        "ModelFile": model_files,
        "MMProjFile": {
            "Name": "",
            "Downloaded": False,
            "Size": 0
        },
        "TokenizerFile": {
            "Name": "",
            "Downloaded": False,
            "Size": 0
        },
        "ExtraFiles": None,
        # Preserve old metadata fields
        "pipeline_tag": old_metadata.get('pipeline_tag'),
        "download_time": old_metadata.get('download_time'),
        "avatar_url": old_metadata.get('avatar_url')
    }

    return manifest


def create_mlx_manifest(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any]) -> Dict[str, Any]:
    """Create MLX format manifest."""

    model_files = {}
    extra_files = []

    # Try different methods to extract quantization for MLX models
    quantization_type = detect_quantization_for_mlx(repo_id, directory_path)

    # Use the detected quantization or default to "DEFAULT"
    quant_level = quantization_type.value if quantization_type else "DEFAULT"

    for file_name in files:
        file_path = os.path.join(directory_path, file_name)
        file_size = 0
        if os.path.exists(file_path):
            try:
                file_size = os.path.getsize(file_path)
            except (OSError, IOError):
                pass

        # Check if this is a main model file (safetensors but not index files)
        if (file_name.endswith('.safetensors') and not file_name.endswith('.index.json')):
            model_files[quant_level] = {
                "Name": file_name,
                "Downloaded": True,
                "Size": file_size
            }
        else:
            # Add to extra files
            extra_files.append({
                "Name": file_name,
                "Downloaded": True,
                "Size": file_size
            })

    manifest = {
        "Name": repo_id,
        "ModelType": PIPELINE_TO_MODEL_TYPE.get(old_metadata.get('pipeline_tag'), "other"),
        "PluginId": "mlx",
        "ModelFile": model_files,
        "MMProjFile": {
            "Name": "",
            "Downloaded": False,
            "Size": 0
        },
        "TokenizerFile": {
            "Name": "",
            "Downloaded": False,
            "Size": 0
        },
        "ExtraFiles": extra_files if extra_files else None,
        # Preserve old metadata fields
        "pipeline_tag": old_metadata.get('pipeline_tag'),
        "download_time": old_metadata.get('download_time'),
        "avatar_url": old_metadata.get('avatar_url')
    }

    return manifest


def detect_model_type(files: List[str]) -> str:
    """Detect if this is a GGUF or MLX model based on file extensions."""
    has_gguf = any(f.endswith('.gguf') for f in files)
    has_safetensors = any(f.endswith('.safetensors') or 'safetensors' in f for f in files)

    if has_gguf:
        return "gguf"
    elif has_safetensors:
        return "mlx"
    else:
        # Default to mlx for other types
        return "mlx"


def create_manifest_from_files(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any]) -> Dict[str, Any]:
    """
    Create appropriate manifest format based on detected model type.

    Args:
        repo_id: Repository ID
        files: List of files in the model directory
        directory_path: Path to the model directory
        old_metadata: Existing metadata (pipeline_tag, download_time, avatar_url)

    Returns:
        Dict containing the appropriate manifest format
    """
    model_type = detect_model_type(files)

    if model_type == "gguf":
        return create_gguf_manifest(repo_id, files, directory_path, old_metadata)
    else:  # mlx or other
        return create_mlx_manifest(repo_id, files, directory_path, old_metadata)


def save_manifest_with_files_metadata(repo_id: str, local_dir: str, old_metadata: Dict[str, Any]) -> None:
    """
    Create and save manifest based on files found in the directory.

    Args:
        repo_id: Repository ID
        local_dir: Local directory containing the model files
        old_metadata: Existing metadata to preserve
    """
    # Get list of files in the directory
    files = []
    try:
        for root, dirs, filenames in os.walk(local_dir):
            for filename in filenames:
                # Store relative path from the directory
                rel_path = os.path.relpath(os.path.join(root, filename), local_dir)
                files.append(rel_path)
    except (OSError, IOError):
        pass

    # Create appropriate manifest
    manifest = create_manifest_from_files(repo_id, files, local_dir, old_metadata)

    # Save manifest
    save_download_metadata(local_dir, manifest)
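For illustration (not part of the diff): a minimal sketch of how the new manifest helpers fit together for a downloaded repo. The repository ID and local path below are made up.

# Illustrative usage of the new manifest utilities; repo id and path are hypothetical.
import os
from nexaai.utils.manifest_utils import (
    save_manifest_with_files_metadata,
    load_download_metadata,
)

repo_id = "Qwen/Qwen2-0.5B-GGUF"  # hypothetical repo
local_dir = os.path.expanduser(
    "~/.cache/nexa.ai/nexa_sdk/models/Qwen/Qwen2-0.5B-GGUF"  # hypothetical path
)
old_metadata = {
    "pipeline_tag": "text-generation",
    "download_time": "2024-01-01T00:00:00",
    "avatar_url": None,
}

# Walks local_dir, detects GGUF vs MLX from the file extensions,
# and writes a nexa.manifest next to the model files.
save_manifest_with_files_metadata(repo_id, local_dir, old_metadata)

# Reads nexa.manifest back, falling back to download_metadata.json if absent.
meta = load_download_metadata(local_dir, repo_id=repo_id)
print(meta.get("pipeline_tag"))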
nexaai/utils/model_manager.py
CHANGED

@@ -5,17 +5,22 @@ from datetime import datetime
 from dataclasses import dataclass
 from typing import Optional, Callable, Dict, Any, List, Union
 import functools
+from enum import Enum
 from tqdm.auto import tqdm
 from huggingface_hub import HfApi
 from huggingface_hub.utils import HfHubHTTPError, RepositoryNotFoundError
 
 from .progress_tracker import CustomProgressTqdm, DownloadProgressTracker
 from .avatar_fetcher import get_avatar_url_for_repo
+from .manifest_utils import (
+    load_download_metadata,
+    save_download_metadata,
+    save_manifest_with_files_metadata,
+)
 
 # Default path for model storage
 DEFAULT_MODEL_SAVING_PATH = "~/.cache/nexa.ai/nexa_sdk/models/"
 
-
 @dataclass
 class DownloadedModel:
     """Data class representing a downloaded model with all its metadata."""

@@ -88,30 +93,6 @@ def _check_for_incomplete_downloads(directory_path: str) -> bool:
     # If we can't access the directory, assume download is complete
     return True
 
-
-def _load_download_metadata(directory_path: str) -> Dict[str, Any]:
-    """Load download metadata from download_metadata.json if it exists."""
-    metadata_path = os.path.join(directory_path, 'download_metadata.json')
-    if os.path.exists(metadata_path):
-        try:
-            with open(metadata_path, 'r', encoding='utf-8') as f:
-                return json.load(f)
-        except (json.JSONDecodeError, IOError):
-            pass
-    return {}
-
-
-def _save_download_metadata(directory_path: str, metadata: Dict[str, Any]) -> None:
-    """Save download metadata to download_metadata.json."""
-    metadata_path = os.path.join(directory_path, 'download_metadata.json')
-    try:
-        with open(metadata_path, 'w', encoding='utf-8') as f:
-            json.dump(metadata, f, indent=2)
-    except IOError:
-        # If we can't save metadata, don't fail the download
-        pass
-
-
 def _get_directory_size_and_files(directory_path: str) -> tuple[int, List[str]]:
     """Get total size and list of files in a directory."""
     total_size = 0

@@ -137,6 +118,13 @@ def _get_directory_size_and_files(directory_path: str) -> tuple[int, List[str]]:
     return total_size, files
 
 
+def _has_valid_metadata(directory_path: str) -> bool:
+    """Check if directory has either nexa.manifest or download_metadata.json (for backward compatibility)."""
+    manifest_path = os.path.join(directory_path, 'nexa.manifest')
+    old_metadata_path = os.path.join(directory_path, 'download_metadata.json')
+    return os.path.exists(manifest_path) or os.path.exists(old_metadata_path)
+
+
 def _scan_for_repo_folders(base_path: str) -> List[DownloadedModel]:
     """Scan a directory for repository folders and return model information."""
     models = []

@@ -162,24 +150,27 @@ def _scan_for_repo_folders(base_path: str) -> List[DownloadedModel]:
                 if os.path.isdir(subitem_path):
                     has_subdirs = True
                     # This looks like owner/repo structure
-                    [18 deleted lines largely lost in extraction; the old block appended DownloadedModel entries with repo_id=f"{item}/{subitem}" without the metadata check]
+                    # Only include if nexa.manifest or download_metadata.json exists (backward compatibility)
+                    if _has_valid_metadata(subitem_path):
+                        size_bytes, files = _get_directory_size_and_files(subitem_path)
+                        if files:  # Only include if there are files
+                            # Check if the download is complete
+                            download_complete = _check_for_incomplete_downloads(subitem_path)
+                            # Load metadata if it exists
+                            repo_id = f"{item}/{subitem}"
+                            metadata = load_download_metadata(subitem_path, repo_id)
+                            models.append(DownloadedModel(
+                                repo_id=repo_id,
+                                files=files,
+                                folder_type='owner_repo',
+                                local_path=subitem_path,
+                                size_bytes=size_bytes,
+                                file_count=len(files),
+                                full_repo_download_complete=download_complete,
+                                pipeline_tag=metadata.get('pipeline_tag'),
+                                download_time=metadata.get('download_time'),
+                                avatar_url=metadata.get('avatar_url')
+                            ))
                 else:
                     direct_files.append(subitem)
         except (OSError, IOError):

@@ -188,24 +179,27 @@ def _scan_for_repo_folders(base_path: str) -> List[DownloadedModel]:
 
         # Direct repo folder (no owner structure)
         if not has_subdirs and direct_files:
-            [18 deleted lines largely lost in extraction; the old block appended a DownloadedModel with repo_id=item without the metadata check]
+            # Only include if nexa.manifest or download_metadata.json exists (backward compatibility)
+            if _has_valid_metadata(item_path):
+                size_bytes, files = _get_directory_size_and_files(item_path)
+                if files:  # Only include if there are files
+                    # Check if the download is complete
+                    download_complete = _check_for_incomplete_downloads(item_path)
+                    # Load metadata if it exists
+                    repo_id = item
+                    metadata = load_download_metadata(item_path, repo_id)
+                    models.append(DownloadedModel(
+                        repo_id=repo_id,
+                        files=files,
+                        folder_type='direct_repo',
+                        local_path=item_path,
+                        size_bytes=size_bytes,
+                        file_count=len(files),
+                        full_repo_download_complete=download_complete,
+                        pipeline_tag=metadata.get('pipeline_tag'),
+                        download_time=metadata.get('download_time'),
+                        avatar_url=metadata.get('avatar_url')
+                    ))
 
     except (OSError, IOError):
         # Skip if base path can't be accessed

@@ -735,27 +729,57 @@ class HuggingFaceDownloader:
 
     def _fetch_and_save_metadata(self, repo_id: str, local_dir: str) -> None:
         """Fetch model info and save metadata after successful download."""
+        # Initialize metadata with defaults to ensure manifest is always created
+        old_metadata = {
+            'pipeline_tag': "text-generation",  # Default to text-generation pipeline-tag
+            'download_time': datetime.now().isoformat(),
+            'avatar_url': None
+        }
+
+        # Try to fetch additional metadata, but don't let failures prevent manifest creation
        try:
             # Fetch model info to get pipeline_tag
             info = self.api.model_info(repo_id, token=self.token)
-            [2 deleted lines lost in extraction]
+            if hasattr(info, 'pipeline_tag') and info.pipeline_tag:
+                old_metadata['pipeline_tag'] = info.pipeline_tag
+        except Exception as e:
+            # Log the error but continue with manifest creation
+            print(f"Warning: Could not fetch model info for {repo_id}: {e}")
+
+        try:
             # Get avatar URL
             avatar_url = get_avatar_url_for_repo(repo_id, custom_endpoint=self.endpoint)
-            [14 deleted lines lost in extraction]
+            if avatar_url:
+                old_metadata['avatar_url'] = avatar_url
+        except Exception as e:
+            # Log the error but continue with manifest creation
+            print(f"Warning: Could not fetch avatar URL for {repo_id}: {e}")
+
+        # CRITICAL: Always create the manifest file, regardless of metadata fetch failures
+        try:
+            save_manifest_with_files_metadata(repo_id, local_dir, old_metadata)
+            print(f"[OK] Successfully created nexa.manifest for {repo_id}")
+        except Exception as e:
+            # This is critical - if manifest creation fails, we should know about it
+            print(f"ERROR: Failed to create nexa.manifest for {repo_id}: {e}")
+            # Try a fallback approach - create a minimal manifest
+            try:
+                minimal_manifest = {
+                    "Name": repo_id,
+                    "ModelType": "other",
+                    "PluginId": "unknown",
+                    "ModelFile": {},
+                    "MMProjFile": {"Name": "", "Downloaded": False, "Size": 0},
+                    "TokenizerFile": {"Name": "", "Downloaded": False, "Size": 0},
+                    "ExtraFiles": None,
+                    "pipeline_tag": old_metadata.get('pipeline_tag'),
+                    "download_time": old_metadata.get('download_time'),
+                    "avatar_url": old_metadata.get('avatar_url')
+                }
+                save_download_metadata(local_dir, minimal_manifest)
+                print(f"[OK] Created minimal nexa.manifest for {repo_id} as fallback")
+            except Exception as fallback_error:
+                print(f"CRITICAL ERROR: Could not create even minimal manifest for {repo_id}: {fallback_error}")
 
     def _download_single_file(
         self,

@@ -772,7 +796,7 @@ class HuggingFaceDownloader:
         # Check if file already exists
         local_file_path = os.path.join(file_local_dir, file_name)
         if not force_download and self._check_file_exists_and_valid(local_file_path):
-            print(f" [rest of line lost in extraction]
+            print(f"[SKIP] File already exists: {file_name}")
             # Stop progress tracking
             if progress_tracker:
                 progress_tracker.stop_tracking()

@@ -820,14 +844,6 @@ class HuggingFaceDownloader:
         # Create a subdirectory for this specific repo
         repo_local_dir = self._create_repo_directory(local_dir, repo_id)
 
-        # Check if repository already exists (basic check for directory existence)
-        if not force_download and os.path.exists(repo_local_dir) and os.listdir(repo_local_dir):
-            print(f"✓ Repository already exists, skipping: {repo_id}")
-            # Stop progress tracking
-            if progress_tracker:
-                progress_tracker.stop_tracking()
-            return repo_local_dir
-
         try:
             download_kwargs = {
                 'repo_id': repo_id,

@@ -887,7 +903,7 @@ class HuggingFaceDownloader:
         # Check if file already exists
         local_file_path = os.path.join(repo_local_dir, file_name)
         if not force_download and self._check_file_exists_and_valid(local_file_path):
-            print(f" [rest of line lost in extraction]
+            print(f"[SKIP] File already exists: {file_name}")
             overall_progress.update(1)
             continue
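For illustration (not part of the diff): the rewritten _fetch_and_save_metadata follows a "defaults first, best-effort enrichment, always write" pattern. A minimal self-contained sketch of that pattern, with stub callables standing in for the HfApi lookup and the manifest writer:

# Illustrative sketch of the error-isolation pattern used above;
# fetch_info and save are hypothetical stand-ins.
from datetime import datetime


def fetch_and_save(repo_id, fetch_info, save):
    meta = {
        'pipeline_tag': 'text-generation',            # safe default
        'download_time': datetime.now().isoformat(),
        'avatar_url': None,
    }
    try:
        meta.update(fetch_info(repo_id))              # best-effort enrichment
    except Exception as e:
        print(f"Warning: Could not fetch model info for {repo_id}: {e}")
    try:
        save(meta)                                    # must always run
    except Exception as e:
        print(f"ERROR: Failed to save manifest for {repo_id}: {e}")


fetch_and_save(
    "owner/repo",
    lambda r: {'pipeline_tag': 'image-text-to-text'},
    lambda m: print("saved", m),
)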
nexaai/utils/model_types.py
ADDED

@@ -0,0 +1,47 @@
"""
Model type mappings for HuggingFace pipeline tags to our internal model types.

This module provides centralized model type mapping functionality to avoid
circular imports between other utility modules.
"""

from enum import Enum
from typing import Dict


class ModelTypeMapping(Enum):
    """Enum for mapping HuggingFace pipeline_tag to our ModelType."""
    TEXT_GENERATION = ("text-generation", "llm")
    IMAGE_TEXT_TO_TEXT = ("image-text-to-text", "vlm")

    def __init__(self, pipeline_tag: str, model_type: str):
        self.pipeline_tag = pipeline_tag
        self.model_type = model_type


# Create mapping dictionaries from the enum
PIPELINE_TO_MODEL_TYPE: Dict[str, str] = {
    mapping.pipeline_tag: mapping.model_type
    for mapping in ModelTypeMapping
}

MODEL_TYPE_TO_PIPELINE: Dict[str, str] = {
    mapping.model_type: mapping.pipeline_tag
    for mapping in ModelTypeMapping
}


def map_pipeline_tag_to_model_type(pipeline_tag: str) -> str:
    """Map HuggingFace pipeline_tag to our ModelType."""
    if not pipeline_tag:
        return "other"

    return PIPELINE_TO_MODEL_TYPE.get(pipeline_tag, "other")


def map_model_type_to_pipeline_tag(model_type: str) -> str:
    """Reverse map ModelType back to HuggingFace pipeline_tag."""
    if not model_type:
        return None

    return MODEL_TYPE_TO_PIPELINE.get(model_type)
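For illustration (not part of the diff): exercising the mapping helpers this new module exposes. Expected values follow directly from the two enum members defined above.

# Illustrative usage of the new model_types helpers.
from nexaai.utils.model_types import (
    map_pipeline_tag_to_model_type,
    map_model_type_to_pipeline_tag,
)

print(map_pipeline_tag_to_model_type("text-generation"))      # "llm"
print(map_pipeline_tag_to_model_type("text-classification"))  # "other" (unmapped tag)
print(map_model_type_to_pipeline_tag("vlm"))                  # "image-text-to-text"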
nexaai/utils/progress_tracker.py
CHANGED

@@ -107,7 +107,7 @@ class DownloadProgressTracker:
         time_diff = current_time - self.last_time
 
         # Only calculate if we have a meaningful time difference (avoid division by very small numbers)
-        if time_diff > 0. [rest of line lost in extraction]
+        if time_diff > 0.1:  # At least 100ms between measurements
             bytes_diff = current_downloaded - self.last_downloaded
 
             # Only calculate speed if bytes actually changed

@@ -118,6 +118,14 @@ class DownloadProgressTracker:
                 self.speed_history.append(speed)
                 if len(self.speed_history) > self.max_speed_history:
                     self.speed_history.pop(0)
+
+            # Update tracking variables when we actually calculate speed
+            self.last_downloaded = current_downloaded
+            self.last_time = current_time
+        else:
+            # First measurement - initialize tracking variables
+            self.last_downloaded = current_downloaded
+            self.last_time = current_time
 
         # Return the average of historical speeds if we have any
         # This ensures we show the last known speed even when skipping updates

@@ -157,13 +165,9 @@ class DownloadProgressTracker:
                     total_file_sizes += data['total']
                     active_file_count += 1
 
-        # Calculate speed
+        # Calculate speed (tracking variables are updated internally)
         speed = self.calculate_speed(total_downloaded)
 
-        # Update tracking variables
-        self.last_downloaded = total_downloaded
-        self.last_time = time.time()
-
         # Determine total size - prioritize pre-fetched repo size, then aggregate file sizes
         if self.total_repo_size > 0:
             # Use pre-fetched repository info if available

@@ -245,11 +249,11 @@ class DownloadProgressTracker:
         if known_total and total_size_raw > 0:
             # Known total size - show actual progress
             filled_width = int(bar_width * min(percentage, 100) / 100)
-            bar = ' [rest of line lost in extraction]
+            bar = '#' * filled_width + '-' * (bar_width - filled_width)
         else:
             # Unknown total size - show animated progress
             animation_pos = int(time.time() * 2) % bar_width
-            bar = ' [rest of line lost in extraction]
+            bar = '-' * animation_pos + '#' + '-' * (bar_width - animation_pos - 1)
 
         # Format the progress line
         status = progress_data.get('status', 'unknown')
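For illustration (not part of the diff): the tracker changes above throttle speed samples to at least 100 ms apart and smooth them with a short moving average. A minimal self-contained sketch of that scheme, with field names mirroring the diff:

# Illustrative sketch of the throttled moving-average speed calculation.
import time


class SpeedMeter:
    def __init__(self, max_history=5):
        self.last_time = 0.0
        self.last_downloaded = 0
        self.speed_history = []
        self.max_speed_history = max_history

    def calculate_speed(self, current_downloaded):
        now = time.time()
        if self.last_time and now - self.last_time > 0.1:  # >=100 ms apart
            bytes_diff = current_downloaded - self.last_downloaded
            if bytes_diff > 0:
                self.speed_history.append(bytes_diff / (now - self.last_time))
                if len(self.speed_history) > self.max_speed_history:
                    self.speed_history.pop(0)
            # Update tracking variables only when a sample was taken
            self.last_downloaded = current_downloaded
            self.last_time = now
        elif not self.last_time:
            # First measurement - initialize tracking variables
            self.last_downloaded = current_downloaded
            self.last_time = now
        # Averaging recent samples keeps the display stable between updates
        if self.speed_history:
            return sum(self.speed_history) / len(self.speed_history)
        return 0.0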
nexaai/utils/quantization_utils.py
ADDED

@@ -0,0 +1,239 @@
"""
Quantization utilities for extracting quantization types from model files and configurations.

This module provides utilities to extract quantization information from:
- GGUF model filenames
- MLX model repository IDs
- MLX model config.json files
"""

import os
import json
import re
import logging
from enum import Enum
from typing import Optional

# Set up logger
logger = logging.getLogger(__name__)


class QuantizationType(str, Enum):
    """Enum for GGUF and MLX model quantization types."""
    # GGUF quantization types
    BF16 = "BF16"
    F16 = "F16"
    Q2_K = "Q2_K"
    Q2_K_L = "Q2_K_L"
    Q3_K_M = "Q3_K_M"
    Q3_K_S = "Q3_K_S"
    Q4_0 = "Q4_0"
    Q4_1 = "Q4_1"
    Q4_K_M = "Q4_K_M"
    Q4_K_S = "Q4_K_S"
    Q5_K_M = "Q5_K_M"
    Q5_K_S = "Q5_K_S"
    Q6_K = "Q6_K"
    Q8_0 = "Q8_0"
    MXFP4 = "MXFP4"
    MXFP8 = "MXFP8"

    # MLX bit-based quantization types
    BIT_1 = "1BIT"
    BIT_2 = "2BIT"
    BIT_3 = "3BIT"
    BIT_4 = "4BIT"
    BIT_5 = "5BIT"
    BIT_6 = "6BIT"
    BIT_7 = "7BIT"
    BIT_8 = "8BIT"
    BIT_16 = "16BIT"


def extract_quantization_from_filename(filename: str) -> Optional[QuantizationType]:
    """
    Extract quantization type from filename.

    Args:
        filename: The filename to extract quantization from

    Returns:
        QuantizationType enum value or None if not found
    """
    # Define mapping from lowercase patterns to enum values
    # Include "." to ensure precise matching (e.g., "q4_0." not "q4_0_xl")
    pattern_to_enum = {
        'bf16.': QuantizationType.BF16,
        'f16.': QuantizationType.F16,  # Add F16 support
        'q2_k_l.': QuantizationType.Q2_K_L,  # Check Q2_K_L before Q2_K to avoid partial match
        'q2_k.': QuantizationType.Q2_K,
        'q3_k_m.': QuantizationType.Q3_K_M,
        'q3_ks.': QuantizationType.Q3_K_S,
        'q4_k_m.': QuantizationType.Q4_K_M,
        'q4_k_s.': QuantizationType.Q4_K_S,
        'q4_0.': QuantizationType.Q4_0,
        'q4_1.': QuantizationType.Q4_1,
        'q5_k_m.': QuantizationType.Q5_K_M,
        'q5_k_s.': QuantizationType.Q5_K_S,
        'q6_k.': QuantizationType.Q6_K,
        'q8_0.': QuantizationType.Q8_0,
        'mxfp4.': QuantizationType.MXFP4,
        'mxfp8.': QuantizationType.MXFP8,
    }

    filename_lower = filename.lower()

    # Check longer patterns first to avoid partial matches
    # Sort by length descending to check q2_k_l before q2_k, q4_k_m before q4_0, etc.
    for pattern in sorted(pattern_to_enum.keys(), key=len, reverse=True):
        if pattern in filename_lower:
            return pattern_to_enum[pattern]

    return None


def extract_quantization_from_repo_id(repo_id: str) -> Optional[QuantizationType]:
    """
    Extract quantization type from repo_id for MLX models by looking for bit patterns.

    Args:
        repo_id: The repository ID to extract quantization from

    Returns:
        QuantizationType enum value or None if not found
    """
    # Define mapping from bit numbers to enum values
    bit_to_enum = {
        1: QuantizationType.BIT_1,
        2: QuantizationType.BIT_2,
        3: QuantizationType.BIT_3,
        4: QuantizationType.BIT_4,
        5: QuantizationType.BIT_5,
        6: QuantizationType.BIT_6,
        7: QuantizationType.BIT_7,
        8: QuantizationType.BIT_8,
        16: QuantizationType.BIT_16,
    }

    # First check for patterns like "4bit", "8bit" etc. (case insensitive)
    pattern = r'(\d+)bit'
    matches = re.findall(pattern, repo_id.lower())

    for match in matches:
        try:
            bit_number = int(match)
            if bit_number in bit_to_enum:
                logger.debug(f"Found {bit_number}bit quantization in repo_id: {repo_id}")
                return bit_to_enum[bit_number]
        except ValueError:
            continue

    # Also check for patterns like "-q8", "_Q4" etc.
    q_pattern = r'[-_]q(\d+)'
    q_matches = re.findall(q_pattern, repo_id.lower())

    for match in q_matches:
        try:
            bit_number = int(match)
            if bit_number in bit_to_enum:
                logger.debug(f"Found Q{bit_number} quantization in repo_id: {repo_id}")
                return bit_to_enum[bit_number]
        except ValueError:
            continue

    return None


def extract_quantization_from_mlx_config(mlx_folder_path: str) -> Optional[QuantizationType]:
    """
    Extract quantization type from MLX model's config.json file.

    Args:
        mlx_folder_path: Path to the MLX model folder

    Returns:
        QuantizationType enum value or None if not found
    """
    config_path = os.path.join(mlx_folder_path, "config.json")

    if not os.path.exists(config_path):
        logger.debug(f"Config file not found: {config_path}")
        return None

    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)

        # Look for quantization.bits field
        quantization_config = config.get("quantization", {})
        if isinstance(quantization_config, dict):
            bits = quantization_config.get("bits")
            if isinstance(bits, int):
                # Define mapping from bit numbers to enum values
                bit_to_enum = {
                    1: QuantizationType.BIT_1,
                    2: QuantizationType.BIT_2,
                    3: QuantizationType.BIT_3,
                    4: QuantizationType.BIT_4,
                    5: QuantizationType.BIT_5,
                    6: QuantizationType.BIT_6,
                    7: QuantizationType.BIT_7,
                    8: QuantizationType.BIT_8,
                    16: QuantizationType.BIT_16,
                }

                if bits in bit_to_enum:
                    logger.debug(f"Found {bits}bit quantization in config.json: {config_path}")
                    return bit_to_enum[bits]
                else:
                    logger.debug(f"Unsupported quantization bits value: {bits}")

    except (json.JSONDecodeError, IOError) as e:
        logger.warning(f"Error reading config.json from {config_path}: {e}")
    except Exception as e:
        logger.warning(f"Unexpected error reading config.json from {config_path}: {e}")

    return None


def extract_gguf_quantization(filename: str) -> str:
    """
    Extract quantization level from GGUF filename using the enum-based approach.

    This function provides backward compatibility by returning a string representation
    of the quantization type.

    Args:
        filename: The GGUF filename

    Returns:
        String representation of the quantization type or "UNKNOWN" if not found
    """
    quantization_type = extract_quantization_from_filename(filename)
    if quantization_type:
        return quantization_type.value
    return "UNKNOWN"


def detect_quantization_for_mlx(repo_id: str, directory_path: str) -> Optional[QuantizationType]:
    """
    Detect quantization for MLX models using multiple methods in priority order.

    Args:
        repo_id: The repository ID
        directory_path: Path to the model directory

    Returns:
        QuantizationType enum value or None if not found
    """
    # Method 1: Extract from repo_id
    quantization_type = extract_quantization_from_repo_id(repo_id)
    if quantization_type:
        return quantization_type

    # Method 2: Extract from config.json if available
    quantization_type = extract_quantization_from_mlx_config(directory_path)
    if quantization_type:
        return quantization_type

    return None
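For illustration (not part of the diff): exercising the new extraction helpers. The filenames and repo id below are made up; expected results follow from the pattern tables defined above.

# Illustrative usage of the new quantization utilities.
from nexaai.utils.quantization_utils import (
    extract_quantization_from_filename,
    extract_quantization_from_repo_id,
    extract_gguf_quantization,
)

# Filename patterns are matched case-insensitively, with a trailing "."
print(extract_quantization_from_filename("llama-3-8b.Q4_K_M.gguf"))  # QuantizationType.Q4_K_M
print(extract_gguf_quantization("model.q8_0.gguf"))                  # "Q8_0"
print(extract_gguf_quantization("model.gguf"))                       # "UNKNOWN"

# Repo ids are scanned for "<n>bit" and "-q<n>"/"_q<n>" patterns
print(extract_quantization_from_repo_id("mlx-community/Qwen2-7B-4bit"))  # QuantizationType.BIT_4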
{nexaai-1.0.11rc2.dist-info → nexaai-1.0.12.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nexaai
-Version: 1.0.11rc2
+Version: 1.0.12
 Summary: Python bindings for NexaSDK C-lib backend
 Author-email: "Nexa AI, Inc." <dev@nexa.ai>
 Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge

@@ -21,6 +21,7 @@ Provides-Extra: mlx
 Requires-Dist: mlx; extra == "mlx"
 Requires-Dist: mlx-lm; extra == "mlx"
 Requires-Dist: mlx-vlm; extra == "mlx"
+Requires-Dist: mlx-embeddings; extra == "mlx"
 Requires-Dist: tokenizers; extra == "mlx"
 Requires-Dist: safetensors; extra == "mlx"
 Requires-Dist: Pillow; extra == "mlx"
{nexaai-1.0.11rc2.dist-info → nexaai-1.0.12.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
 nexaai/__init__.py,sha256=Lt8NU57eTMtWrDYzpFeYR9XtGAPXqizynP83TPU0UW0,2105
-nexaai/_stub.cp310-win_amd64.pyd,sha256=
-nexaai/_version.py,sha256=
+nexaai/_stub.cp310-win_amd64.pyd,sha256=IdNlwep8Q0SYw_ZdPF5oGomULfRedkRNPyctAKhrRwA,10752
+nexaai/_version.py,sha256=yxquMAwAvnQQmroVYQPhPGvJQg9Ps8HuIYi5CUjVn_c,143
 nexaai/asr.py,sha256=_fsGaxpiU137bUtO5ujtFSYCI1RLsyeEm3Gf4GhHVRk,2118
 nexaai/base.py,sha256=qQBCiQVNzgpkQjZX9aiFDEdbAAe56TROKC3WnWra2Zg,1021
 nexaai/common.py,sha256=6keIpdX5XS5us4z79EMoa6RSkVze9SbbXax13IJ9yvs,3525

@@ -16,25 +16,25 @@ nexaai/asr_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/asr_impl/mlx_asr_impl.py,sha256=XwMX3LYMeulp8cDS0TCCYcjvttFHAyDWQ_oMvABwQmI,3349
 nexaai/asr_impl/pybind_asr_impl.py,sha256=20o5SOPzhF9x41ra8L_qIM7YxCkYeLb5csSrNde-dds,1560
 nexaai/binds/__init__.py,sha256=tYvy0pFhoY29GstDT5r-oRiPRarPLECvJAkcamJItOg,83
-nexaai/binds/common_bind.cp310-win_amd64.pyd,sha256=
-nexaai/binds/embedder_bind.cp310-win_amd64.pyd,sha256=
+nexaai/binds/common_bind.cp310-win_amd64.pyd,sha256=QNAr1rBQdtT0FPVTr_rRDkOmh93pbYs0RTL0YQqPHzU,201216
+nexaai/binds/embedder_bind.cp310-win_amd64.pyd,sha256=VcXpRPR_a5kMIXh4sPgDi3IQZFR3pjCQOFBc_Veyjm8,182784
 nexaai/binds/libcrypto-3-x64.dll,sha256=-Lau6pL5DpDXzpg9MED63gCeL8oRrSLI_e2LeaxIHqk,7314432
 nexaai/binds/libssl-3-x64.dll,sha256=Tzzyu5jRpUugFxr_65hbFlAtFpjxIDpOYMU1E0ijkJw,1313792
-nexaai/binds/llm_bind.cp310-win_amd64.pyd,sha256=
-nexaai/binds/nexa_bridge.dll,sha256=
-nexaai/binds/nexa_llama_cpp/ggml-base.dll,sha256=
-nexaai/binds/nexa_llama_cpp/ggml-cpu.dll,sha256=
-nexaai/binds/nexa_llama_cpp/ggml-cuda.dll,sha256=
-nexaai/binds/nexa_llama_cpp/ggml-vulkan.dll,sha256=
-nexaai/binds/nexa_llama_cpp/ggml.dll,sha256=
-nexaai/binds/nexa_llama_cpp/llama.dll,sha256=
-nexaai/binds/nexa_llama_cpp/mtmd.dll,sha256=
-nexaai/binds/nexa_llama_cpp/nexa_plugin.dll,sha256=
+nexaai/binds/llm_bind.cp310-win_amd64.pyd,sha256=WLrGVjs_MgGWdLEs93Eds52AALORlC3QdCpDReIAKDg,162816
+nexaai/binds/nexa_bridge.dll,sha256=b5qv-GPNnyX_sDvsfjEwJe5ECoiyLLXO1ct8Lqy63Ww,168448
+nexaai/binds/nexa_llama_cpp/ggml-base.dll,sha256=6mI5Zl12Wr6m9Q_bLkWMuTfdOvCROkWXY0sWCi1ca2E,532480
+nexaai/binds/nexa_llama_cpp/ggml-cpu.dll,sha256=ajopRU4nUW4h0d-TGE_0eaPa1z_Kud4AUoAyFFBuliM,672768
+nexaai/binds/nexa_llama_cpp/ggml-cuda.dll,sha256=9zCrk-YgktsjRnyN4jAFUgSm7JAC2kL2jiJCwxHM0a8,313528832
+nexaai/binds/nexa_llama_cpp/ggml-vulkan.dll,sha256=YIAvKw37UgoeW7QLYQw8Wu79KT4hoVEkHQQ3J2XVg_E,36627456
+nexaai/binds/nexa_llama_cpp/ggml.dll,sha256=PXrc4zXNBtJeJ5gDd5YrtWSdqdYtTIZscCxpg5qtFrA,66560
+nexaai/binds/nexa_llama_cpp/llama.dll,sha256=OLXdI0L2YXufxvTtLz-Y9Y0PcoMInG9xeajw6sLlC9M,1611776
+nexaai/binds/nexa_llama_cpp/mtmd.dll,sha256=WtIuZBtoQH81ZOS6dVkFHxb59DCdx8cqq8ChGO5kpyc,561152
+nexaai/binds/nexa_llama_cpp/nexa_plugin.dll,sha256=9CkDYa5nlOTFussLXjf9llEfKT72wVL0d6APginbVjA,1405440
 nexaai/cv_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/cv_impl/mlx_cv_impl.py,sha256=QLd_8w90gtxH8kmssaDYatCTRvQNIJuUGKZNnYrmx6E,3317
 nexaai/cv_impl/pybind_cv_impl.py,sha256=aSOCAxmHrwJbEkSN6VX3Cykqlj_9RIpVrZXILul04GA,1096
 nexaai/embedder_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nexaai/embedder_impl/mlx_embedder_impl.py,sha256=
+nexaai/embedder_impl/mlx_embedder_impl.py,sha256=Kzd-veLNl95FbI2oEJMtr6qKbjtPDDajzsGUVjJfTRA,4598
 nexaai/embedder_impl/pybind_embedder_impl.py,sha256=FoLsUrzF5cNtEsSFchPlapkdqLGFOUGNPx0Kc8hdCvA,3589
 nexaai/image_gen_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/image_gen_impl/mlx_image_gen_impl.py,sha256=peUE9ue9ApaPlZVOICBWiHtd13sY40OWQbE8EjfIUMU,11511

@@ -50,12 +50,15 @@ nexaai/tts_impl/mlx_tts_impl.py,sha256=LcH9bVdIl3Q6lOzSUB_X2s-_nWFmlCl1yL7XSUK0f
 nexaai/tts_impl/pybind_tts_impl.py,sha256=n3z4zmPQayQJgAwcvETw0IBUCp8IYROuYFSg0tAy_8Y,1487
 nexaai/utils/avatar_fetcher.py,sha256=D01f8je-37Nd68zGw8MYK2m7y3fvGlC6h0KR-aN9kdU,3925
 nexaai/utils/decode.py,sha256=0Z9jDH4ICzw4YXj8nD4L-sMouDaev-TISGRQ4KzidWE,421
-nexaai/utils/model_manager.py,sha256=
-nexaai/utils/progress_tracker.py,sha256=
+nexaai/utils/manifest_utils.py,sha256=zMgQpf5dAgF2RjGhk73zBggxRDGMRKDGxh2a8m8kmYg,10045
+nexaai/utils/model_manager.py,sha256=TmaT1fFculHgfAdutpNXP4d07HIhEMPVTfPvFKE-zR0,51480
+nexaai/utils/model_types.py,sha256=arIyb9q-1uG0nyUGdWZaxxDJAxv0cfnJEpjCzyELL5Q,1416
+nexaai/utils/progress_tracker.py,sha256=BztrFqtjwNUmeREwZ5m7H6ZcrVzQEbpZfsxndWh4z0A,15778
+nexaai/utils/quantization_utils.py,sha256=jjQaz7K4qH6TdP8Tnv5Ktb2viz8BaVBSOrb_jm3ns28,7889
 nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/vlm_impl/mlx_vlm_impl.py,sha256=oY_qb9z_iF0zArBuY5CCYIvZcA3R0i_NKXrr_r-QSgg,10989
 nexaai/vlm_impl/pybind_vlm_impl.py,sha256=Hu8g8OXyPn8OzLQOpRSE5lfGmhjChiKj7fMRB8mC_cI,9147
-nexaai-1.0.11rc2.dist-info/METADATA,sha256=
-nexaai-1.0.11rc2.dist-info/WHEEL,sha256=
-nexaai-1.0.11rc2.dist-info/top_level.txt,sha256=
-nexaai-1.0.11rc2.dist-info/RECORD,,
+nexaai-1.0.12.dist-info/METADATA,sha256=pxXRrrQ9Lu_whvoxLFGiHKpG7PzT8JUBMH8F6F6Ar0k,1230
+nexaai-1.0.12.dist-info/WHEEL,sha256=KUuBC6lxAbHCKilKua8R9W_TM71_-9Sg5uEP3uDWcoU,101
+nexaai-1.0.12.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
+nexaai-1.0.12.dist-info/RECORD,,
{nexaai-1.0.11rc2.dist-info → nexaai-1.0.12.dist-info}/WHEEL
File without changes

{nexaai-1.0.11rc2.dist-info → nexaai-1.0.12.dist-info}/top_level.txt
File without changes