nexaai 1.0.6rc1__cp310-cp310-macosx_14_0_universal2.whl → 1.0.7__cp310-cp310-macosx_14_0_universal2.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of nexaai might be problematic.

@@ -0,0 +1,280 @@
+"""
+Manifest and metadata utilities for handling nexa.manifest files and model metadata.
+
+This module provides utilities to:
+- Load and save nexa.manifest files
+- Create GGUF and MLX manifests
+- Process manifest metadata (handle null fields, fetch avatars, etc.)
+- Manage backward compatibility with old download_metadata.json files
+"""
+
+import os
+import json
+from datetime import datetime
+from typing import Dict, Any, List, Optional
+
+from .quantization_utils import (
+    extract_quantization_from_filename,
+    detect_quantization_for_mlx
+)
+from .model_types import (
+    PIPELINE_TO_MODEL_TYPE,
+    MODEL_TYPE_TO_PIPELINE
+)
+
+
+def process_manifest_metadata(manifest: Dict[str, Any], repo_id: str) -> Dict[str, Any]:
+    """Process manifest metadata to handle null/missing fields."""
+    # Handle pipeline_tag
+    pipeline_tag = manifest.get('pipeline_tag')
+    if not pipeline_tag:
+        # Reverse map from ModelType if available
+        model_type = manifest.get('ModelType')
+        pipeline_tag = MODEL_TYPE_TO_PIPELINE.get(model_type) if model_type else None
+
+    # Handle download_time - keep as null if missing
+    download_time = manifest.get('download_time')
+
+    # Handle avatar_url - fetch on-the-fly if missing/null
+    avatar_url = manifest.get('avatar_url')
+    if not avatar_url:
+        try:
+            from .avatar_fetcher import get_avatar_url_for_repo
+            avatar_url = get_avatar_url_for_repo(repo_id)
+        except Exception:
+            # If fetching fails, leave as None
+            avatar_url = None
+
+    # Return processed metadata
+    processed_manifest = manifest.copy()
+    processed_manifest.update({
+        'pipeline_tag': pipeline_tag,
+        'download_time': download_time,
+        'avatar_url': avatar_url
+    })
+
+    return processed_manifest
+
+
+def load_nexa_manifest(directory_path: str) -> Dict[str, Any]:
+    """Load manifest from nexa.manifest if it exists."""
+    manifest_path = os.path.join(directory_path, 'nexa.manifest')
+    if os.path.exists(manifest_path):
+        try:
+            with open(manifest_path, 'r', encoding='utf-8') as f:
+                return json.load(f)
+        except (json.JSONDecodeError, IOError):
+            pass
+    return {}
+
+
+def load_download_metadata(directory_path: str, repo_id: Optional[str] = None) -> Dict[str, Any]:
+    """Load download metadata from nexa.manifest if it exists, fallback to old format."""
+    # First try to load from new manifest format
+    manifest = load_nexa_manifest(directory_path)
+    if manifest and repo_id:
+        # Process the manifest to handle null/missing fields
+        return process_manifest_metadata(manifest, repo_id)
+    elif manifest:
+        # Return manifest as-is if no repo_id provided (for backward compatibility)
+        return manifest
+
+    # Fallback to old format for backward compatibility
+    old_metadata_path = os.path.join(directory_path, 'download_metadata.json')
+    if os.path.exists(old_metadata_path):
+        try:
+            with open(old_metadata_path, 'r', encoding='utf-8') as f:
+                return json.load(f)
+        except (json.JSONDecodeError, IOError):
+            pass
+    return {}
+
+
+def save_download_metadata(directory_path: str, metadata: Dict[str, Any]) -> None:
+    """Save download metadata to nexa.manifest in the new format."""
+    manifest_path = os.path.join(directory_path, 'nexa.manifest')
+    try:
+        with open(manifest_path, 'w', encoding='utf-8') as f:
+            json.dump(metadata, f, indent=2)
+    except IOError:
+        # If we can't save metadata, don't fail the download
+        pass
+
+
+def create_gguf_manifest(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any]) -> Dict[str, Any]:
+    """Create GGUF format manifest."""
+
+    # Load existing manifest to merge GGUF files if it exists
+    existing_manifest = load_nexa_manifest(directory_path)
+
+    model_files = {}
+    if existing_manifest and "ModelFile" in existing_manifest:
+        model_files = existing_manifest["ModelFile"].copy()
+
+    # Process GGUF files
+    for file_name in files:
+        if file_name.endswith('.gguf'):
+            # Use the new enum-based quantization extraction
+            quantization_type = extract_quantization_from_filename(file_name)
+            quant_level = quantization_type.value if quantization_type else "UNKNOWN"
+
+            file_path = os.path.join(directory_path, file_name)
+            file_size = 0
+            if os.path.exists(file_path):
+                try:
+                    file_size = os.path.getsize(file_path)
+                except (OSError, IOError):
+                    pass
+
+            model_files[quant_level] = {
+                "Name": file_name,
+                "Downloaded": True,
+                "Size": file_size
+            }
+
+    manifest = {
+        "Name": repo_id,
+        "ModelType": PIPELINE_TO_MODEL_TYPE.get(old_metadata.get('pipeline_tag'), "other"),
+        "PluginId": "llama_cpp",
+        "ModelFile": model_files,
+        "MMProjFile": {
+            "Name": "",
+            "Downloaded": False,
+            "Size": 0
+        },
+        "TokenizerFile": {
+            "Name": "",
+            "Downloaded": False,
+            "Size": 0
+        },
+        "ExtraFiles": None,
+        # Preserve old metadata fields
+        "pipeline_tag": old_metadata.get('pipeline_tag'),
+        "download_time": old_metadata.get('download_time'),
+        "avatar_url": old_metadata.get('avatar_url')
+    }
+
+    return manifest
+
+
+def create_mlx_manifest(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any]) -> Dict[str, Any]:
+    """Create MLX format manifest."""
+
+    model_files = {}
+    extra_files = []
+
+    # Try different methods to extract quantization for MLX models
+    quantization_type = detect_quantization_for_mlx(repo_id, directory_path)
+
+    # Use the detected quantization or default to "DEFAULT"
+    quant_level = quantization_type.value if quantization_type else "DEFAULT"
+
+    for file_name in files:
+        file_path = os.path.join(directory_path, file_name)
+        file_size = 0
+        if os.path.exists(file_path):
+            try:
+                file_size = os.path.getsize(file_path)
+            except (OSError, IOError):
+                pass
+
+        # Check if this is a main model file (safetensors but not index files)
+        if (file_name.endswith('.safetensors') and not file_name.endswith('.index.json')):
+            model_files[quant_level] = {
+                "Name": file_name,
+                "Downloaded": True,
+                "Size": file_size
+            }
+        else:
+            # Add to extra files
+            extra_files.append({
+                "Name": file_name,
+                "Downloaded": True,
+                "Size": file_size
+            })
+
+    manifest = {
+        "Name": repo_id,
+        "ModelType": PIPELINE_TO_MODEL_TYPE.get(old_metadata.get('pipeline_tag'), "other"),
+        "PluginId": "mlx",
+        "ModelFile": model_files,
+        "MMProjFile": {
+            "Name": "",
+            "Downloaded": False,
+            "Size": 0
+        },
+        "TokenizerFile": {
+            "Name": "",
+            "Downloaded": False,
+            "Size": 0
+        },
+        "ExtraFiles": extra_files if extra_files else None,
+        # Preserve old metadata fields
+        "pipeline_tag": old_metadata.get('pipeline_tag'),
+        "download_time": old_metadata.get('download_time'),
+        "avatar_url": old_metadata.get('avatar_url')
+    }
+
+    return manifest
+
+
+def detect_model_type(files: List[str]) -> str:
+    """Detect if this is a GGUF or MLX model based on file extensions."""
+    has_gguf = any(f.endswith('.gguf') for f in files)
+    has_safetensors = any(f.endswith('.safetensors') or 'safetensors' in f for f in files)
+
+    if has_gguf:
+        return "gguf"
+    elif has_safetensors:
+        return "mlx"
+    else:
+        # Default to mlx for other types
+        return "mlx"
+
+
+def create_manifest_from_files(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Create appropriate manifest format based on detected model type.
+
+    Args:
+        repo_id: Repository ID
+        files: List of files in the model directory
+        directory_path: Path to the model directory
+        old_metadata: Existing metadata (pipeline_tag, download_time, avatar_url)
+
+    Returns:
+        Dict containing the appropriate manifest format
+    """
+    model_type = detect_model_type(files)
+
+    if model_type == "gguf":
+        return create_gguf_manifest(repo_id, files, directory_path, old_metadata)
+    else:  # mlx or other
+        return create_mlx_manifest(repo_id, files, directory_path, old_metadata)
+
+
+def save_manifest_with_files_metadata(repo_id: str, local_dir: str, old_metadata: Dict[str, Any]) -> None:
+    """
+    Create and save manifest based on files found in the directory.
+
+    Args:
+        repo_id: Repository ID
+        local_dir: Local directory containing the model files
+        old_metadata: Existing metadata to preserve
+    """
+    # Get list of files in the directory
+    files = []
+    try:
+        for root, dirs, filenames in os.walk(local_dir):
+            for filename in filenames:
+                # Store relative path from the directory
+                rel_path = os.path.relpath(os.path.join(root, filename), local_dir)
+                files.append(rel_path)
+    except (OSError, IOError):
+        pass

+    # Create appropriate manifest
+    manifest = create_manifest_from_files(repo_id, files, local_dir, old_metadata)
+
+    # Save manifest
+    save_download_metadata(local_dir, manifest)
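
For orientation, here is a minimal usage sketch of the new manifest utilities added above. The import path (nexaai.utils.manifest_utils), the model directory, and the repo id are illustrative assumptions; only the two function names and their signatures come from the diff.

    from datetime import datetime

    # Assumed module location; the real package layout may differ.
    from nexaai.utils.manifest_utils import (
        load_download_metadata,
        save_manifest_with_files_metadata,
    )

    model_dir = "/path/to/models/owner/repo"  # illustrative directory
    old_metadata = {
        "pipeline_tag": "text-generation",
        "download_time": datetime.now().isoformat(),
        "avatar_url": None,
    }

    # Scans model_dir, builds a GGUF or MLX manifest from the files it finds,
    # and writes nexa.manifest into the directory.
    save_manifest_with_files_metadata("owner/repo", model_dir, old_metadata)

    # Reads nexa.manifest back (falling back to download_metadata.json if absent)
    # and fills in pipeline_tag / avatar_url when they are missing.
    metadata = load_download_metadata(model_dir, repo_id="owner/repo")
    print(metadata.get("pipeline_tag"), metadata.get("ModelType"))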
@@ -5,17 +5,21 @@ from datetime import datetime
 from dataclasses import dataclass
 from typing import Optional, Callable, Dict, Any, List, Union
 import functools
+from enum import Enum
 from tqdm.auto import tqdm
 from huggingface_hub import HfApi
 from huggingface_hub.utils import HfHubHTTPError, RepositoryNotFoundError
 
 from .progress_tracker import CustomProgressTqdm, DownloadProgressTracker
 from .avatar_fetcher import get_avatar_url_for_repo
+from .manifest_utils import (
+    load_download_metadata,
+    save_manifest_with_files_metadata,
+)
 
 # Default path for model storage
 DEFAULT_MODEL_SAVING_PATH = "~/.cache/nexa.ai/nexa_sdk/models/"
 
-
 @dataclass
 class DownloadedModel:
     """Data class representing a downloaded model with all its metadata."""
@@ -88,30 +92,6 @@ def _check_for_incomplete_downloads(directory_path: str) -> bool:
     # If we can't access the directory, assume download is complete
     return True
 
-
-def _load_download_metadata(directory_path: str) -> Dict[str, Any]:
-    """Load download metadata from download_metadata.json if it exists."""
-    metadata_path = os.path.join(directory_path, 'download_metadata.json')
-    if os.path.exists(metadata_path):
-        try:
-            with open(metadata_path, 'r', encoding='utf-8') as f:
-                return json.load(f)
-        except (json.JSONDecodeError, IOError):
-            pass
-    return {}
-
-
-def _save_download_metadata(directory_path: str, metadata: Dict[str, Any]) -> None:
-    """Save download metadata to download_metadata.json."""
-    metadata_path = os.path.join(directory_path, 'download_metadata.json')
-    try:
-        with open(metadata_path, 'w', encoding='utf-8') as f:
-            json.dump(metadata, f, indent=2)
-    except IOError:
-        # If we can't save metadata, don't fail the download
-        pass
-
-
 def _get_directory_size_and_files(directory_path: str) -> tuple[int, List[str]]:
     """Get total size and list of files in a directory."""
     total_size = 0
@@ -162,24 +142,28 @@ def _scan_for_repo_folders(base_path: str) -> List[DownloadedModel]:
                     if os.path.isdir(subitem_path):
                         has_subdirs = True
                         # This looks like owner/repo structure
-                        size_bytes, files = _get_directory_size_and_files(subitem_path)
-                        if files: # Only include if there are files
-                            # Check if the download is complete
-                            download_complete = _check_for_incomplete_downloads(subitem_path)
-                            # Load metadata if it exists
-                            metadata = _load_download_metadata(subitem_path)
-                            models.append(DownloadedModel(
-                                repo_id=f"{item}/{subitem}",
-                                files=files,
-                                folder_type='owner_repo',
-                                local_path=subitem_path,
-                                size_bytes=size_bytes,
-                                file_count=len(files),
-                                full_repo_download_complete=download_complete,
-                                pipeline_tag=metadata.get('pipeline_tag'),
-                                download_time=metadata.get('download_time'),
-                                avatar_url=metadata.get('avatar_url')
-                            ))
+                        # Only include if nexa.manifest exists
+                        manifest_path = os.path.join(subitem_path, 'nexa.manifest')
+                        if os.path.exists(manifest_path):
+                            size_bytes, files = _get_directory_size_and_files(subitem_path)
+                            if files: # Only include if there are files
+                                # Check if the download is complete
+                                download_complete = _check_for_incomplete_downloads(subitem_path)
+                                # Load metadata if it exists
+                                repo_id = f"{item}/{subitem}"
+                                metadata = load_download_metadata(subitem_path, repo_id)
+                                models.append(DownloadedModel(
+                                    repo_id=repo_id,
+                                    files=files,
+                                    folder_type='owner_repo',
+                                    local_path=subitem_path,
+                                    size_bytes=size_bytes,
+                                    file_count=len(files),
+                                    full_repo_download_complete=download_complete,
+                                    pipeline_tag=metadata.get('pipeline_tag'),
+                                    download_time=metadata.get('download_time'),
+                                    avatar_url=metadata.get('avatar_url')
+                                ))
                     else:
                         direct_files.append(subitem)
             except (OSError, IOError):
@@ -188,24 +172,28 @@ def _scan_for_repo_folders(base_path: str) -> List[DownloadedModel]:
 
             # Direct repo folder (no owner structure)
             if not has_subdirs and direct_files:
-                size_bytes, files = _get_directory_size_and_files(item_path)
-                if files: # Only include if there are files
-                    # Check if the download is complete
-                    download_complete = _check_for_incomplete_downloads(item_path)
-                    # Load metadata if it exists
-                    metadata = _load_download_metadata(item_path)
-                    models.append(DownloadedModel(
-                        repo_id=item,
-                        files=files,
-                        folder_type='direct_repo',
-                        local_path=item_path,
-                        size_bytes=size_bytes,
-                        file_count=len(files),
-                        full_repo_download_complete=download_complete,
-                        pipeline_tag=metadata.get('pipeline_tag'),
-                        download_time=metadata.get('download_time'),
-                        avatar_url=metadata.get('avatar_url')
-                    ))
+                # Only include if nexa.manifest exists
+                manifest_path = os.path.join(item_path, 'nexa.manifest')
+                if os.path.exists(manifest_path):
+                    size_bytes, files = _get_directory_size_and_files(item_path)
+                    if files: # Only include if there are files
+                        # Check if the download is complete
+                        download_complete = _check_for_incomplete_downloads(item_path)
+                        # Load metadata if it exists
+                        repo_id = item
+                        metadata = load_download_metadata(item_path, repo_id)
+                        models.append(DownloadedModel(
+                            repo_id=repo_id,
+                            files=files,
+                            folder_type='direct_repo',
+                            local_path=item_path,
+                            size_bytes=size_bytes,
+                            file_count=len(files),
+                            full_repo_download_complete=download_complete,
+                            pipeline_tag=metadata.get('pipeline_tag'),
+                            download_time=metadata.get('download_time'),
+                            avatar_url=metadata.get('avatar_url')
+                        ))
 
     except (OSError, IOError):
         # Skip if base path can't be accessed
@@ -743,15 +731,26 @@ class HuggingFaceDownloader:
             # Get avatar URL
             avatar_url = get_avatar_url_for_repo(repo_id, custom_endpoint=self.endpoint)
 
-            # Prepare metadata
-            metadata = {
+            # Prepare old-style metadata for compatibility
+            old_metadata = {
                 'pipeline_tag': pipeline_tag,
                 'download_time': datetime.now().isoformat(),
                 'avatar_url': avatar_url
             }
 
-            # Save metadata to the repository directory
-            _save_download_metadata(local_dir, metadata)
+            # Get list of files in the directory
+            files = []
+            try:
+                for root, dirs, filenames in os.walk(local_dir):
+                    for filename in filenames:
+                        # Store relative path from the directory
+                        rel_path = os.path.relpath(os.path.join(root, filename), local_dir)
+                        files.append(rel_path)
+            except (OSError, IOError):
+                pass
+
+            # Create and save manifest using the new utility function
+            save_manifest_with_files_metadata(repo_id, local_dir, old_metadata)
 
         except Exception:
             # Don't fail the download if metadata fetch fails
@@ -820,14 +819,6 @@
         # Create a subdirectory for this specific repo
         repo_local_dir = self._create_repo_directory(local_dir, repo_id)
 
-        # Check if repository already exists (basic check for directory existence)
-        if not force_download and os.path.exists(repo_local_dir) and os.listdir(repo_local_dir):
-            print(f"✓ Repository already exists, skipping: {repo_id}")
-            # Stop progress tracking
-            if progress_tracker:
-                progress_tracker.stop_tracking()
-            return repo_local_dir
-
         try:
             download_kwargs = {
                 'repo_id': repo_id,
@@ -0,0 +1,47 @@
+"""
+Model type mappings for HuggingFace pipeline tags to our internal model types.
+
+This module provides centralized model type mapping functionality to avoid
+circular imports between other utility modules.
+"""
+
+from enum import Enum
+from typing import Dict
+
+
+class ModelTypeMapping(Enum):
+    """Enum for mapping HuggingFace pipeline_tag to our ModelType."""
+    TEXT_GENERATION = ("text-generation", "llm")
+    IMAGE_TEXT_TO_TEXT = ("image-text-to-text", "vlm")
+
+    def __init__(self, pipeline_tag: str, model_type: str):
+        self.pipeline_tag = pipeline_tag
+        self.model_type = model_type
+
+
+# Create mapping dictionaries from the enum
+PIPELINE_TO_MODEL_TYPE: Dict[str, str] = {
+    mapping.pipeline_tag: mapping.model_type
+    for mapping in ModelTypeMapping
+}
+
+MODEL_TYPE_TO_PIPELINE: Dict[str, str] = {
+    mapping.model_type: mapping.pipeline_tag
+    for mapping in ModelTypeMapping
+}
+
+
+def map_pipeline_tag_to_model_type(pipeline_tag: str) -> str:
+    """Map HuggingFace pipeline_tag to our ModelType."""
+    if not pipeline_tag:
+        return "other"
+
+    return PIPELINE_TO_MODEL_TYPE.get(pipeline_tag, "other")
+
+
+def map_model_type_to_pipeline_tag(model_type: str) -> str:
+    """Reverse map ModelType back to HuggingFace pipeline_tag."""
+    if not model_type:
+        return None
+
+    return MODEL_TYPE_TO_PIPELINE.get(model_type)
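
To illustrate the mapping behavior defined above, a small sketch follows; the import path is an assumption, while the expected values are taken directly from the enum and the two helper functions in the diff.

    # Assumed module location; adjust to the real package layout.
    from nexaai.utils.model_types import (
        PIPELINE_TO_MODEL_TYPE,
        map_pipeline_tag_to_model_type,
        map_model_type_to_pipeline_tag,
    )

    assert PIPELINE_TO_MODEL_TYPE == {
        "text-generation": "llm",
        "image-text-to-text": "vlm",
    }
    assert map_pipeline_tag_to_model_type("text-generation") == "llm"
    assert map_pipeline_tag_to_model_type("automatic-speech-recognition") == "other"  # unmapped tag
    assert map_model_type_to_pipeline_tag("vlm") == "image-text-to-text"
    assert map_model_type_to_pipeline_tag("") is None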
@@ -107,7 +107,7 @@ class DownloadProgressTracker:
            time_diff = current_time - self.last_time
 
            # Only calculate if we have a meaningful time difference (avoid division by very small numbers)
-           if time_diff > 0.5: # At least 500ms between measurements
+           if time_diff > 0.1: # At least 100ms between measurements
                bytes_diff = current_downloaded - self.last_downloaded
 
                # Only calculate speed if bytes actually changed
@@ -118,6 +118,14 @@ class DownloadProgressTracker:
                    self.speed_history.append(speed)
                    if len(self.speed_history) > self.max_speed_history:
                        self.speed_history.pop(0)
+
+               # Update tracking variables when we actually calculate speed
+               self.last_downloaded = current_downloaded
+               self.last_time = current_time
+       else:
+           # First measurement - initialize tracking variables
+           self.last_downloaded = current_downloaded
+           self.last_time = current_time
 
        # Return the average of historical speeds if we have any
        # This ensures we show the last known speed even when skipping updates
@@ -157,13 +165,9 @@ class DownloadProgressTracker:
                total_file_sizes += data['total']
                active_file_count += 1
 
-       # Calculate speed
+       # Calculate speed (tracking variables are updated internally)
        speed = self.calculate_speed(total_downloaded)
 
-       # Update tracking variables
-       self.last_downloaded = total_downloaded
-       self.last_time = time.time()
-
        # Determine total size - prioritize pre-fetched repo size, then aggregate file sizes
        if self.total_repo_size > 0:
            # Use pre-fetched repository info if available
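
The progress-tracker changes above move the tracking-variable updates inside the speed calculation and lower the sampling gate from 500 ms to 100 ms. The following standalone sketch reproduces that control flow for illustration only; it is not the tracker's actual class, and the class and attribute names here are invented.

    import time

    class RollingSpeed:
        """Sketch of the sampling logic: take a speed sample only after at
        least 0.1 s has elapsed, keep a short history, and otherwise report
        the average of recent samples instead of dropping to zero."""

        def __init__(self, max_history: int = 5):
            self.max_history = max_history
            self.history: list[float] = []
            self.last_downloaded: int | None = None
            self.last_time: float | None = None

        def update(self, current_downloaded: int) -> float:
            now = time.time()
            if self.last_time is not None:
                elapsed = now - self.last_time
                if elapsed > 0.1:  # at least 100ms between measurements
                    delta = current_downloaded - self.last_downloaded
                    if delta > 0:
                        self.history.append(delta / elapsed)
                        if len(self.history) > self.max_history:
                            self.history.pop(0)
                    # advance the reference point only when a sample was attempted
                    self.last_downloaded = current_downloaded
                    self.last_time = now
            else:
                # first measurement: just initialize the reference point
                self.last_downloaded = current_downloaded
                self.last_time = now
            # return the last known average so brief gaps don't read as 0 B/s
            return sum(self.history) / len(self.history) if self.history else 0.0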