cortex-llm 1.0.7__tar.gz → 1.0.8__tar.gz
This diff shows the published contents of two package versions from a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/PKG-INFO +1 -1
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/__init__.py +1 -1
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/metal/mlx_converter.py +105 -4
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex_llm.egg-info/PKG-INFO +1 -1
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/pyproject.toml +1 -1
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/setup.py +1 -1
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/LICENSE +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/README.md +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/__main__.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/config.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/conversation_manager.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/fine_tuning/__init__.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/fine_tuning/dataset.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/fine_tuning/mlx_lora_trainer.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/fine_tuning/trainer.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/fine_tuning/wizard.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/gpu_validator.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/inference_engine.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/metal/__init__.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/metal/gpu_validator.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/metal/memory_pool.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/metal/mlx_accelerator.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/metal/mlx_compat.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/metal/mps_optimizer.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/metal/optimizer.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/metal/performance_profiler.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/model_downloader.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/model_manager.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/quantization/__init__.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/quantization/dynamic_quantizer.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/__init__.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/auto_detector.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/config_manager.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/interactive.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/registry.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/template_profiles/__init__.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/template_profiles/base.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/template_profiles/complex/__init__.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/template_profiles/complex/reasoning.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/template_profiles/standard/__init__.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/template_profiles/standard/alpaca.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/template_profiles/standard/chatml.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/template_profiles/standard/gemma.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/template_profiles/standard/llama.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/template_registry/template_profiles/standard/simple.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/ui/__init__.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/ui/cli.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/ui/markdown_render.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex/ui/terminal_app.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex_llm.egg-info/SOURCES.txt +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex_llm.egg-info/dependency_links.txt +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex_llm.egg-info/entry_points.txt +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex_llm.egg-info/not-zip-safe +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex_llm.egg-info/requires.txt +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/cortex_llm.egg-info/top_level.txt +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/setup.cfg +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/tests/test_apple_silicon.py +0 -0
- {cortex_llm-1.0.7 → cortex_llm-1.0.8}/tests/test_metal_optimization.py +0 -0
cortex/__init__.py

@@ -5,7 +5,7 @@ A high-performance terminal interface for running Hugging Face LLMs locally
 with exclusive GPU acceleration via Metal Performance Shaders (MPS) and MLX.
 """
 
-__version__ = "1.0.7"
+__version__ = "1.0.8"
 __author__ = "Cortex Development Team"
 __license__ = "MIT"
 

cortex/metal/mlx_converter.py

@@ -66,9 +66,22 @@ class MLXConverter:
         self.cache_dir.mkdir(parents=True, exist_ok=True)
         self.conversion_cache = self.cache_dir / "conversion_cache.json"
         self._load_conversion_cache()
+        self._warned_mlx_lm_compat = False
 
         logger.info(f"MLX Converter initialized with cache dir: {self.cache_dir}")
         logger.info(f"MLX LM available: {mlx_utils is not None and load is not None}")
+
+    def _warn_mlx_lm_compat(self, missing: str) -> None:
+        """Warn once when mlx-lm is missing newer helper APIs."""
+        if self._warned_mlx_lm_compat:
+            return
+        self._warned_mlx_lm_compat = True
+        message = (
+            f"[WARN] mlx-lm is missing '{missing}'. Using compatibility fallback. "
+            "For best support, upgrade mlx-lm to a newer version."
+        )
+        logger.warning(message)
+        print(message)
 
     def _load_conversion_cache(self) -> None:
         """Load conversion cache metadata."""
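The _warn_mlx_lm_compat helper added above is a warn-once gate: the first compatibility fallback logs and prints, and every later fallback stays silent. A minimal standalone sketch of the same pattern (the class and names are illustrative, not part of the package):

    import logging

    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger("compat")


    class CompatShim:
        """Warn a single time per process when a fallback path is taken."""

        def __init__(self) -> None:
            self._warned = False

        def warn_once(self, missing: str) -> None:
            if self._warned:
                return  # already warned; later fallbacks stay quiet
            self._warned = True
            logger.warning("'%s' unavailable; using compatibility fallback.", missing)


    shim = CompatShim()
    shim.warn_once("get_model_path")  # emits one warning
    shim.warn_once("fetch_from_hub")  # suppressed by the flag

Note the flag is per converter instance rather than per missing helper, matching the single _warned_mlx_lm_compat boolean above.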
@@ -206,6 +219,83 @@ class MLXConverter:
 
         return download_dir
 
+    def _mlx_get_model_path(self, source_path: Path) -> Tuple[Path, Optional[str]]:
+        """Resolve model path with MLX LM compatibility fallbacks."""
+        if mlx_utils is not None and hasattr(mlx_utils, "get_model_path"):
+            return mlx_utils.get_model_path(str(source_path))
+        self._warn_mlx_lm_compat("get_model_path")
+
+        # Fallback: local path or direct HF download.
+        model_path = Path(source_path)
+        if model_path.exists():
+            hf_repo = None
+            try:
+                from huggingface_hub import ModelCard
+
+                card_path = model_path / "README.md"
+                if card_path.is_file():
+                    card = ModelCard.load(card_path)
+                    hf_repo = getattr(card.data, "base_model", None)
+            except Exception:
+                hf_repo = None
+            return model_path, hf_repo
+
+        try:
+            model_path = Path(
+                snapshot_download(
+                    str(source_path),
+                    allow_patterns=[
+                        "*.json",
+                        "model*.safetensors",
+                        "*.py",
+                        "tokenizer.model",
+                        "*.tiktoken",
+                        "tiktoken.model",
+                        "*.txt",
+                        "*.jsonl",
+                        "*.jinja",
+                    ],
+                )
+            )
+        except Exception as e:
+            raise RuntimeError(f"Failed to download model from Hugging Face: {e}") from e
+
+        return model_path, str(source_path)
+
+    def _mlx_fetch_from_hub(
+        self,
+        model_path: Path,
+        trust_remote_code: bool = False
+    ) -> Tuple[Any, Dict[str, Any], Any]:
+        """Fetch model/config/tokenizer with MLX LM compatibility fallbacks."""
+        if mlx_utils is not None and hasattr(mlx_utils, "fetch_from_hub"):
+            return mlx_utils.fetch_from_hub(
+                model_path,
+                lazy=True,
+                trust_remote_code=trust_remote_code
+            )
+        self._warn_mlx_lm_compat("fetch_from_hub")
+
+        if mlx_utils is not None and hasattr(mlx_utils, "load_model") and hasattr(mlx_utils, "load_tokenizer"):
+            model, model_config = mlx_utils.load_model(model_path, lazy=True)
+            try:
+                tokenizer = mlx_utils.load_tokenizer(
+                    model_path,
+                    eos_token_ids=model_config.get("eos_token_id", None),
+                    tokenizer_config_extra={"trust_remote_code": trust_remote_code},
+                )
+            except TypeError:
+                tokenizer = mlx_utils.load_tokenizer(
+                    model_path,
+                    eos_token_ids=model_config.get("eos_token_id", None),
+                )
+            return model, model_config, tokenizer
+
+        raise RuntimeError(
+            "mlx_lm.utils is missing required helpers (fetch_from_hub/load_model). "
+            "Upgrade mlx-lm to a newer version."
+        )
+
     def _requires_sentencepiece(self, model_path: Path) -> bool:
         """Return True if the model likely needs SentencePiece."""
         # If a fast tokenizer is present, SentencePiece should not be required.
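_mlx_fetch_from_hub also probes keyword support at call time: it first invokes load_tokenizer with the newer tokenizer_config_extra keyword and retries without it when a TypeError signals an older mlx-lm signature. A self-contained sketch of that probe-and-retry idiom (both helper functions below are stand-ins, not mlx-lm APIs):

    from typing import Any, Callable, Optional


    def new_helper(path: str, *, extra: Optional[dict] = None) -> Any:
        """Stand-in for a helper that gained an 'extra' keyword in newer releases."""
        return path, extra


    def old_helper(path: str) -> Any:
        """Stand-in for the older signature without the keyword."""
        return path, None


    def call_with_keyword_fallback(fn: Callable[..., Any], path: str, extra: dict) -> Any:
        try:
            # Try the richer signature first; TypeError indicates the
            # installed release does not accept the keyword.
            return fn(path, extra=extra)
        except TypeError:
            return fn(path)


    print(call_with_keyword_fallback(new_helper, "model_dir", {"trust_remote_code": False}))
    print(call_with_keyword_fallback(old_helper, "model_dir", {"trust_remote_code": False}))

One caveat of this idiom: a TypeError raised inside the helper itself is indistinguishable from a signature mismatch, so checking inspect.signature is a stricter alternative when that matters.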
@@ -379,10 +469,17 @@
         # Build quantization configuration
         quantize_config = self._build_quantization_config(config)
 
-
-
-
-
+        try:
+            model_path, hf_repo = self._mlx_get_model_path(Path(source_path))
+        except Exception as e:
+            return False, f"Model path resolution failed: {e}", None
+
+        try:
+            model, model_config, tokenizer = self._mlx_fetch_from_hub(
+                model_path, trust_remote_code=False
+            )
+        except Exception as e:
+            return False, f"Model fetch failed: {e}", None
 
         dtype = model_config.get("torch_dtype", None)
         if dtype in ["float16", "bfloat16", "float32"]:
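Instead of letting resolution or fetch errors propagate, the new code folds them into what appears to be the method's (ok, detail, payload) result convention. A hypothetical sketch of producing and consuming that triple (the function and messages are stand-ins, not from the package):

    from typing import Optional, Tuple


    def convert_model(source: str) -> Tuple[bool, str, Optional[str]]:
        try:
            artifact = source.upper()  # placeholder for the real conversion work
        except Exception as e:
            return False, f"Model fetch failed: {e}", None
        return True, "converted", artifact


    ok, detail, artifact = convert_model("tiny-model")
    if not ok:
        print(f"conversion failed: {detail}")
    else:
        print(artifact)

Returning the failure reason in-band keeps the caller's control flow simple and makes the error string available to the UI without a traceback.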
@@ -398,6 +495,8 @@ class MLXConverter:
         model.update(tree_map_with_path(set_dtype, model.parameters()))
 
         if config.quantization != QuantizationRecipe.NONE:
+            if mlx_utils is None or not hasattr(mlx_utils, "quantize_model"):
+                return False, "MLX LM quantize_model not available; upgrade mlx-lm.", None
             quant_predicate = None
             if quantize_config and "quant_predicate" in quantize_config:
                 quant_predicate = quantize_config["quant_predicate"]
@@ -411,6 +510,8 @@ class MLXConverter:
         )
 
         normalized_hf_repo = self._normalize_hf_repo(hf_repo)
+        if mlx_utils is None or not hasattr(mlx_utils, "save"):
+            return False, "MLX LM save() not available; upgrade mlx-lm.", None
         mlx_utils.save(output_path, model_path, model, tokenizer, model_config, hf_repo=normalized_hf_repo)
         logger.info("MLX conversion completed")
 
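The quantize_model and save guards both feature-detect an optional helper with hasattr before calling it, so an outdated mlx-lm fails with an actionable upgrade hint instead of an AttributeError. A generic sketch of that guard, assuming only that the module may be None or missing the attribute (the helper name below is illustrative):

    from types import ModuleType
    from typing import Any, Callable, Optional


    def require_api(module: Optional[ModuleType], name: str) -> Callable[..., Any]:
        """Return module.<name>, or raise with an actionable upgrade hint."""
        fn = getattr(module, name, None) if module is not None else None
        if fn is None:
            raise RuntimeError(f"mlx-lm is missing '{name}'; upgrade mlx-lm.")
        return fn

    # Usage sketch: resolve once, then call.
    # save = require_api(mlx_utils, "save")
    # save(output_path, model_path, model, tokenizer, model_config)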