cortex-llm 1.0.6__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cortex/__init__.py CHANGED
@@ -5,7 +5,7 @@ A high-performance terminal interface for running Hugging Face LLMs locally
  with exclusive GPU acceleration via Metal Performance Shaders (MPS) and MLX.
  """

- __version__ = "1.0.6"
+ __version__ = "1.0.7"
  __author__ = "Cortex Development Team"
  __license__ = "MIT"

cortex/config.py CHANGED
@@ -74,7 +74,7 @@ class InferenceConfig(BaseModel):
  top_p: float = Field(default=0.95, ge=0.0, le=1.0)
  top_k: int = Field(default=40, ge=0)
  repetition_penalty: float = Field(default=1.1, ge=0.0, le=2.0)
- max_tokens: int = Field(default=2048, ge=1)
+ max_tokens: int = Field(default=4096, ge=1)
  stream_output: bool = True
  seed: int = Field(default=-1)

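For reference, a minimal sketch of the `InferenceConfig` fields visible in the hunk above, with the new 4096-token default (this is a partial reproduction for illustration; the real model defines additional fields, such as `temperature`, that this hunk does not show):

```python
from pydantic import BaseModel, Field

class InferenceConfig(BaseModel):
    # Partial sketch: only the fields visible in the hunk above.
    top_p: float = Field(default=0.95, ge=0.0, le=1.0)
    top_k: int = Field(default=40, ge=0)
    repetition_penalty: float = Field(default=1.1, ge=0.0, le=2.0)
    max_tokens: int = Field(default=4096, ge=1)  # raised from 2048 in 1.0.7
    stream_output: bool = True
    seed: int = Field(default=-1)  # -1 is treated as "no fixed seed" by the CLI

assert InferenceConfig().max_tokens == 4096
```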
cortex/inference_engine.py CHANGED
@@ -138,7 +138,7 @@ class InferenceEngine:
  use_fp16=True,
  use_channels_last=True,
  optimize_memory=True,
- max_batch_size=self.config.performance.batch_size
+ max_batch_size=self.config.performance.max_batch_size
  )
  self.mps_optimizer = MPSOptimizer(mps_config)

@@ -153,7 +153,7 @@ class InferenceEngine:
  fuse_operations=True,
  lazy_evaluation=True,
  rotating_kv_cache=True,
- kv_cache_size=self.config.model.context_length if hasattr(self.config.model, 'context_length') else 4096,
+ kv_cache_size=self.config.performance.context_length,
  quantization_bits=4
  )
  self.mlx_accelerator = MLXAccelerator(mlx_config)
@@ -204,6 +204,9 @@ class InferenceEngine:
  yield from self._generate_pytorch(model, tokenizer, request)
  elif model_info.format == ModelFormat.SAFETENSORS:
  yield from self._generate_safetensors(model, tokenizer, request)
+ elif model_info.format == ModelFormat.QUANTIZED:
+ # Quantized models are loaded as PyTorch-compatible modules
+ yield from self._generate_pytorch(model, tokenizer, request)
  elif model_info.format == ModelFormat.GGUF:
  yield from self._generate_gguf(model, tokenizer, request)
  else:
@@ -401,7 +404,18 @@ class InferenceEngine:
  last_metrics_update = time.time()

  try:
- device = torch.device("mps")
+ # Use the model's device when available (quantized models may be CPU-only on macOS)
+ device = None
+ try:
+ first_param = next(model.parameters())
+ device = first_param.device
+ except Exception:
+ device = None
+
+ if device is None or str(device) == "meta":
+ device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
+ elif device.type == "mps" and not torch.backends.mps.is_available():
+ device = torch.device("cpu")

  inputs = tokenizer(request.prompt, return_tensors="pt").to(device)

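Read on its own, the new device logic amounts to a small helper; here is a sketch of the same decision flow (the function name is invented for illustration and only mirrors the added lines above):

```python
import torch

def resolve_generation_device(model: torch.nn.Module) -> torch.device:
    """Prefer the device the model's parameters live on; fall back to MPS, then CPU."""
    try:
        device = next(model.parameters()).device
    except Exception:
        # Parameter-less or unusual models: decide from backend availability instead.
        device = None

    if device is None or str(device) == "meta":
        return torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    if device.type == "mps" and not torch.backends.mps.is_available():
        # Defensive: a model reporting MPS placement on a machine without MPS support.
        return torch.device("cpu")
    return device
```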
cortex/model_manager.py CHANGED
@@ -133,7 +133,8 @@ class ModelManager:
  self.quantizer = DynamicQuantizer(QuantizationConfig(
  mode=QuantizationMode.DYNAMIC,
  per_channel=True,
- cache_quantized=True
+ cache_quantized=True,
+ cache_dir=self.config.model.quantization_cache
  ))

  # Initialize MLX converter for native conversion
@@ -201,6 +202,39 @@ class ModelManager:
  level = getattr(self.config.gpu, "gpu_optimization_level", "maximum")
  level = str(level).lower().strip()
  return level in {"maximum", "max", "speed", "fast", "performance"}
+
+ def _get_default_quant_recipe(self) -> Optional[QuantizationRecipe]:
+ """Map configured default_quantization to an MLX quantization recipe."""
+ raw = getattr(self.config.model, "default_quantization", "") or ""
+ value = str(raw).strip().lower()
+ if not value or value == "auto":
+ return None
+
+ mapping = {
+ "q4_k_m": QuantizationRecipe.SPEED_4BIT,
+ "q5_k_m": QuantizationRecipe.BALANCED_5BIT,
+ "q6_k": QuantizationRecipe.QUALITY_8BIT, # closest available MLX recipe
+ "q8_0": QuantizationRecipe.QUALITY_8BIT,
+ "4bit": QuantizationRecipe.SPEED_4BIT,
+ "5bit": QuantizationRecipe.BALANCED_5BIT,
+ "8bit": QuantizationRecipe.QUALITY_8BIT,
+ "mixed": QuantizationRecipe.MIXED_PRECISION,
+ "none": QuantizationRecipe.NONE,
+ }
+
+ recipe = mapping.get(value)
+ if recipe is None:
+ logger.warning("Unknown default_quantization value: %s", raw)
+ return None
+
+ supported = getattr(self.config.model, "supported_quantizations", None)
+ if supported:
+ supported_norm = {str(s).strip().lower() for s in supported}
+ if value.startswith("q") and value not in supported_norm:
+ logger.warning("default_quantization '%s' not in supported_quantizations", raw)
+ return None
+
+ return recipe

  def load_model(
  self,
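To illustrate how the new mapping behaves, here is a standalone sketch of the lookup (the `QuantizationRecipe` members are the ones referenced above; the enum values and the helper name are placeholders for illustration):

```python
from enum import Enum
from typing import Optional

class QuantizationRecipe(Enum):
    # Members referenced by the hunk above; the string values are placeholders.
    SPEED_4BIT = "speed_4bit"
    BALANCED_5BIT = "balanced_5bit"
    QUALITY_8BIT = "quality_8bit"
    MIXED_PRECISION = "mixed_precision"
    NONE = "none"

_RECIPES = {
    "q4_k_m": QuantizationRecipe.SPEED_4BIT,
    "q5_k_m": QuantizationRecipe.BALANCED_5BIT,
    "q6_k": QuantizationRecipe.QUALITY_8BIT,   # closest available MLX recipe
    "q8_0": QuantizationRecipe.QUALITY_8BIT,
    "4bit": QuantizationRecipe.SPEED_4BIT,
    "5bit": QuantizationRecipe.BALANCED_5BIT,
    "8bit": QuantizationRecipe.QUALITY_8BIT,
    "mixed": QuantizationRecipe.MIXED_PRECISION,
    "none": QuantizationRecipe.NONE,
}

def recipe_for(default_quantization: str) -> Optional[QuantizationRecipe]:
    value = default_quantization.strip().lower()
    if not value or value == "auto":
        return None  # "auto" keeps the size-based heuristic chosen later
    return _RECIPES.get(value)  # unknown values also fall back to the heuristic

assert recipe_for("q8_0") is QuantizationRecipe.QUALITY_8BIT
assert recipe_for("auto") is None
```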
@@ -374,6 +408,10 @@ class ModelManager:
  except Exception as e:
  logger.warning(f"Could not estimate model parameters: {e}, defaulting to 4-bit")
  quant_recipe = QuantizationRecipe.SPEED_4BIT # Fallback
+
+ default_recipe = self._get_default_quant_recipe()
+ if default_recipe is not None:
+ quant_recipe = default_recipe

  if quantization:
  quant_map = {
@@ -452,6 +490,10 @@ class ModelManager:
  else:
  quant_recipe = QuantizationRecipe.SPEED_4BIT # Default for larger models

+ default_recipe = self._get_default_quant_recipe()
+ if default_recipe is not None:
+ quant_recipe = default_recipe
+
  if quantization:
  quant_map = {
  "4bit": QuantizationRecipe.SPEED_4BIT,
@@ -563,6 +605,8 @@ class ModelManager:
  )

  if not can_load and can_apply_quantization:
+ if not getattr(self.config.model, "auto_quantize", True):
+ return False, f"GPU incompatible: {message} (auto_quantize disabled)"
  # Check if quantization would help
  gpu_status = self.gpu_validator.get_gpu_memory_status()
  available_gb = gpu_status['available_gb']
cortex/quantization/dynamic_quantizer.py CHANGED
@@ -3,7 +3,7 @@
  import torch
  import torch.nn as nn
  from typing import Dict, Any, Optional, Tuple, Union
- from dataclasses import dataclass
+ from dataclasses import dataclass, field
  from enum import Enum
  import gc
  from pathlib import Path
@@ -40,6 +40,7 @@ class QuantizationConfig:
  cache_quantized: bool = True # Cache quantized models to disk
  compress_cache: bool = False # Compress cached models (slower but smaller)
  validate_quantization: bool = True # Validate quantized models work correctly
+ cache_dir: Path = field(default_factory=lambda: Path.home() / ".cortex" / "quantized_models")

  def to_dict(self) -> Dict[str, Any]:
  """Convert to dictionary for serialization."""
@@ -118,6 +119,8 @@ class DynamicQuantizer:
  def __init__(self, config: Optional[QuantizationConfig] = None):
  """Initialize quantizer with configuration."""
  self.config = config or QuantizationConfig()
+ self.config.cache_dir = Path(self.config.cache_dir).expanduser()
+ self.config.cache_dir.mkdir(parents=True, exist_ok=True)
  self.device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
  self._quantization_cache: Dict[str, Dict[str, Any]] = {}

@@ -681,10 +684,10 @@ class DynamicQuantizer:

  # Generate cache key including model metadata
  cache_key = hashlib.md5(
- f"{model_path}_{model_mtime}_{model_size}_{json.dumps(quantization_info)}".encode()
+ f"{model_path}_{model_mtime}_{model_size}_{json.dumps(self.config.to_dict())}".encode()
  ).hexdigest()

- cache_dir = Path.home() / ".cortex" / "quantized_cache"
+ cache_dir = self.config.cache_dir
  cache_dir.mkdir(parents=True, exist_ok=True)

  cache_path = cache_dir / f"{cache_key}.pt"
@@ -723,7 +726,7 @@ class DynamicQuantizer:
  f"{model_path}_{model_mtime}_{model_size}_{json.dumps(config.to_dict())}".encode()
  ).hexdigest()

- cache_path = Path.home() / ".cortex" / "quantized_cache" / f"{cache_key}.pt"
+ cache_path = Path(self.config.cache_dir) / f"{cache_key}.pt"

  if cache_path.exists():
  try:
@@ -733,4 +736,4 @@ class DynamicQuantizer:
  # Cache corrupted, will re-quantize
  cache_path.unlink()

- return None
+ return None
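Taken together, the cache changes mean the quantized-model cache lives under the configurable `cache_dir` and the key covers the full quantization config, so changing any setting produces a fresh cache entry. A sketch of the resulting path computation (the helper name is invented; the real code also creates the directory before writing):

```python
import hashlib
import json
from pathlib import Path

def quantized_cache_path(cache_dir: Path, model_path: str, model_mtime: float,
                         model_size: int, config_dict: dict) -> Path:
    """Mirror the cache-key scheme above: md5 over path, mtime, size and config dict."""
    cache_key = hashlib.md5(
        f"{model_path}_{model_mtime}_{model_size}_{json.dumps(config_dict)}".encode()
    ).hexdigest()
    return Path(cache_dir).expanduser() / f"{cache_key}.pt"

# Example with the new default cache_dir and an illustrative config dict.
print(quantized_cache_path(
    Path("~/.cortex/quantized_models"),
    "/models/example-7b", 1700000000.0, 13_000_000_000,
    {"mode": "dynamic", "per_channel": True, "cache_quantized": True},
))
```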
cortex/ui/cli.py CHANGED
@@ -30,7 +30,7 @@ from cortex.conversation_manager import ConversationManager, MessageRole
  from cortex.model_downloader import ModelDownloader
  from cortex.template_registry import TemplateRegistry
  from cortex.fine_tuning import FineTuneWizard
- from cortex.ui.markdown_render import ThinkMarkdown, PrefixedRenderable
+ from cortex.ui.markdown_render import ThinkMarkdown, PrefixedRenderable, render_plain_with_think


  class CortexCLI:
@@ -1135,15 +1135,15 @@ class CortexCLI:
  logger.debug(f"Could not get stop sequences: {e}")

  # Create generation request with formatted prompt
- # Use lower temperature for more focused responses
  request = GenerationRequest(
  prompt=formatted_prompt,
  max_tokens=self.config.inference.max_tokens,
- temperature=0.3, # Lower temperature for less randomness
- top_p=0.9, # Slightly lower top_p
+ temperature=self.config.inference.temperature,
+ top_p=self.config.inference.top_p,
  top_k=self.config.inference.top_k,
  repetition_penalty=self.config.inference.repetition_penalty,
- stream=True,
+ stream=self.config.inference.stream_output,
+ seed=self.config.inference.seed if self.config.inference.seed >= 0 else None,
  stop_sequences=stop_sequences
  )

@@ -1167,50 +1167,65 @@ class CortexCLI:
  prefix_style = Style(color="cyan")

  def build_renderable(text: str):
- markdown = ThinkMarkdown(text, code_theme="monokai", use_line_numbers=False)
- return PrefixedRenderable(markdown, prefix="⏺ ", prefix_style=prefix_style, indent="  ")
+ if getattr(self.config.ui, "markdown_rendering", True):
+ markdown = ThinkMarkdown(
+ text,
+ code_theme="monokai",
+ use_line_numbers=False,
+ syntax_highlighting=getattr(self.config.ui, "syntax_highlighting", True),
+ )
+ renderable = markdown
+ else:
+ renderable = render_plain_with_think(text)

- with Live(
- build_renderable(""),
- console=self.console,
- refresh_per_second=20,
- transient=False,
- ) as live:
- for token in self.inference_engine.generate(request):
- if first_token_time is None:
- first_token_time = time.time()
+ return PrefixedRenderable(renderable, prefix="⏺ ", prefix_style=prefix_style, indent="  ")

- generated_text += token
- token_count += 1
+ original_console_width = self.console._width
+ target_width = max(40, int(self.get_terminal_width() * 0.75))
+ self.console.width = target_width
+ try:
+ with Live(
+ build_renderable(""),
+ console=self.console,
+ auto_refresh=False,
+ refresh_per_second=20,
+ transient=False,
+ vertical_overflow="visible",
+ ) as live:
+ for token in self.inference_engine.generate(request):
+ if first_token_time is None:
+ first_token_time = time.time()

- display_token = token
- if uses_reasoning_template and template_profile and template_profile.supports_streaming():
- display_token, should_display = template_profile.process_streaming_response(
- token, accumulated_response
- )
- accumulated_response += token
- if not should_display:
- display_token = ""
+ generated_text += token
+ token_count += 1
+
+ display_token = token
+ if uses_reasoning_template and template_profile and template_profile.supports_streaming():
+ display_token, should_display = template_profile.process_streaming_response(
+ token, accumulated_response
+ )
+ accumulated_response += token
+ if not should_display:
+ display_token = ""

- if display_token:
- display_text += display_token
+ if display_token:
+ display_text += display_token

- now = time.time()
- if display_token and ("\n" in display_token or now - last_render_time >= render_interval):
- live.update(build_renderable(display_text))
- last_render_time = now
+ now = time.time()
+ if display_token and ("\n" in display_token or now - last_render_time >= render_interval):
+ live.update(build_renderable(display_text), refresh=True)
+ last_render_time = now

- if uses_reasoning_template and template_profile:
- final_text = template_profile.process_response(generated_text)
- generated_text = final_text
- if not template_profile.config.show_reasoning:
- display_text = final_text
+ if uses_reasoning_template and template_profile:
+ final_text = template_profile.process_response(generated_text)
+ generated_text = final_text
+ if not template_profile.config.show_reasoning:
+ display_text = final_text

- live.update(build_renderable(display_text))
+ live.update(build_renderable(display_text), refresh=True)
+ finally:
+ self.console._width = original_console_width

- # Add blank line for spacing between response and metrics
- print()
-
  # Display final metrics in a clean, professional way
  elapsed = time.time() - start_time
  if token_count > 0 and elapsed > 0:
@@ -1238,6 +1253,9 @@ class CortexCLI:
  metrics_line = " · ".join(metrics_parts)
  print(f" \033[2m{metrics_line}\033[0m")

+ if token_count >= request.max_tokens:
+ print(f" \033[2m(output truncated at max_tokens={request.max_tokens}; increase in config.yaml)\033[0m")
+
  # Add assistant message to conversation history
  self.conversation_manager.add_message(MessageRole.ASSISTANT, generated_text)

cortex/ui/markdown_render.py CHANGED
@@ -3,10 +3,12 @@
  from typing import List

  from rich.console import Console
+ from rich.cells import cell_len
  from rich.markdown import Markdown
  from rich.segment import Segment
  from rich.style import Style
  from rich.syntax import Syntax
+ from rich.text import Text

  THINK_START_MARKER = "[[[THINK_START]]]"
  THINK_END_MARKER = "[[[THINK_END]]]"
@@ -45,6 +47,14 @@ class CodeBlockWithLineNumbers(Markdown.elements["fence"]):
  yield syntax


+ class CodeBlockPlain(Markdown.elements["fence"]):
+ """Markdown code block rendered as plain text (no syntax highlighting)."""
+
+ def __rich_console__(self, console: Console, options):
+ code = str(self.text).rstrip()
+ yield Text(code)
+
+
  class MarkdownWithLineNumbers(Markdown):
  """Markdown renderer that keeps line numbers for fenced code blocks."""

@@ -55,6 +65,26 @@ class MarkdownWithLineNumbers(Markdown):
  })


+ class MarkdownPlainCode(Markdown):
+ """Markdown renderer that disables syntax highlighting for code blocks."""
+
+ elements = Markdown.elements.copy()
+ elements.update({
+ "fence": CodeBlockPlain,
+ "code_block": CodeBlockPlain,
+ })
+
+
+ class MarkdownPlainCodeWithLineNumbers(Markdown):
+ """Markdown renderer with plain code blocks and line numbers."""
+
+ elements = MarkdownWithLineNumbers.elements.copy()
+ elements.update({
+ "fence": CodeBlockPlain,
+ "code_block": CodeBlockPlain,
+ })
+
+
  class ThinkMarkdown:
  """Markdown renderer that dims content inside <think> tags."""

@@ -63,10 +93,15 @@ class ThinkMarkdown:
  markup: str,
  code_theme: str = "monokai",
  use_line_numbers: bool = False,
+ syntax_highlighting: bool = True,
  ) -> None:
  marked = _mark_think_sections(markup)
- markdown_cls = MarkdownWithLineNumbers if use_line_numbers else Markdown
- self._markdown = markdown_cls(marked, code_theme=code_theme)
+ if syntax_highlighting:
+ markdown_cls = MarkdownWithLineNumbers if use_line_numbers else Markdown
+ self._markdown = markdown_cls(marked, code_theme=code_theme)
+ else:
+ markdown_cls = MarkdownPlainCodeWithLineNumbers if use_line_numbers else MarkdownPlainCode
+ self._markdown = markdown_cls(marked)

  def __rich_console__(self, console: Console, options):
  segments = console.render(self._markdown, options)
@@ -162,9 +197,15 @@ class PrefixedRenderable:
  self.indent = indent if indent is not None else " " * len(prefix)

  def __rich_console__(self, console: Console, options):
+ prefix_width = cell_len(self.prefix)
+ indent_width = cell_len(self.indent) if self.indent is not None else prefix_width
+ offset = max(prefix_width, indent_width)
+ inner_width = max(1, options.max_width - offset)
+ inner_options = options.update_width(inner_width)
+
  yield Segment(self.prefix, self.prefix_style)

- for segment in console.render(self.renderable, options):
+ for segment in console.render(self.renderable, inner_options):
  if segment.control:
  yield segment
  continue
@@ -183,3 +224,27 @@ class PrefixedRenderable:
  if index < len(parts) - 1:
  yield Segment("\n", style)
  yield Segment(self.indent, None)
+
+
+ def render_plain_with_think(text: str) -> Text:
+ """Render plain text while dimming content inside <think> tags."""
+ output = Text()
+ dim_style = Style(dim=True)
+ idx = 0
+ in_think = False
+
+ while idx < len(text):
+ if text.startswith("<think>", idx):
+ in_think = True
+ idx += len("<think>")
+ continue
+ if text.startswith("</think>", idx):
+ in_think = False
+ idx += len("</think>")
+ continue
+
+ char = text[idx]
+ output.append(char, dim_style if in_think else None)
+ idx += 1
+
+ return output
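For context, a minimal usage sketch of the plain-text path added above, rendered through the existing `PrefixedRenderable` wrapper (this assumes the package is importable; `Console` and `Style` are standard Rich APIs):

```python
from rich.console import Console
from rich.style import Style

from cortex.ui.markdown_render import PrefixedRenderable, render_plain_with_think

console = Console()
reply = "Checking.<think>scratch reasoning stays visible but dimmed</think> Final answer."

# Plain rendering path (used when markdown_rendering is disabled in the UI config):
body = render_plain_with_think(reply)
console.print(
    PrefixedRenderable(body, prefix="⏺ ", prefix_style=Style(color="cyan"), indent="  ")
)
```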
cortex_llm-1.0.6.dist-info/METADATA → cortex_llm-1.0.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: cortex-llm
- Version: 1.0.6
+ Version: 1.0.7
  Summary: GPU-Accelerated LLM Terminal for Apple Silicon
  Home-page: https://github.com/faisalmumtaz/Cortex
  Author: Cortex Development Team
@@ -68,7 +68,7 @@ Cortex is a fast, native CLI for running and fine-tuning LLMs on Apple Silicon u
  - Multi-format model support: MLX, GGUF, SafeTensors, PyTorch, GPTQ, AWQ
  - Built-in LoRA fine-tuning wizard
  - Chat template auto-detection (ChatML, Llama, Alpaca, Gemma, Reasoning)
- - Conversation history with branching
+ - Conversation history with autosave and export

  ## Quick Start

@@ -119,6 +119,19 @@ Cortex supports:
  - **PyTorch** (Transformers + MPS)
  - **GPTQ** / **AWQ** quantized models

+ ## Advanced Features
+
+ - **Dynamic quantization fallback** for PyTorch/SafeTensors models that do not fit GPU memory (INT8 preferred, INT4 fallback)
+ - `docs/dynamic-quantization.md`
+ - **MLX conversion with quantization recipes** (4/5/8-bit, mixed precision) for speed vs quality control
+ - `docs/mlx-acceleration.md`
+ - **LoRA fine-tuning wizard** for local adapters (`/finetune`)
+ - `docs/fine-tuning.md`
+ - **Template registry and auto-detection** for chat formatting (ChatML, Llama, Alpaca, Gemma, Reasoning)
+ - `docs/template-registry.md`
+ - **Inference engine details** and backend behavior
+ - `docs/inference-engine.md`
+
  ## Configuration

  Cortex reads `config.yaml` from the current working directory. For tuning GPU memory limits, quantization defaults, and inference parameters, see:
@@ -138,6 +151,7 @@ Advanced topics:

  - `docs/mlx-acceleration.md`
  - `docs/inference-engine.md`
+ - `docs/dynamic-quantization.md`
  - `docs/template-registry.md`
  - `docs/fine-tuning.md`
  - `docs/development.md`
cortex_llm-1.0.6.dist-info/RECORD → cortex_llm-1.0.7.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
- cortex/__init__.py,sha256=HQeri23e7w2It4MeziwPP2gTDfF9GgmBp9A0A2Zmrn0,2202
+ cortex/__init__.py,sha256=zd80dwfLqU5IbsIPvILFhFEI58aI4oOjk1jpzzqMKKw,2202
  cortex/__main__.py,sha256=I7Njt7BjGoHtPhftDoA44OyOYbwWNNaPwP_qlJSn0J4,2857
- cortex/config.py,sha256=txmpJXy3kUEKULZyu1OWb_jkNQRHZClm5ovZfCTX_Zc,13444
+ cortex/config.py,sha256=IQnMaXznTflTSvr91aybtPMnNW088r-BYeVMhxny63w,13444
  cortex/conversation_manager.py,sha256=aSTdGjVttsMKIiRPzztP0tOXlqZBkWtgZDNCZGyaR-c,17177
  cortex/gpu_validator.py,sha256=un6vMQ78MWMnKWIz8n-92v9Fb4g_YXqU_E1pUPinncY,16582
- cortex/inference_engine.py,sha256=pcoSBw8ooqdJmQtPP8Y-DrBusf6VGWZjPRik9NLSRrg,28632
+ cortex/inference_engine.py,sha256=bklCjmiMn3psFp14EZxRzePEuA33NCHJ1bQdsbvMlfg,29343
  cortex/model_downloader.py,sha256=VuPhvxq_66qKjsPjEWcLW-VmUHzOHik6LBMiGDk-cX8,4977
- cortex/model_manager.py,sha256=Blk-JA_kajJcDp-h2A4tplECijHPw8LZ8c_fbq0FGFg,100670
+ cortex/model_manager.py,sha256=Ra21TjhtFS-7_hRzDMh9m0BUazIGWoKr7Gye3GiVRJM,102671
  cortex/fine_tuning/__init__.py,sha256=IXKQqNqN1C3mha3na35i7KI-hMnsqqrmUgV4NrPKHy0,269
  cortex/fine_tuning/dataset.py,sha256=hIz_dfFSaJoiFzWZ6vwlwqjpTfdsnFNIEmwhhTD2d9k,15414
  cortex/fine_tuning/mlx_lora_trainer.py,sha256=idNzKtVG8pObwsnSrP0N1rU1EanhrIRvHiNL1asdzr8,22438
@@ -21,7 +21,7 @@ cortex/metal/mps_optimizer.py,sha256=4r6dj-_KAr3vedCwwu7lR-nIaF4g4D4kkOoF2KiQ0FQ
  cortex/metal/optimizer.py,sha256=9ixKj8ca1iovF-mFHYGa9_DUHcqgGyzLoP_lIRAzfMM,21996
  cortex/metal/performance_profiler.py,sha256=GMxxqwqE2kVJ4WePwVdUp2ADqhrV6wCCNrFnaMfBDpI,12274
  cortex/quantization/__init__.py,sha256=ElLP3ZO_XItddTl-PeoJ5GPb16RYIAk8m5sqwfAVE9s,184
- cortex/quantization/dynamic_quantizer.py,sha256=sAoHoQ6wfs6FvejG-iehB2Qij-0WC9qSTlBfj3D1pTI,31724
+ cortex/quantization/dynamic_quantizer.py,sha256=vV0RSPMoWeOPALwFOs0DzqIA2MkGpeEpqB2vTeudhW0,31934
  cortex/template_registry/__init__.py,sha256=O5BWmHRmfMSK-Ukpu8UqFO_kaN0kum-d-Wsz0Ds-sC0,491
  cortex/template_registry/auto_detector.py,sha256=lqI19Ef_w6ClZvD5dzDw1i5gnf2AUN_L4WjCMvW99Yg,5432
  cortex/template_registry/config_manager.py,sha256=vh7cXAUTJ4dLY74u5EHTpTa46jXxj34BlMyWsC_ZIaM,8658
@@ -38,12 +38,12 @@ cortex/template_registry/template_profiles/standard/gemma.py,sha256=D4wZN3_6QzUj
  cortex/template_registry/template_profiles/standard/llama.py,sha256=jz4MyvmISSPtIAcffPE7LrTosHvlC0NoJhzTw1DCvpY,3209
  cortex/template_registry/template_profiles/standard/simple.py,sha256=dGOOcL6HRoJFxkixLrYC4w7c63h-QmOOWC2TsOihYog,2422
  cortex/ui/__init__.py,sha256=t3GrHJMHTVgBEKh2_qt4B9mS594V5jriTDqc3eZKMGc,3409
- cortex/ui/cli.py,sha256=ExzP56n1yV4bdA1EOqHSDFRWhpgpX0lkghq0H0FXw7Q,74661
- cortex/ui/markdown_render.py,sha256=bXt60vkNYT_jbpKeIg_1OlcrxssmdbMO7RB2E1sWw3E,5759
+ cortex/ui/cli.py,sha256=QZhiV9z8hP9Fu5mvpzURSWLptDDRaJLmNLm2AqTGlqE,75734
+ cortex/ui/markdown_render.py,sha256=D4gSvv0TERFIAXYs3e76eaPsuvvD2cNT98PDKyUPnWI,7776
  cortex/ui/terminal_app.py,sha256=SF3KqcGFyZ4hpTmgX21idPzOTJLdKGkt4QdA-wwUBNE,18317
- cortex_llm-1.0.6.dist-info/licenses/LICENSE,sha256=_frJ3VsZWQGhMznZw2Tgjk7xwfAfDZRcBl43uZh8_4E,1070
- cortex_llm-1.0.6.dist-info/METADATA,sha256=6lu4S6Jq8ijbV8MqFFjRU8b0dEp7QcJwPEPo7VFvtBk,4447
- cortex_llm-1.0.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- cortex_llm-1.0.6.dist-info/entry_points.txt,sha256=g83Nuz3iFrNdMLHxGLR2LnscdM7rdQRchuL3WGobQC8,48
- cortex_llm-1.0.6.dist-info/top_level.txt,sha256=79LAeTJJ_pMIBy3mkF7uNaN0mdBRt5tGrnne5N_iAio,7
- cortex_llm-1.0.6.dist-info/RECORD,,
+ cortex_llm-1.0.7.dist-info/licenses/LICENSE,sha256=_frJ3VsZWQGhMznZw2Tgjk7xwfAfDZRcBl43uZh8_4E,1070
+ cortex_llm-1.0.7.dist-info/METADATA,sha256=jUwV2nVs0EL01Iqap64U3mI5QFPrHv3pt5sE1SvmAA0,5119
+ cortex_llm-1.0.7.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ cortex_llm-1.0.7.dist-info/entry_points.txt,sha256=g83Nuz3iFrNdMLHxGLR2LnscdM7rdQRchuL3WGobQC8,48
+ cortex_llm-1.0.7.dist-info/top_level.txt,sha256=79LAeTJJ_pMIBy3mkF7uNaN0mdBRt5tGrnne5N_iAio,7
+ cortex_llm-1.0.7.dist-info/RECORD,,
cortex_llm-1.0.6.dist-info/WHEEL → cortex_llm-1.0.7.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.9.0)
+ Generator: setuptools (80.10.2)
  Root-Is-Purelib: true
  Tag: py3-none-any