cortex-llm 1.0.6__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cortex/__init__.py CHANGED
@@ -5,7 +5,7 @@ A high-performance terminal interface for running Hugging Face LLMs locally
  with exclusive GPU acceleration via Metal Performance Shaders (MPS) and MLX.
  """

- __version__ = "1.0.6"
+ __version__ = "1.0.7"
  __author__ = "Cortex Development Team"
  __license__ = "MIT"

cortex/config.py CHANGED
@@ -74,7 +74,7 @@ class InferenceConfig(BaseModel):
  top_p: float = Field(default=0.95, ge=0.0, le=1.0)
  top_k: int = Field(default=40, ge=0)
  repetition_penalty: float = Field(default=1.1, ge=0.0, le=2.0)
- max_tokens: int = Field(default=2048, ge=1)
+ max_tokens: int = Field(default=4096, ge=1)
  stream_output: bool = True
  seed: int = Field(default=-1)

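For reference, a minimal sketch of the `InferenceConfig` fields visible in the hunk above, with the new 4096-token default (this is a partial reproduction for illustration; the real model defines additional fields, such as `temperature`, that this hunk does not show):

```python
from pydantic import BaseModel, Field

class InferenceConfig(BaseModel):
    # Partial sketch: only the fields visible in the hunk above.
    top_p: float = Field(default=0.95, ge=0.0, le=1.0)
    top_k: int = Field(default=40, ge=0)
    repetition_penalty: float = Field(default=1.1, ge=0.0, le=2.0)
    max_tokens: int = Field(default=4096, ge=1)  # raised from 2048 in 1.0.7
    stream_output: bool = True
    seed: int = Field(default=-1)  # -1 is treated as "no fixed seed" by the CLI

assert InferenceConfig().max_tokens == 4096
```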
cortex/inference_engine.py CHANGED
@@ -138,7 +138,7 @@ class InferenceEngine:
  use_fp16=True,
  use_channels_last=True,
  optimize_memory=True,
- max_batch_size=self.config.performance.batch_size
+ max_batch_size=self.config.performance.max_batch_size
  )
  self.mps_optimizer = MPSOptimizer(mps_config)

@@ -153,7 +153,7 @@ class InferenceEngine:
  fuse_operations=True,
  lazy_evaluation=True,
  rotating_kv_cache=True,
- kv_cache_size=self.config.model.context_length if hasattr(self.config.model, 'context_length') else 4096,
+ kv_cache_size=self.config.performance.context_length,
  quantization_bits=4
  )
  self.mlx_accelerator = MLXAccelerator(mlx_config)
@@ -204,6 +204,9 @@ class InferenceEngine:
  yield from self._generate_pytorch(model, tokenizer, request)
  elif model_info.format == ModelFormat.SAFETENSORS:
  yield from self._generate_safetensors(model, tokenizer, request)
+ elif model_info.format == ModelFormat.QUANTIZED:
+ # Quantized models are loaded as PyTorch-compatible modules
+ yield from self._generate_pytorch(model, tokenizer, request)
  elif model_info.format == ModelFormat.GGUF:
  yield from self._generate_gguf(model, tokenizer, request)
  else:
@@ -401,7 +404,18 @@ class InferenceEngine:
  last_metrics_update = time.time()

  try:
- device = torch.device("mps")
+ # Use the model's device when available (quantized models may be CPU-only on macOS)
+ device = None
+ try:
+ first_param = next(model.parameters())
+ device = first_param.device
+ except Exception:
+ device = None
+
+ if device is None or str(device) == "meta":
+ device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
+ elif device.type == "mps" and not torch.backends.mps.is_available():
+ device = torch.device("cpu")

  inputs = tokenizer(request.prompt, return_tensors="pt").to(device)

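Read on its own, the new device logic amounts to a small helper; here is a sketch of the same decision flow (the function name is invented for illustration and only mirrors the added lines above):

```python
import torch

def resolve_generation_device(model: torch.nn.Module) -> torch.device:
    """Prefer the device the model's parameters live on; fall back to MPS, then CPU."""
    try:
        device = next(model.parameters()).device
    except Exception:
        # Parameter-less or unusual models: decide from backend availability instead.
        device = None

    if device is None or str(device) == "meta":
        return torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    if device.type == "mps" and not torch.backends.mps.is_available():
        # Defensive: a model reporting MPS placement on a machine without MPS support.
        return torch.device("cpu")
    return device
```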
cortex/model_manager.py CHANGED
@@ -133,7 +133,8 @@ class ModelManager:
  self.quantizer = DynamicQuantizer(QuantizationConfig(
  mode=QuantizationMode.DYNAMIC,
  per_channel=True,
- cache_quantized=True
+ cache_quantized=True,
+ cache_dir=self.config.model.quantization_cache
  ))

  # Initialize MLX converter for native conversion
@@ -201,6 +202,39 @@ class ModelManager:
  level = getattr(self.config.gpu, "gpu_optimization_level", "maximum")
  level = str(level).lower().strip()
  return level in {"maximum", "max", "speed", "fast", "performance"}
+
+ def _get_default_quant_recipe(self) -> Optional[QuantizationRecipe]:
+ """Map configured default_quantization to an MLX quantization recipe."""
+ raw = getattr(self.config.model, "default_quantization", "") or ""
+ value = str(raw).strip().lower()
+ if not value or value == "auto":
+ return None
+
+ mapping = {
+ "q4_k_m": QuantizationRecipe.SPEED_4BIT,
+ "q5_k_m": QuantizationRecipe.BALANCED_5BIT,
+ "q6_k": QuantizationRecipe.QUALITY_8BIT, # closest available MLX recipe
+ "q8_0": QuantizationRecipe.QUALITY_8BIT,
+ "4bit": QuantizationRecipe.SPEED_4BIT,
+ "5bit": QuantizationRecipe.BALANCED_5BIT,
+ "8bit": QuantizationRecipe.QUALITY_8BIT,
+ "mixed": QuantizationRecipe.MIXED_PRECISION,
+ "none": QuantizationRecipe.NONE,
+ }
+
+ recipe = mapping.get(value)
+ if recipe is None:
+ logger.warning("Unknown default_quantization value: %s", raw)
+ return None
+
+ supported = getattr(self.config.model, "supported_quantizations", None)
+ if supported:
+ supported_norm = {str(s).strip().lower() for s in supported}
+ if value.startswith("q") and value not in supported_norm:
+ logger.warning("default_quantization '%s' not in supported_quantizations", raw)
+ return None
+
+ return recipe

  def load_model(
  self,
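To illustrate how the new mapping behaves, here is a standalone sketch of the lookup (the `QuantizationRecipe` members are the ones referenced above; the enum values and the helper name are placeholders for illustration):

```python
from enum import Enum
from typing import Optional

class QuantizationRecipe(Enum):
    # Members referenced by the hunk above; the string values are placeholders.
    SPEED_4BIT = "speed_4bit"
    BALANCED_5BIT = "balanced_5bit"
    QUALITY_8BIT = "quality_8bit"
    MIXED_PRECISION = "mixed_precision"
    NONE = "none"

_RECIPES = {
    "q4_k_m": QuantizationRecipe.SPEED_4BIT,
    "q5_k_m": QuantizationRecipe.BALANCED_5BIT,
    "q6_k": QuantizationRecipe.QUALITY_8BIT,   # closest available MLX recipe
    "q8_0": QuantizationRecipe.QUALITY_8BIT,
    "4bit": QuantizationRecipe.SPEED_4BIT,
    "5bit": QuantizationRecipe.BALANCED_5BIT,
    "8bit": QuantizationRecipe.QUALITY_8BIT,
    "mixed": QuantizationRecipe.MIXED_PRECISION,
    "none": QuantizationRecipe.NONE,
}

def recipe_for(default_quantization: str) -> Optional[QuantizationRecipe]:
    value = default_quantization.strip().lower()
    if not value or value == "auto":
        return None  # "auto" keeps the size-based heuristic chosen later
    return _RECIPES.get(value)  # unknown values also fall back to the heuristic

assert recipe_for("q8_0") is QuantizationRecipe.QUALITY_8BIT
assert recipe_for("auto") is None
```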
@@ -374,6 +408,10 @@ class ModelManager:
  except Exception as e:
  logger.warning(f"Could not estimate model parameters: {e}, defaulting to 4-bit")
  quant_recipe = QuantizationRecipe.SPEED_4BIT # Fallback
+
+ default_recipe = self._get_default_quant_recipe()
+ if default_recipe is not None:
+ quant_recipe = default_recipe

  if quantization:
  quant_map = {
@@ -452,6 +490,10 @@ class ModelManager:
  else:
  quant_recipe = QuantizationRecipe.SPEED_4BIT # Default for larger models

+ default_recipe = self._get_default_quant_recipe()
+ if default_recipe is not None:
+ quant_recipe = default_recipe
+
  if quantization:
  quant_map = {
  "4bit": QuantizationRecipe.SPEED_4BIT,
@@ -563,6 +605,8 @@ class ModelManager:
  )

  if not can_load and can_apply_quantization:
+ if not getattr(self.config.model, "auto_quantize", True):
+ return False, f"GPU incompatible: {message} (auto_quantize disabled)"
  # Check if quantization would help
  gpu_status = self.gpu_validator.get_gpu_memory_status()
  available_gb = gpu_status['available_gb']
cortex/quantization/dynamic_quantizer.py CHANGED
@@ -3,7 +3,7 @@
  import torch
  import torch.nn as nn
  from typing import Dict, Any, Optional, Tuple, Union
- from dataclasses import dataclass
+ from dataclasses import dataclass, field
  from enum import Enum
  import gc
  from pathlib import Path
@@ -40,6 +40,7 @@ class QuantizationConfig:
  cache_quantized: bool = True # Cache quantized models to disk
  compress_cache: bool = False # Compress cached models (slower but smaller)
  validate_quantization: bool = True # Validate quantized models work correctly
+ cache_dir: Path = field(default_factory=lambda: Path.home() / ".cortex" / "quantized_models")

  def to_dict(self) -> Dict[str, Any]:
  """Convert to dictionary for serialization."""
@@ -118,6 +119,8 @@ class DynamicQuantizer:
  def __init__(self, config: Optional[QuantizationConfig] = None):
  """Initialize quantizer with configuration."""
  self.config = config or QuantizationConfig()
+ self.config.cache_dir = Path(self.config.cache_dir).expanduser()
+ self.config.cache_dir.mkdir(parents=True, exist_ok=True)
  self.device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
  self._quantization_cache: Dict[str, Dict[str, Any]] = {}

@@ -681,10 +684,10 @@ class DynamicQuantizer:

  # Generate cache key including model metadata
  cache_key = hashlib.md5(
- f"{model_path}_{model_mtime}_{model_size}_{json.dumps(quantization_info)}".encode()
+ f"{model_path}_{model_mtime}_{model_size}_{json.dumps(self.config.to_dict())}".encode()
  ).hexdigest()

- cache_dir = Path.home() / ".cortex" / "quantized_cache"
+ cache_dir = self.config.cache_dir
  cache_dir.mkdir(parents=True, exist_ok=True)

  cache_path = cache_dir / f"{cache_key}.pt"
@@ -723,7 +726,7 @@ class DynamicQuantizer:
  f"{model_path}_{model_mtime}_{model_size}_{json.dumps(config.to_dict())}".encode()
  ).hexdigest()

- cache_path = Path.home() / ".cortex" / "quantized_cache" / f"{cache_key}.pt"
+ cache_path = Path(self.config.cache_dir) / f"{cache_key}.pt"

  if cache_path.exists():
  try:
@@ -733,4 +736,4 @@ class DynamicQuantizer:
  # Cache corrupted, will re-quantize
  cache_path.unlink()

- return None
+ return None
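Taken together, the cache changes mean the quantized-model cache lives under the configurable `cache_dir` and the key covers the full quantization config, so changing any setting produces a fresh cache entry. A sketch of the resulting path computation (the helper name is invented; the real code also creates the directory before writing):

```python
import hashlib
import json
from pathlib import Path

def quantized_cache_path(cache_dir: Path, model_path: str, model_mtime: float,
                         model_size: int, config_dict: dict) -> Path:
    """Mirror the cache-key scheme above: md5 over path, mtime, size and config dict."""
    cache_key = hashlib.md5(
        f"{model_path}_{model_mtime}_{model_size}_{json.dumps(config_dict)}".encode()
    ).hexdigest()
    return Path(cache_dir).expanduser() / f"{cache_key}.pt"

# Example with the new default cache_dir and an illustrative config dict.
print(quantized_cache_path(
    Path("~/.cortex/quantized_models"),
    "/models/example-7b", 1700000000.0, 13_000_000_000,
    {"mode": "dynamic", "per_channel": True, "cache_quantized": True},
))
```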
cortex/ui/cli.py CHANGED
@@ -30,7 +30,7 @@ from cortex.conversation_manager import ConversationManager, MessageRole
  from cortex.model_downloader import ModelDownloader
  from cortex.template_registry import TemplateRegistry
  from cortex.fine_tuning import FineTuneWizard
- from cortex.ui.markdown_render import ThinkMarkdown, PrefixedRenderable
+ from cortex.ui.markdown_render import ThinkMarkdown, PrefixedRenderable, render_plain_with_think


  class CortexCLI:
@@ -1135,15 +1135,15 @@ class CortexCLI:
  logger.debug(f"Could not get stop sequences: {e}")

  # Create generation request with formatted prompt
- # Use lower temperature for more focused responses
  request = GenerationRequest(
  prompt=formatted_prompt,
  max_tokens=self.config.inference.max_tokens,
- temperature=0.3, # Lower temperature for less randomness
- top_p=0.9, # Slightly lower top_p
+ temperature=self.config.inference.temperature,
+ top_p=self.config.inference.top_p,
  top_k=self.config.inference.top_k,
  repetition_penalty=self.config.inference.repetition_penalty,
- stream=True,
+ stream=self.config.inference.stream_output,
+ seed=self.config.inference.seed if self.config.inference.seed >= 0 else None,
  stop_sequences=stop_sequences
  )

@@ -1167,50 +1167,65 @@ class CortexCLI:
  prefix_style = Style(color="cyan")

  def build_renderable(text: str):
- markdown = ThinkMarkdown(text, code_theme="monokai", use_line_numbers=False)
- return PrefixedRenderable(markdown, prefix="⏺ ", prefix_style=prefix_style, indent="  ")
+ if getattr(self.config.ui, "markdown_rendering", True):
+ markdown = ThinkMarkdown(
+ text,
+ code_theme="monokai",
+ use_line_numbers=False,
+ syntax_highlighting=getattr(self.config.ui, "syntax_highlighting", True),
+ )
+ renderable = markdown
+ else:
+ renderable = render_plain_with_think(text)

- with Live(
- build_renderable(""),
- console=self.console,
- refresh_per_second=20,
- transient=False,
- ) as live:
- for token in self.inference_engine.generate(request):
- if first_token_time is None:
- first_token_time = time.time()
+ return PrefixedRenderable(renderable, prefix="⏺ ", prefix_style=prefix_style, indent="  ")

- generated_text += token
- token_count += 1
+ original_console_width = self.console._width
+ target_width = max(40, int(self.get_terminal_width() * 0.75))
+ self.console.width = target_width
+ try:
+ with Live(
+ build_renderable(""),
+ console=self.console,
+ auto_refresh=False,
+ refresh_per_second=20,
+ transient=False,
+ vertical_overflow="visible",
+ ) as live:
+ for token in self.inference_engine.generate(request):
+ if first_token_time is None:
+ first_token_time = time.time()

- display_token = token
- if uses_reasoning_template and template_profile and template_profile.supports_streaming():
- display_token, should_display = template_profile.process_streaming_response(
- token, accumulated_response
- )
- accumulated_response += token
- if not should_display:
- display_token = ""
+ generated_text += token
+ token_count += 1
+
+ display_token = token
+ if uses_reasoning_template and template_profile and template_profile.supports_streaming():
+ display_token, should_display = template_profile.process_streaming_response(
+ token, accumulated_response
+ )
+ accumulated_response += token
+ if not should_display:
+ display_token = ""

- if display_token:
- display_text += display_token
+ if display_token:
+ display_text += display_token

- now = time.time()
- if display_token and ("\n" in display_token or now - last_render_time >= render_interval):
- live.update(build_renderable(display_text))
- last_render_time = now
+ now = time.time()
+ if display_token and ("\n" in display_token or now - last_render_time >= render_interval):
+ live.update(build_renderable(display_text), refresh=True)
+ last_render_time = now

- if uses_reasoning_template and template_profile:
- final_text = template_profile.process_response(generated_text)
- generated_text = final_text
- if not template_profile.config.show_reasoning:
- display_text = final_text
+ if uses_reasoning_template and template_profile:
+ final_text = template_profile.process_response(generated_text)
+ generated_text = final_text
+ if not template_profile.config.show_reasoning:
+ display_text = final_text

- live.update(build_renderable(display_text))
+ live.update(build_renderable(display_text), refresh=True)
+ finally:
+ self.console._width = original_console_width

- # Add blank line for spacing between response and metrics
- print()
-
  # Display final metrics in a clean, professional way
  elapsed = time.time() - start_time
  if token_count > 0 and elapsed > 0:
@@ -1238,6 +1253,9 @@ class CortexCLI:
  metrics_line = " · ".join(metrics_parts)
  print(f" \033[2m{metrics_line}\033[0m")

+ if token_count >= request.max_tokens:
+ print(f" \033[2m(output truncated at max_tokens={request.max_tokens}; increase in config.yaml)\033[0m")
+
  # Add assistant message to conversation history
  self.conversation_manager.add_message(MessageRole.ASSISTANT, generated_text)

cortex/ui/markdown_render.py CHANGED
@@ -3,10 +3,12 @@
  from typing import List

  from rich.console import Console
+ from rich.cells import cell_len
  from rich.markdown import Markdown
  from rich.segment import Segment
  from rich.style import Style
  from rich.syntax import Syntax
+ from rich.text import Text

  THINK_START_MARKER = "[[[THINK_START]]]"
  THINK_END_MARKER = "[[[THINK_END]]]"
@@ -45,6 +47,14 @@ class CodeBlockWithLineNumbers(Markdown.elements["fence"]):
  yield syntax


+ class CodeBlockPlain(Markdown.elements["fence"]):
+ """Markdown code block rendered as plain text (no syntax highlighting)."""
+
+ def __rich_console__(self, console: Console, options):
+ code = str(self.text).rstrip()
+ yield Text(code)
+
+
  class MarkdownWithLineNumbers(Markdown):
  """Markdown renderer that keeps line numbers for fenced code blocks."""

@@ -55,6 +65,26 @@ class MarkdownWithLineNumbers(Markdown):
  })


+ class MarkdownPlainCode(Markdown):
+ """Markdown renderer that disables syntax highlighting for code blocks."""
+
+ elements = Markdown.elements.copy()
+ elements.update({
+ "fence": CodeBlockPlain,
+ "code_block": CodeBlockPlain,
+ })
+
+
+ class MarkdownPlainCodeWithLineNumbers(Markdown):
+ """Markdown renderer with plain code blocks and line numbers."""
+
+ elements = MarkdownWithLineNumbers.elements.copy()
+ elements.update({
+ "fence": CodeBlockPlain,
+ "code_block": CodeBlockPlain,
+ })
+
+
  class ThinkMarkdown:
  """Markdown renderer that dims content inside <think> tags."""

@@ -63,10 +93,15 @@ class ThinkMarkdown:
  markup: str,
  code_theme: str = "monokai",
  use_line_numbers: bool = False,
+ syntax_highlighting: bool = True,
  ) -> None:
  marked = _mark_think_sections(markup)
- markdown_cls = MarkdownWithLineNumbers if use_line_numbers else Markdown
- self._markdown = markdown_cls(marked, code_theme=code_theme)
+ if syntax_highlighting:
+ markdown_cls = MarkdownWithLineNumbers if use_line_numbers else Markdown
+ self._markdown = markdown_cls(marked, code_theme=code_theme)
+ else:
+ markdown_cls = MarkdownPlainCodeWithLineNumbers if use_line_numbers else MarkdownPlainCode
+ self._markdown = markdown_cls(marked)

  def __rich_console__(self, console: Console, options):
  segments = console.render(self._markdown, options)
@@ -162,9 +197,15 @@ class PrefixedRenderable:
  self.indent = indent if indent is not None else " " * len(prefix)

  def __rich_console__(self, console: Console, options):
+ prefix_width = cell_len(self.prefix)
+ indent_width = cell_len(self.indent) if self.indent is not None else prefix_width
+ offset = max(prefix_width, indent_width)
+ inner_width = max(1, options.max_width - offset)
+ inner_options = options.update_width(inner_width)
+
  yield Segment(self.prefix, self.prefix_style)

- for segment in console.render(self.renderable, options):
+ for segment in console.render(self.renderable, inner_options):
  if segment.control:
  yield segment
  continue
@@ -183,3 +224,27 @@ class PrefixedRenderable:
  if index < len(parts) - 1:
  yield Segment("\n", style)
  yield Segment(self.indent, None)
+
+
+ def render_plain_with_think(text: str) -> Text:
+ """Render plain text while dimming content inside <think> tags."""
+ output = Text()
+ dim_style = Style(dim=True)
+ idx = 0
+ in_think = False
+
+ while idx < len(text):
+ if text.startswith("<think>", idx):
+ in_think = True
+ idx += len("<think>")
+ continue
+ if text.startswith("</think>", idx):
+ in_think = False
+ idx += len("</think>")
+ continue
+
+ char = text[idx]
+ output.append(char, dim_style if in_think else None)
+ idx += 1
+
+ return output
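For context, a minimal usage sketch of the plain-text path added above, rendered through the existing `PrefixedRenderable` wrapper (this assumes the package is importable; `Console` and `Style` are standard Rich APIs):

```python
from rich.console import Console
from rich.style import Style

from cortex.ui.markdown_render import PrefixedRenderable, render_plain_with_think

console = Console()
reply = "Checking.<think>scratch reasoning stays visible but dimmed</think> Final answer."

# Plain rendering path (used when markdown_rendering is disabled in the UI config):
body = render_plain_with_think(reply)
console.print(
    PrefixedRenderable(body, prefix="⏺ ", prefix_style=Style(color="cyan"), indent="  ")
)
```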
cortex_llm-1.0.6.dist-info/METADATA → cortex_llm-1.0.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: cortex-llm
- Version: 1.0.6
+ Version: 1.0.7
  Summary: GPU-Accelerated LLM Terminal for Apple Silicon
  Home-page: https://github.com/faisalmumtaz/Cortex
  Author: Cortex Development Team
@@ -68,7 +68,7 @@ Cortex is a fast, native CLI for running and fine-tuning LLMs on Apple Silicon u
  - Multi-format model support: MLX, GGUF, SafeTensors, PyTorch, GPTQ, AWQ
  - Built-in LoRA fine-tuning wizard
  - Chat template auto-detection (ChatML, Llama, Alpaca, Gemma, Reasoning)
- - Conversation history with branching
+ - Conversation history with autosave and export

  ## Quick Start

@@ -119,6 +119,19 @@ Cortex supports:
  - **PyTorch** (Transformers + MPS)
  - **GPTQ** / **AWQ** quantized models

+ ## Advanced Features
+
+ - **Dynamic quantization fallback** for PyTorch/SafeTensors models that do not fit GPU memory (INT8 preferred, INT4 fallback)
+ - `docs/dynamic-quantization.md`
+ - **MLX conversion with quantization recipes** (4/5/8-bit, mixed precision) for speed vs quality control
+ - `docs/mlx-acceleration.md`
+ - **LoRA fine-tuning wizard** for local adapters (`/finetune`)
+ - `docs/fine-tuning.md`
+ - **Template registry and auto-detection** for chat formatting (ChatML, Llama, Alpaca, Gemma, Reasoning)
+ - `docs/template-registry.md`
+ - **Inference engine details** and backend behavior
+ - `docs/inference-engine.md`
+
  ## Configuration

  Cortex reads `config.yaml` from the current working directory. For tuning GPU memory limits, quantization defaults, and inference parameters, see:
@@ -138,6 +151,7 @@ Advanced topics:

  - `docs/mlx-acceleration.md`
  - `docs/inference-engine.md`
+ - `docs/dynamic-quantization.md`
  - `docs/template-registry.md`
  - `docs/fine-tuning.md`
  - `docs/development.md`
cortex_llm-1.0.6.dist-info/RECORD → cortex_llm-1.0.7.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
- cortex/__init__.py,sha256=HQeri23e7w2It4MeziwPP2gTDfF9GgmBp9A0A2Zmrn0,2202
+ cortex/__init__.py,sha256=zd80dwfLqU5IbsIPvILFhFEI58aI4oOjk1jpzzqMKKw,2202
  cortex/__main__.py,sha256=I7Njt7BjGoHtPhftDoA44OyOYbwWNNaPwP_qlJSn0J4,2857
- cortex/config.py,sha256=txmpJXy3kUEKULZyu1OWb_jkNQRHZClm5ovZfCTX_Zc,13444
+ cortex/config.py,sha256=IQnMaXznTflTSvr91aybtPMnNW088r-BYeVMhxny63w,13444
  cortex/conversation_manager.py,sha256=aSTdGjVttsMKIiRPzztP0tOXlqZBkWtgZDNCZGyaR-c,17177
  cortex/gpu_validator.py,sha256=un6vMQ78MWMnKWIz8n-92v9Fb4g_YXqU_E1pUPinncY,16582
- cortex/inference_engine.py,sha256=pcoSBw8ooqdJmQtPP8Y-DrBusf6VGWZjPRik9NLSRrg,28632
+ cortex/inference_engine.py,sha256=bklCjmiMn3psFp14EZxRzePEuA33NCHJ1bQdsbvMlfg,29343
  cortex/model_downloader.py,sha256=VuPhvxq_66qKjsPjEWcLW-VmUHzOHik6LBMiGDk-cX8,4977
- cortex/model_manager.py,sha256=Blk-JA_kajJcDp-h2A4tplECijHPw8LZ8c_fbq0FGFg,100670
+ cortex/model_manager.py,sha256=Ra21TjhtFS-7_hRzDMh9m0BUazIGWoKr7Gye3GiVRJM,102671
  cortex/fine_tuning/__init__.py,sha256=IXKQqNqN1C3mha3na35i7KI-hMnsqqrmUgV4NrPKHy0,269
  cortex/fine_tuning/dataset.py,sha256=hIz_dfFSaJoiFzWZ6vwlwqjpTfdsnFNIEmwhhTD2d9k,15414
  cortex/fine_tuning/mlx_lora_trainer.py,sha256=idNzKtVG8pObwsnSrP0N1rU1EanhrIRvHiNL1asdzr8,22438
@@ -21,7 +21,7 @@ cortex/metal/mps_optimizer.py,sha256=4r6dj-_KAr3vedCwwu7lR-nIaF4g4D4kkOoF2KiQ0FQ
  cortex/metal/optimizer.py,sha256=9ixKj8ca1iovF-mFHYGa9_DUHcqgGyzLoP_lIRAzfMM,21996
  cortex/metal/performance_profiler.py,sha256=GMxxqwqE2kVJ4WePwVdUp2ADqhrV6wCCNrFnaMfBDpI,12274
  cortex/quantization/__init__.py,sha256=ElLP3ZO_XItddTl-PeoJ5GPb16RYIAk8m5sqwfAVE9s,184
- cortex/quantization/dynamic_quantizer.py,sha256=sAoHoQ6wfs6FvejG-iehB2Qij-0WC9qSTlBfj3D1pTI,31724
+ cortex/quantization/dynamic_quantizer.py,sha256=vV0RSPMoWeOPALwFOs0DzqIA2MkGpeEpqB2vTeudhW0,31934
  cortex/template_registry/__init__.py,sha256=O5BWmHRmfMSK-Ukpu8UqFO_kaN0kum-d-Wsz0Ds-sC0,491
  cortex/template_registry/auto_detector.py,sha256=lqI19Ef_w6ClZvD5dzDw1i5gnf2AUN_L4WjCMvW99Yg,5432
  cortex/template_registry/config_manager.py,sha256=vh7cXAUTJ4dLY74u5EHTpTa46jXxj34BlMyWsC_ZIaM,8658
@@ -38,12 +38,12 @@ cortex/template_registry/template_profiles/standard/gemma.py,sha256=D4wZN3_6QzUj
  cortex/template_registry/template_profiles/standard/llama.py,sha256=jz4MyvmISSPtIAcffPE7LrTosHvlC0NoJhzTw1DCvpY,3209
  cortex/template_registry/template_profiles/standard/simple.py,sha256=dGOOcL6HRoJFxkixLrYC4w7c63h-QmOOWC2TsOihYog,2422
  cortex/ui/__init__.py,sha256=t3GrHJMHTVgBEKh2_qt4B9mS594V5jriTDqc3eZKMGc,3409
- cortex/ui/cli.py,sha256=ExzP56n1yV4bdA1EOqHSDFRWhpgpX0lkghq0H0FXw7Q,74661
- cortex/ui/markdown_render.py,sha256=bXt60vkNYT_jbpKeIg_1OlcrxssmdbMO7RB2E1sWw3E,5759
+ cortex/ui/cli.py,sha256=QZhiV9z8hP9Fu5mvpzURSWLptDDRaJLmNLm2AqTGlqE,75734
+ cortex/ui/markdown_render.py,sha256=D4gSvv0TERFIAXYs3e76eaPsuvvD2cNT98PDKyUPnWI,7776
  cortex/ui/terminal_app.py,sha256=SF3KqcGFyZ4hpTmgX21idPzOTJLdKGkt4QdA-wwUBNE,18317
- cortex_llm-1.0.6.dist-info/licenses/LICENSE,sha256=_frJ3VsZWQGhMznZw2Tgjk7xwfAfDZRcBl43uZh8_4E,1070
- cortex_llm-1.0.6.dist-info/METADATA,sha256=6lu4S6Jq8ijbV8MqFFjRU8b0dEp7QcJwPEPo7VFvtBk,4447
- cortex_llm-1.0.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- cortex_llm-1.0.6.dist-info/entry_points.txt,sha256=g83Nuz3iFrNdMLHxGLR2LnscdM7rdQRchuL3WGobQC8,48
- cortex_llm-1.0.6.dist-info/top_level.txt,sha256=79LAeTJJ_pMIBy3mkF7uNaN0mdBRt5tGrnne5N_iAio,7
- cortex_llm-1.0.6.dist-info/RECORD,,
+ cortex_llm-1.0.7.dist-info/licenses/LICENSE,sha256=_frJ3VsZWQGhMznZw2Tgjk7xwfAfDZRcBl43uZh8_4E,1070
+ cortex_llm-1.0.7.dist-info/METADATA,sha256=jUwV2nVs0EL01Iqap64U3mI5QFPrHv3pt5sE1SvmAA0,5119
+ cortex_llm-1.0.7.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ cortex_llm-1.0.7.dist-info/entry_points.txt,sha256=g83Nuz3iFrNdMLHxGLR2LnscdM7rdQRchuL3WGobQC8,48
+ cortex_llm-1.0.7.dist-info/top_level.txt,sha256=79LAeTJJ_pMIBy3mkF7uNaN0mdBRt5tGrnne5N_iAio,7
+ cortex_llm-1.0.7.dist-info/RECORD,,
cortex_llm-1.0.6.dist-info/WHEEL → cortex_llm-1.0.7.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.9.0)
+ Generator: setuptools (80.10.2)
  Root-Is-Purelib: true
  Tag: py3-none-any