fount_vlm_nell_02-0.3.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258)
  1. fount_vlm_nell_02-0.3.11.dist-info/METADATA +418 -0
  2. fount_vlm_nell_02-0.3.11.dist-info/RECORD +258 -0
  3. fount_vlm_nell_02-0.3.11.dist-info/WHEEL +5 -0
  4. fount_vlm_nell_02-0.3.11.dist-info/entry_points.txt +5 -0
  5. fount_vlm_nell_02-0.3.11.dist-info/licenses/LICENSE +21 -0
  6. fount_vlm_nell_02-0.3.11.dist-info/top_level.txt +1 -0
  7. mlx_vlm/__init__.py +16 -0
  8. mlx_vlm/__main__.py +24 -0
  9. mlx_vlm/chat.py +234 -0
  10. mlx_vlm/chat_ui.py +508 -0
  11. mlx_vlm/convert.py +284 -0
  12. mlx_vlm/deprecation.py +52 -0
  13. mlx_vlm/evals/__init__.py +0 -0
  14. mlx_vlm/evals/math_vista.py +565 -0
  15. mlx_vlm/evals/mmmu.py +528 -0
  16. mlx_vlm/evals/mmstar.py +343 -0
  17. mlx_vlm/evals/ocrbench.py +453 -0
  18. mlx_vlm/evals/utils.py +37 -0
  19. mlx_vlm/generate.py +1457 -0
  20. mlx_vlm/lora.py +207 -0
  21. mlx_vlm/models/__init__.py +0 -0
  22. mlx_vlm/models/aya_vision/__init__.py +2 -0
  23. mlx_vlm/models/aya_vision/aya_vision.py +188 -0
  24. mlx_vlm/models/aya_vision/config.py +52 -0
  25. mlx_vlm/models/aya_vision/language.py +202 -0
  26. mlx_vlm/models/aya_vision/vision.py +340 -0
  27. mlx_vlm/models/base.py +356 -0
  28. mlx_vlm/models/cache.py +238 -0
  29. mlx_vlm/models/deepseek_vl_v2/__init__.py +2 -0
  30. mlx_vlm/models/deepseek_vl_v2/config.py +159 -0
  31. mlx_vlm/models/deepseek_vl_v2/conversation.py +264 -0
  32. mlx_vlm/models/deepseek_vl_v2/deepseek_vl_v2.py +418 -0
  33. mlx_vlm/models/deepseek_vl_v2/language.py +539 -0
  34. mlx_vlm/models/deepseek_vl_v2/processing_deepsek_vl_v2.py +536 -0
  35. mlx_vlm/models/deepseek_vl_v2/vision.py +322 -0
  36. mlx_vlm/models/deepseekocr/__init__.py +2 -0
  37. mlx_vlm/models/deepseekocr/config.py +173 -0
  38. mlx_vlm/models/deepseekocr/conversation.py +264 -0
  39. mlx_vlm/models/deepseekocr/deepseekocr.py +371 -0
  40. mlx_vlm/models/deepseekocr/language.py +547 -0
  41. mlx_vlm/models/deepseekocr/processing_deepseekocr.py +655 -0
  42. mlx_vlm/models/deepseekocr/sam.py +489 -0
  43. mlx_vlm/models/deepseekocr/vision.py +263 -0
  44. mlx_vlm/models/deepseekocr_2/__init__.py +12 -0
  45. mlx_vlm/models/deepseekocr_2/config.py +216 -0
  46. mlx_vlm/models/deepseekocr_2/deepseekocr_2.py +297 -0
  47. mlx_vlm/models/deepseekocr_2/processing_deepseekocr.py +624 -0
  48. mlx_vlm/models/deepseekocr_2/vision.py +439 -0
  49. mlx_vlm/models/ernie4_5_moe_vl/__init__.py +5 -0
  50. mlx_vlm/models/ernie4_5_moe_vl/config.py +139 -0
  51. mlx_vlm/models/ernie4_5_moe_vl/ernie4_5_moe_vl.py +337 -0
  52. mlx_vlm/models/ernie4_5_moe_vl/language.py +770 -0
  53. mlx_vlm/models/ernie4_5_moe_vl/processor.py +686 -0
  54. mlx_vlm/models/ernie4_5_moe_vl/vision.py +322 -0
  55. mlx_vlm/models/fastvlm/__init__.py +2 -0
  56. mlx_vlm/models/fastvlm/config.py +79 -0
  57. mlx_vlm/models/fastvlm/fastvlm.py +198 -0
  58. mlx_vlm/models/fastvlm/language.py +49 -0
  59. mlx_vlm/models/fastvlm/vision.py +692 -0
  60. mlx_vlm/models/florence2/__init__.py +2 -0
  61. mlx_vlm/models/florence2/config.py +84 -0
  62. mlx_vlm/models/florence2/florence2.py +383 -0
  63. mlx_vlm/models/florence2/language.py +452 -0
  64. mlx_vlm/models/florence2/processing_florence2.py +30 -0
  65. mlx_vlm/models/florence2/vision.py +552 -0
  66. mlx_vlm/models/gemma3/__init__.py +2 -0
  67. mlx_vlm/models/gemma3/config.py +52 -0
  68. mlx_vlm/models/gemma3/gemma3.py +194 -0
  69. mlx_vlm/models/gemma3/language.py +293 -0
  70. mlx_vlm/models/gemma3/vision.py +215 -0
  71. mlx_vlm/models/gemma3n/__init__.py +2 -0
  72. mlx_vlm/models/gemma3n/audio.py +1038 -0
  73. mlx_vlm/models/gemma3n/config.py +130 -0
  74. mlx_vlm/models/gemma3n/gemma3n.py +322 -0
  75. mlx_vlm/models/gemma3n/language.py +631 -0
  76. mlx_vlm/models/gemma3n/vision.py +994 -0
  77. mlx_vlm/models/glm4v/__init__.py +3 -0
  78. mlx_vlm/models/glm4v/config.py +79 -0
  79. mlx_vlm/models/glm4v/glm4v.py +188 -0
  80. mlx_vlm/models/glm4v/language.py +574 -0
  81. mlx_vlm/models/glm4v/processing.py +220 -0
  82. mlx_vlm/models/glm4v/vision.py +406 -0
  83. mlx_vlm/models/glm4v_moe/__init__.py +3 -0
  84. mlx_vlm/models/glm4v_moe/config.py +81 -0
  85. mlx_vlm/models/glm4v_moe/glm4v_moe.py +176 -0
  86. mlx_vlm/models/glm4v_moe/language.py +674 -0
  87. mlx_vlm/models/glm4v_moe/processing.py +229 -0
  88. mlx_vlm/models/glm4v_moe/vision.py +405 -0
  89. mlx_vlm/models/glm_ocr/__init__.py +3 -0
  90. mlx_vlm/models/glm_ocr/config.py +93 -0
  91. mlx_vlm/models/glm_ocr/glm_ocr.py +180 -0
  92. mlx_vlm/models/glm_ocr/language.py +585 -0
  93. mlx_vlm/models/glm_ocr/processing.py +208 -0
  94. mlx_vlm/models/glm_ocr/vision.py +342 -0
  95. mlx_vlm/models/hunyuan_vl/__init__.py +7 -0
  96. mlx_vlm/models/hunyuan_vl/config.py +136 -0
  97. mlx_vlm/models/hunyuan_vl/hunyuan_vl.py +181 -0
  98. mlx_vlm/models/hunyuan_vl/language.py +509 -0
  99. mlx_vlm/models/hunyuan_vl/processing_hunyuan_vl.py +607 -0
  100. mlx_vlm/models/hunyuan_vl/vision.py +322 -0
  101. mlx_vlm/models/idefics2/__init__.py +2 -0
  102. mlx_vlm/models/idefics2/config.py +65 -0
  103. mlx_vlm/models/idefics2/idefics2.py +321 -0
  104. mlx_vlm/models/idefics2/language.py +161 -0
  105. mlx_vlm/models/idefics2/vision.py +244 -0
  106. mlx_vlm/models/idefics3/__init__.py +4 -0
  107. mlx_vlm/models/idefics3/config.py +54 -0
  108. mlx_vlm/models/idefics3/idefics3.py +221 -0
  109. mlx_vlm/models/idefics3/language.py +157 -0
  110. mlx_vlm/models/idefics3/vision.py +265 -0
  111. mlx_vlm/models/internvl_chat/__init__.py +3 -0
  112. mlx_vlm/models/internvl_chat/config.py +89 -0
  113. mlx_vlm/models/internvl_chat/internvl_chat.py +115 -0
  114. mlx_vlm/models/internvl_chat/language.py +187 -0
  115. mlx_vlm/models/internvl_chat/processor.py +395 -0
  116. mlx_vlm/models/internvl_chat/vision.py +265 -0
  117. mlx_vlm/models/interpolate.py +183 -0
  118. mlx_vlm/models/jina_vlm/__init__.py +3 -0
  119. mlx_vlm/models/jina_vlm/config.py +142 -0
  120. mlx_vlm/models/jina_vlm/image_processor.py +430 -0
  121. mlx_vlm/models/jina_vlm/jina_vlm.py +280 -0
  122. mlx_vlm/models/jina_vlm/language.py +272 -0
  123. mlx_vlm/models/jina_vlm/processing_jinavlm.py +266 -0
  124. mlx_vlm/models/jina_vlm/vision.py +202 -0
  125. mlx_vlm/models/kernels.py +447 -0
  126. mlx_vlm/models/kimi_vl/__init__.py +4 -0
  127. mlx_vlm/models/kimi_vl/config.py +84 -0
  128. mlx_vlm/models/kimi_vl/kimi_vl.py +127 -0
  129. mlx_vlm/models/kimi_vl/language.py +460 -0
  130. mlx_vlm/models/kimi_vl/processing_kimi_vl.py +560 -0
  131. mlx_vlm/models/kimi_vl/vision.py +485 -0
  132. mlx_vlm/models/lfm2_vl/__init__.py +2 -0
  133. mlx_vlm/models/lfm2_vl/config.py +94 -0
  134. mlx_vlm/models/lfm2_vl/language.py +49 -0
  135. mlx_vlm/models/lfm2_vl/lfm2_vl.py +223 -0
  136. mlx_vlm/models/lfm2_vl/processing_lfm2_vl.py +320 -0
  137. mlx_vlm/models/lfm2_vl/vision.py +223 -0
  138. mlx_vlm/models/llama4/__init__.py +2 -0
  139. mlx_vlm/models/llama4/config.py +83 -0
  140. mlx_vlm/models/llama4/language.py +334 -0
  141. mlx_vlm/models/llama4/llama4.py +146 -0
  142. mlx_vlm/models/llama4/vision.py +526 -0
  143. mlx_vlm/models/llava/__init__.py +2 -0
  144. mlx_vlm/models/llava/config.py +61 -0
  145. mlx_vlm/models/llava/language.py +200 -0
  146. mlx_vlm/models/llava/llava.py +132 -0
  147. mlx_vlm/models/llava/vision.py +233 -0
  148. mlx_vlm/models/llava_bunny/__init__.py +2 -0
  149. mlx_vlm/models/llava_bunny/config.py +85 -0
  150. mlx_vlm/models/llava_bunny/language.py +194 -0
  151. mlx_vlm/models/llava_bunny/llava_bunny.py +217 -0
  152. mlx_vlm/models/llava_bunny/vision.py +278 -0
  153. mlx_vlm/models/llava_next/__init__.py +2 -0
  154. mlx_vlm/models/llava_next/config.py +60 -0
  155. mlx_vlm/models/llava_next/language.py +192 -0
  156. mlx_vlm/models/llava_next/llava_next.py +138 -0
  157. mlx_vlm/models/llava_next/vision.py +217 -0
  158. mlx_vlm/models/mistral3/__init__.py +2 -0
  159. mlx_vlm/models/mistral3/config.py +59 -0
  160. mlx_vlm/models/mistral3/language.py +269 -0
  161. mlx_vlm/models/mistral3/mistral3.py +383 -0
  162. mlx_vlm/models/mllama/__init__.py +4 -0
  163. mlx_vlm/models/mllama/config.py +74 -0
  164. mlx_vlm/models/mllama/language.py +377 -0
  165. mlx_vlm/models/mllama/mllama.py +210 -0
  166. mlx_vlm/models/mllama/vision.py +458 -0
  167. mlx_vlm/models/molmo/__init__.py +5 -0
  168. mlx_vlm/models/molmo/config.py +93 -0
  169. mlx_vlm/models/molmo/language.py +208 -0
  170. mlx_vlm/models/molmo/molmo.py +108 -0
  171. mlx_vlm/models/molmo/processing_molmo.py +763 -0
  172. mlx_vlm/models/molmo/vision.py +408 -0
  173. mlx_vlm/models/molmo2/__init__.py +6 -0
  174. mlx_vlm/models/molmo2/config.py +137 -0
  175. mlx_vlm/models/molmo2/language.py +206 -0
  176. mlx_vlm/models/molmo2/molmo2.py +330 -0
  177. mlx_vlm/models/molmo2/processing.py +773 -0
  178. mlx_vlm/models/molmo2/vision.py +286 -0
  179. mlx_vlm/models/moondream2/__init__.py +11 -0
  180. mlx_vlm/models/moondream2/config.py +92 -0
  181. mlx_vlm/models/moondream2/image_crops.py +269 -0
  182. mlx_vlm/models/moondream2/language.py +267 -0
  183. mlx_vlm/models/moondream2/moondream2.py +522 -0
  184. mlx_vlm/models/moondream2/processing_moondream.py +144 -0
  185. mlx_vlm/models/moondream2/vision.py +200 -0
  186. mlx_vlm/models/multi_modality/__init__.py +4 -0
  187. mlx_vlm/models/multi_modality/config.py +108 -0
  188. mlx_vlm/models/multi_modality/language.py +191 -0
  189. mlx_vlm/models/multi_modality/multi_modality.py +338 -0
  190. mlx_vlm/models/multi_modality/sam.py +543 -0
  191. mlx_vlm/models/multi_modality/vision.py +450 -0
  192. mlx_vlm/models/paddleocr_vl/__init__.py +3 -0
  193. mlx_vlm/models/paddleocr_vl/config.py +93 -0
  194. mlx_vlm/models/paddleocr_vl/language.py +522 -0
  195. mlx_vlm/models/paddleocr_vl/paddleocr_vl.py +207 -0
  196. mlx_vlm/models/paddleocr_vl/processing_paddleocr_vl.py +425 -0
  197. mlx_vlm/models/paddleocr_vl/vision.py +358 -0
  198. mlx_vlm/models/paligemma/__init__.py +4 -0
  199. mlx_vlm/models/paligemma/config.py +50 -0
  200. mlx_vlm/models/paligemma/language.py +253 -0
  201. mlx_vlm/models/paligemma/paligemma.py +140 -0
  202. mlx_vlm/models/paligemma/vision.py +218 -0
  203. mlx_vlm/models/phi3_v/__init__.py +5 -0
  204. mlx_vlm/models/phi3_v/config.py +55 -0
  205. mlx_vlm/models/phi3_v/language.py +2 -0
  206. mlx_vlm/models/phi3_v/phi3_v.py +239 -0
  207. mlx_vlm/models/phi3_v/processing_phi3_v.py +704 -0
  208. mlx_vlm/models/phi3_v/vision.py +294 -0
  209. mlx_vlm/models/pixtral/__init__.py +4 -0
  210. mlx_vlm/models/pixtral/config.py +69 -0
  211. mlx_vlm/models/pixtral/language.py +195 -0
  212. mlx_vlm/models/pixtral/pixtral.py +208 -0
  213. mlx_vlm/models/pixtral/vision.py +293 -0
  214. mlx_vlm/models/qwen2_5_vl/__init__.py +2 -0
  215. mlx_vlm/models/qwen2_5_vl/config.py +90 -0
  216. mlx_vlm/models/qwen2_5_vl/language.py +541 -0
  217. mlx_vlm/models/qwen2_5_vl/qwen2_5_vl.py +184 -0
  218. mlx_vlm/models/qwen2_5_vl/vision.py +414 -0
  219. mlx_vlm/models/qwen2_vl/__init__.py +2 -0
  220. mlx_vlm/models/qwen2_vl/config.py +86 -0
  221. mlx_vlm/models/qwen2_vl/language.py +539 -0
  222. mlx_vlm/models/qwen2_vl/qwen2_vl.py +180 -0
  223. mlx_vlm/models/qwen2_vl/vision.py +308 -0
  224. mlx_vlm/models/qwen3_omni_moe/__init__.py +29 -0
  225. mlx_vlm/models/qwen3_omni_moe/audio.py +317 -0
  226. mlx_vlm/models/qwen3_omni_moe/code2wav.py +542 -0
  227. mlx_vlm/models/qwen3_omni_moe/config.py +264 -0
  228. mlx_vlm/models/qwen3_omni_moe/language.py +622 -0
  229. mlx_vlm/models/qwen3_omni_moe/omni_utils.py +69 -0
  230. mlx_vlm/models/qwen3_omni_moe/qwen3_omni_moe.py +706 -0
  231. mlx_vlm/models/qwen3_omni_moe/talker.py +873 -0
  232. mlx_vlm/models/qwen3_omni_moe/thinker.py +366 -0
  233. mlx_vlm/models/qwen3_omni_moe/vision.py +419 -0
  234. mlx_vlm/models/qwen3_vl/__init__.py +2 -0
  235. mlx_vlm/models/qwen3_vl/config.py +103 -0
  236. mlx_vlm/models/qwen3_vl/language.py +596 -0
  237. mlx_vlm/models/qwen3_vl/qwen3_vl.py +166 -0
  238. mlx_vlm/models/qwen3_vl/vision.py +441 -0
  239. mlx_vlm/models/qwen3_vl_moe/__init__.py +2 -0
  240. mlx_vlm/models/qwen3_vl_moe/config.py +108 -0
  241. mlx_vlm/models/qwen3_vl_moe/language.py +656 -0
  242. mlx_vlm/models/qwen3_vl_moe/qwen3_vl_moe.py +184 -0
  243. mlx_vlm/models/qwen3_vl_moe/vision.py +442 -0
  244. mlx_vlm/models/smolvlm/__init__.py +4 -0
  245. mlx_vlm/models/smolvlm/config.py +59 -0
  246. mlx_vlm/models/smolvlm/smolvlm.py +60 -0
  247. mlx_vlm/prompt_utils.py +565 -0
  248. mlx_vlm/sample_utils.py +39 -0
  249. mlx_vlm/server.py +1107 -0
  250. mlx_vlm/smolvlm_video_generate.py +109 -0
  251. mlx_vlm/tokenizer_utils.py +371 -0
  252. mlx_vlm/trainer/__init__.py +9 -0
  253. mlx_vlm/trainer/lora.py +70 -0
  254. mlx_vlm/trainer/trainer.py +299 -0
  255. mlx_vlm/trainer/utils.py +160 -0
  256. mlx_vlm/utils.py +1339 -0
  257. mlx_vlm/version.py +1 -0
  258. mlx_vlm/video_generate.py +611 -0
mlx_vlm/chat_ui.py ADDED
@@ -0,0 +1,508 @@
+ import argparse
+ import gc
+ import json
+ import threading
+
+ import gradio as gr
+ import mlx.core as mx
+
+ from mlx_vlm import load
+
+ from .generate import stream_generate
+ from .prompt_utils import get_chat_template, get_message_json
+ from .utils import load_config, load_image_processor
+
+
+ def parse_arguments():
+     parser = argparse.ArgumentParser(
+         description="Generate text from an image using a model."
+     )
+     parser.add_argument(
+         "--model",
+         type=str,
+         default="qnguyen3/nanoLLaVA",
+         help="The path to the local model directory or Hugging Face repo.",
+     )
+     return parser.parse_args()
+
+
+ # Global state for model
+ class ModelState:
+     def __init__(self):
+         self.model = None
+         self.processor = None
+         self.config = None
+         self.image_processor = None
+         self.current_model_name = None
+
+     def load(self, model_name):
+         """Load a model, clearing previous one from memory."""
+         # Clear previous model from memory
+         if self.model is not None:
+             del self.model
+             del self.processor
+             del self.config
+             del self.image_processor
+             mx.metal.clear_cache()
+             gc.collect()
+
+         # Load new model
+         self.config = load_config(model_name)
+         self.model, self.processor = load(
+             model_name, processor_kwargs={"trust_remote_code": True}
+         )
+         self.image_processor = load_image_processor(model_name)
+         self.current_model_name = model_name
+
+
+ state = ModelState()
+
+ # Parse args and load initial model
+ args = parse_arguments()
+ state.load(args.model)
+
+ # Use most of the viewport for conversation
+ chatbot_height = "clamp(380px, calc(100vh - 450px), 820px)"
+
+ # Global flag for stopping generation
+ stop_generation = threading.Event()
+
+
+ def get_cached_vlm_models():
+     """Scan HF cache for vision-capable models."""
+     try:
+         from huggingface_hub import scan_cache_dir
+
+         vlm_models = []
+         cache_info = scan_cache_dir()
+
+         for repo in cache_info.repos:
+             if repo.repo_type != "model":
+                 continue
+
+             # Check for refs
+             refs = getattr(repo, "refs", {})
+             if not refs or "main" not in refs:
+                 # Try revisions instead
+                 revisions = getattr(repo, "revisions", None)
+                 if revisions:
+                     for rev in revisions:
+                         snapshot_path = getattr(rev, "snapshot_path", None)
+                         if snapshot_path:
+                             config_path = snapshot_path / "config.json"
+                             if config_path.exists():
+                                 try:
+                                     with open(config_path, "r") as f:
+                                         config = json.load(f)
+                                     if "vision_config" in config:
+                                         vlm_models.append(repo.repo_id)
+                                         break
+                                 except Exception:
+                                     pass
+                 continue
+
+             # Check config.json for vision_config
+             main_ref = refs["main"]
+             snapshot_path = getattr(main_ref, "snapshot_path", None)
+             if snapshot_path:
+                 config_path = snapshot_path / "config.json"
+                 if config_path.exists():
+                     try:
+                         with open(config_path, "r") as f:
+                             config = json.load(f)
+                         if "vision_config" in config:
+                             vlm_models.append(repo.repo_id)
+                     except Exception:
+                         pass
+
+         # Ensure current model is in the list
+         if state.current_model_name and state.current_model_name not in vlm_models:
+             vlm_models.insert(0, state.current_model_name)
+
+         return sorted(set(vlm_models))
+     except Exception as e:
+         print(f"Error scanning cache: {e}")
+         # Return at least the current model
+         return [state.current_model_name] if state.current_model_name else []
+
+
+ def load_model_by_name(model_name, progress=gr.Progress()):
+     """Load a model and return status."""
+     if not model_name:
+         return "✓ Loaded", gr.update()
+
+     if model_name == state.current_model_name:
+         return "✓ Loaded", gr.update()
+
+     try:
+         progress(0.1, desc="Clearing memory...")
+         progress(0.3, desc="Loading...")
+         state.load(model_name)
+         progress(1.0, desc="Done!")
+
+         return "✓ Loaded", gr.update(value=[])
+     except Exception as e:
+         error_msg = str(e)
+         # Truncate error for display
+         short_err = error_msg[:60] + "..." if len(error_msg) > 60 else error_msg
+         return f"⚠ {short_err}", gr.update()
+
+
+ def refresh_model_list():
+     """Refresh the list of cached models."""
+     models = get_cached_vlm_models()
+     return gr.update(choices=models, value=state.current_model_name)
+
+
+ def extract_image_from_message(message):
+     """Extract image file path from various message formats."""
+     if isinstance(message, dict):
+         if "files" in message and message["files"]:
+             img = message["files"][-1]
+             if isinstance(img, dict) and "path" in img:
+                 return img["path"]
+             elif isinstance(img, str):
+                 return img
+         if "file" in message and message["file"]:
+             f = message["file"]
+             if isinstance(f, dict) and "path" in f:
+                 return f["path"]
+             elif isinstance(f, str):
+                 return f
+     elif isinstance(message, str):
+         return message if message else ""
+     return ""
+
+
+ def extract_text_from_message(message):
+     """Extract text content from various message formats."""
+     if isinstance(message, str):
+         return message
+     if isinstance(message, dict):
+         if "text" in message:
+             return message["text"] or ""
+         if "content" in message:
+             content = message["content"]
+             if isinstance(content, str):
+                 return content
+             elif isinstance(content, list):
+                 text_parts = []
+                 for c in content:
+                     if isinstance(c, str):
+                         text_parts.append(c)
+                     elif isinstance(c, dict) and c.get("type") == "text":
+                         text_parts.append(c.get("text", ""))
+                 return " ".join(text_parts)
+     return ""
+
+
+ def chat(
+     message,
+     history,
+     temperature,
+     max_tokens,
+     top_p,
+     repetition_penalty,
+     system_prompt,
+ ):
+     global stop_generation
+     stop_generation.clear()
+
+     image_file = extract_image_from_message(message)
+     num_images = 1 if image_file else 0
+
+     if state.config["model_type"] != "paligemma":
+         chat_history = []
+
+         if system_prompt and system_prompt.strip():
+             chat_history.append({"role": "system", "content": system_prompt.strip()})
+
+         for item in history:
+             if isinstance(item, dict):
+                 role = item.get("role", "user")
+                 content = item.get("content", "")
+                 if isinstance(content, str):
+                     pass
+                 elif isinstance(content, dict) and "text" in content:
+                     content = content["text"]
+                 elif isinstance(content, list):
+                     text_parts = []
+                     for c in content:
+                         if isinstance(c, str):
+                             text_parts.append(c)
+                         elif isinstance(c, dict) and c.get("type") == "text":
+                             text_parts.append(c.get("text", ""))
+                     content = " ".join(text_parts) if text_parts else ""
+                 else:
+                     content = ""
+                 if role == "assistant" and isinstance(content, str) and content:
+                     content = content.split("\n\n---\n")[0]
+                 if content:
+                     chat_history.append({"role": role, "content": content})
+             elif isinstance(item, (list, tuple)):
+                 if isinstance(item[0], str):
+                     chat_history.append({"role": "user", "content": item[0]})
+                 elif isinstance(item[0], dict) and "text" in item[0]:
+                     chat_history.append({"role": "user", "content": item[0]["text"]})
+                 if item[1] is not None:
+                     content = (
+                         item[1].split("\n\n---\n")[0]
+                         if isinstance(item[1], str)
+                         else item[1]
+                     )
+                     chat_history.append({"role": "assistant", "content": content})
+
+         chat_history.append(
+             {"role": "user", "content": extract_text_from_message(message)}
+         )
+
+         messages = []
+         for i, m in enumerate(chat_history):
+             skip_token = True
+             if i == len(chat_history) - 1 and m["role"] == "user" and image_file:
+                 skip_token = False
+             messages.append(
+                 get_message_json(
+                     state.config["model_type"],
+                     m["content"],
+                     role=m["role"],
+                     skip_image_token=skip_token,
+                     num_images=num_images if not skip_token else 0,
+                 )
+             )
+
+         messages = get_chat_template(
+             state.processor, messages, add_generation_prompt=True
+         )
+
+     else:
+         messages = extract_text_from_message(message)
+
+     response = ""
+     last_chunk = None
+
+     gen_kwargs = {
+         "max_tokens": max_tokens,
+         "temperature": temperature,
+     }
+
+     if top_p < 1.0:
+         gen_kwargs["top_p"] = top_p
+     if repetition_penalty != 1.0:
+         gen_kwargs["repetition_penalty"] = repetition_penalty
+
+     for chunk in stream_generate(
+         state.model,
+         state.processor,
+         messages,
+         image=image_file,
+         **gen_kwargs,
+     ):
+         if stop_generation.is_set():
+             response += "\n\n*[Generation stopped]*"
+             yield response
+             return
+
+         response += chunk.text
+         last_chunk = chunk
+         yield response
+
+     if last_chunk is not None:
+         stats = (
+             f"\n\n---\n"
+             f"<sub>📊 Prompt: {last_chunk.prompt_tokens} tokens @ {last_chunk.prompt_tps:.1f} t/s | "
+             f"Generation: {last_chunk.generation_tokens} tokens @ {last_chunk.generation_tps:.1f} t/s | "
+             f"Peak memory: {last_chunk.peak_memory:.2f} GB</sub>"
+         )
+         yield response + stats
+
+
+ def stop_generating():
+     """Set the stop flag to interrupt generation."""
+     stop_generation.set()
+     return gr.update(interactive=False)
+
+
+ # Create custom theme with dark mode support
+ theme = gr.themes.Soft(
+     primary_hue="blue",
+     secondary_hue="slate",
+ ).set(
+     body_background_fill="*neutral_50",
+     body_background_fill_dark="*neutral_950",
+     block_background_fill="*neutral_100",
+     block_background_fill_dark="*neutral_900",
+ )
+
+ # Get initial model list
+ initial_models = get_cached_vlm_models()
+
+ # JavaScript to toggle dark mode and set dark as default
+ dark_mode_js = """
+ () => {
+     // Always set dark mode on load unless user explicitly chose light
+     const savedTheme = localStorage.getItem('theme');
+     const isDark = savedTheme !== 'light';
+     document.body.classList.toggle('dark', isDark);
+     return isDark ? '☀️' : '🌙';
+ }
+ """
+
+ toggle_dark_js = """
+ () => {
+     const isDark = document.body.classList.toggle('dark');
+     localStorage.setItem('theme', isDark ? 'dark' : 'light');
+     return isDark ? '☀️' : '🌙';
+ }
+ """
+
+ # JavaScript to persist and restore selected model
+ save_model_js = """
+ (model_name) => {
+     if (model_name) {
+         localStorage.setItem('mlx_vlm_model', model_name);
+     }
+     return model_name;
+ }
+ """
+
+ load_model_js = """
+ (server_model) => {
+     const savedModel = localStorage.getItem('mlx_vlm_model');
+     // Return saved model if available, otherwise use server's current model
+     return savedModel || server_model;
+ }
+ """
+
+ with gr.Blocks(fill_height=True, title="MLX-VLM Chat", theme=theme) as demo:
+     gr.Markdown("## MLX-VLM Chat UI")
+
+     # Model selector row
+     with gr.Row():
+         with gr.Column(scale=5):
+             model_dropdown = gr.Dropdown(
+                 label="Model",
+                 choices=initial_models,
+                 value=state.current_model_name,
+                 show_label=True,
+                 allow_custom_value=True,
+             )
+         with gr.Column(scale=0):
+             refresh_btn = gr.Button("🔄", size="sm", min_width=20, scale=0)
+             theme_btn = gr.Button("☀️", size="sm", min_width=20, scale=0)
+         with gr.Column(scale=5):
+             model_status = gr.Textbox(
+                 value="✓ Loaded",
+                 label="Status",
+                 interactive=False,
+             )
+
+     # Main controls row
+     with gr.Row():
+         with gr.Column(scale=6):
+             with gr.Accordion("⚙️ Generation Settings", open=False):
+                 with gr.Row():
+                     temperature = gr.Slider(
+                         minimum=0,
+                         maximum=2,
+                         step=0.05,
+                         value=0.1,
+                         label="Temperature",
+                         info="Higher = more creative, lower = more focused",
+                     )
+                     max_tokens = gr.Slider(
+                         minimum=128,
+                         maximum=4096,
+                         step=64,
+                         value=1024,
+                         label="Max Tokens",
+                         info="Maximum length of response",
+                     )
+                 with gr.Row():
+                     top_p = gr.Slider(
+                         minimum=0.1,
+                         maximum=1.0,
+                         step=0.05,
+                         value=1.0,
+                         label="Top-p (Nucleus Sampling)",
+                         info="1.0 = disabled, lower = more focused",
+                     )
+                     repetition_penalty = gr.Slider(
+                         minimum=1.0,
+                         maximum=2.0,
+                         step=0.05,
+                         value=1.0,
+                         label="Repetition Penalty",
+                         info="1.0 = disabled, higher = less repetition",
+                     )
+                 with gr.Row():
+                     system_prompt = gr.Textbox(
+                         label="System Prompt (optional)",
+                         placeholder="You are a helpful assistant...",
+                         lines=2,
+                         max_lines=4,
+                     )
+
+         with gr.Column(scale=1, min_width=200):
+             stop_btn = gr.Button("⏹️ Stop", variant="stop", size="sm")
+
+     # Chatbot component
+     chatbot = gr.Chatbot(
+         height=chatbot_height,
+         scale=1,
+         buttons=["copy", "copy_all"],
+     )
+
+     # Chat interface
+     chat_interface = gr.ChatInterface(
+         fn=chat,
+         additional_inputs=[
+             temperature,
+             max_tokens,
+             top_p,
+             repetition_penalty,
+             system_prompt,
+         ],
+         multimodal=True,
+         fill_height=True,
+         chatbot=chatbot,
+         save_history=True,
+     )
+
+     # Connect model selector
+     model_dropdown.change(
+         fn=load_model_by_name,
+         inputs=[model_dropdown],
+         outputs=[model_status, chatbot],
+     ).then(
+         fn=None,
+         inputs=[model_dropdown],
+         js=save_model_js,
+     )
+     refresh_btn.click(
+         fn=refresh_model_list,
+         outputs=[model_dropdown],
+     )
+
+     # Connect theme toggle
+     theme_btn.click(fn=None, js=toggle_dark_js, outputs=[theme_btn])
+
+     # On page load: restore theme and model from localStorage
+     demo.load(fn=None, js=dark_mode_js, outputs=[theme_btn])
+     demo.load(
+         fn=lambda: state.current_model_name,
+         inputs=[],
+         outputs=[model_dropdown],
+         js=load_model_js,
+     )
+
+     # Connect control buttons
+     stop_btn.click(fn=stop_generating, outputs=[stop_btn])
+
+
+ def main():
+     demo.launch(inbrowser=True)
+
+
+ if __name__ == "__main__":
+     main()
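
For reference, the UI above wraps a generation path that can also be exercised directly from Python. The following is a minimal sketch, not part of the package diff: it assumes the mlx_vlm API matches the imports used by chat_ui.py (load, load_config, get_message_json, get_chat_template, stream_generate), and "example.jpg" is a hypothetical local image path.

# Minimal sketch of the generation path that chat_ui.py drives.
# Assumes the mlx_vlm API matches the imports in chat_ui.py above;
# "example.jpg" is a hypothetical local image path.
from mlx_vlm import load
from mlx_vlm.generate import stream_generate
from mlx_vlm.prompt_utils import get_chat_template, get_message_json
from mlx_vlm.utils import load_config

model_name = "qnguyen3/nanoLLaVA"  # same default as parse_arguments()
config = load_config(model_name)
model, processor = load(model_name, processor_kwargs={"trust_remote_code": True})

# Build a single-turn prompt carrying one image token, as chat() does
# for the final user message.
messages = [
    get_message_json(
        config["model_type"],
        "Describe this image.",
        role="user",
        skip_image_token=False,
        num_images=1,
    )
]
prompt = get_chat_template(processor, messages, add_generation_prompt=True)

# Stream tokens the same way the UI does, printing them as they arrive.
for chunk in stream_generate(
    model, processor, prompt, image="example.jpg", max_tokens=256, temperature=0.1
):
    print(chunk.text, end="", flush=True)

Run as a script, this mirrors a single chat turn; the UI adds history handling, stop support, and the token-throughput stats appended after generation.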