PyPI - loreguard-cli - Versions diffs - 0.16.0__tar.gz → 0.20.3__tar.gz - Mend

loreguard-cli 0.16.0__tar.gz → 0.20.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: loreguard-cli
-Version: 0.16.0
+Version: 0.20.3
 Summary: Local inference client for Loreguard NPCs
 Project-URL: Homepage, https://loreguard.com
 Project-URL: Documentation, https://github.com/beyond-logic-labs/loreguard-cli#readme
@@ -29,7 +29,7 @@ Requires-Dist: rich>=13.0.0
 Requires-Dist: textual>=0.47.0
 Requires-Dist: tf-keras>=2.16.0
 Requires-Dist: torch>=2.0.0
-Requires-Dist: transformers>=5.0.0
+Requires-Dist: transformers<5,>=4.36.0
 Requires-Dist: uvicorn>=0.27.0
 Requires-Dist: websockets>=12.0
 Provides-Extra: build

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "loreguard-cli"
-version = "0.16.0"
+version = "0.20.3"
 description = "Local inference client for Loreguard NPCs"
 readme = "README.md"
 license = "MIT"
@@ -28,7 +28,7 @@ dependencies = [
     "aiofiles>=24.1.0",
     "rich>=13.0.0",
     "textual>=0.47.0",
-    "transformers>=5.0.0",
+    "transformers>=4.36.0,<5",
     "torch>=2.0.0",
     "fastapi>=0.109.0",
     "uvicorn>=0.27.0",

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/__main__.py RENAMED Viewed

@@ -28,6 +28,29 @@ def main():
         print(json.dumps(status, indent=2))
         sys.exit(0 if status.get("running") else 1)
+    # Handle 'download-llama-server' command - for bundle tool delegation (ADR-0027)
+    if args and args[0] == "download-llama-server":
+        import asyncio
+        from pathlib import Path
+        from .llama_server import download_llama_server
+        output_dir = None
+        for i, a in enumerate(args):
+            if a == "--output-dir" and i + 1 < len(args):
+                output_dir = Path(args[i + 1])
+        if not output_dir:
+            print("Usage: loreguard download-llama-server --output-dir <path>", file=sys.stderr)
+            sys.exit(1)
+        output_dir.mkdir(parents=True, exist_ok=True)
+        def on_progress(msg, progress=None):
+            print(f"       {msg}")
+        asyncio.run(download_llama_server(progress_callback=on_progress, target_dir=output_dir))
+        sys.exit(0)
     # Filter out help flags - these should show CLI help
     if any(a in ('-h', '--help') for a in args):
         from .cli import main as cli_main

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/cli.py RENAMED Viewed

@@ -11,7 +11,8 @@ Environment variables (alternative to args):
     LOREGUARD_MODEL     Path to model file
     LOREGUARD_MODEL_ID  Model ID to download (if not using custom model)
     LOREGUARD_PORT      Local llama-server port (default: 8080)
-    LOREGUARD_BACKEND   Backend URL (default: wss://api.loreguard.com/workers)
+    LOREGUARD_BACKEND   Backend WebSocket URL (default: wss://console.loreguard.com/workers)
+    LOREGUARD_API       API base URL (default: https://console.loreguard.com)
     LOREGUARD_WORKER_ID Worker ID (default: hostname)
 """
@@ -26,6 +27,8 @@ from datetime import datetime
 from pathlib import Path
 from typing import Optional
+from .config import DEFAULT_API_URL, DEFAULT_BACKEND_URL
 # Setup logging
 logging.basicConfig(
     level=logging.INFO,
@@ -44,7 +47,7 @@ class LoreguardCLI:
         model_path: Optional[Path] = None,
         model_id: Optional[str] = None,
         port: int = 8080,
-        backend_url: str = "wss://api.loreguard.com/workers",
+        backend_url: str = DEFAULT_BACKEND_URL,
         worker_id: Optional[str] = None,
         model_family: str = "llama3",
     ):
@@ -454,9 +457,14 @@ Available model IDs:
     )
     parser.add_argument(
         "--backend",
-        default=os.getenv("LOREGUARD_BACKEND", "wss://api.loreguard.com/workers"),
+        default=os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
         help="Backend WebSocket URL",
     )
+    parser.add_argument(
+        "--api-url",
+        default=os.getenv("LOREGUARD_API", DEFAULT_API_URL),
+        help=f"API base URL (default: {DEFAULT_API_URL})",
+    )
     parser.add_argument(
         "-v", "--verbose",
         action="store_true",

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/config.py RENAMED Viewed

@@ -50,6 +50,7 @@ class LoreguardConfig:
     context_size: int = 16384  # llama-server context window size (configurable per game)
     max_speech_tokens: int = 50  # Max tokens for NPC speech output (Pass 4). Default: 50 (~40 words)
     model_family: str = "auto"  # Model family profile (auto, llama3, qwen3, gemma, chatml)
+    dialogue_act_enabled: bool = False  # Dialogue act classifier for filler selection
     def save(self) -> None:
         """Save configuration to disk."""
@@ -73,6 +74,7 @@ class LoreguardConfig:
                     context_size=data.get("context_size", 16384),
                     max_speech_tokens=data.get("max_speech_tokens", 50),
                     model_family=data.get("model_family", "auto"),
+                    dialogue_act_enabled=data.get("dialogue_act_enabled", False),
                 )
             except (json.JSONDecodeError, KeyError):
                 pass
@@ -121,6 +123,14 @@ class LoreguardConfig:
 # Environment Variable Configuration
 # =============================================================================
+DEFAULT_API_URL = "https://console.loreguard.com"
+DEFAULT_BACKEND_URL = "wss://console.loreguard.com/workers"
+def get_api_url() -> str:
+    """Get the Loreguard API base URL (configurable via LOREGUARD_API env var)."""
+    return os.getenv("LOREGUARD_API", DEFAULT_API_URL)
 @lru_cache(maxsize=1)
 def load_config() -> dict:
@@ -133,12 +143,13 @@ def load_config() -> dict:
     return {
         # Server settings
         "LLM_ENDPOINT": os.getenv("LLM_ENDPOINT", "http://localhost:8080"),
-        "BACKEND_URL": os.getenv("LOREGUARD_BACKEND", "wss://api.loreguard.com/workers"),
+        "BACKEND_URL": os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
+        "API_URL": os.getenv("LOREGUARD_API", DEFAULT_API_URL),
         "HOST": os.getenv("HOST", "127.0.0.1"),
         "PORT": os.getenv("PORT", "8081"),
         # Worker authentication (required for backend connection)
-        # Get API token from loreguard.com dashboard
+        # Get API token from console.loreguard.com
         "WORKER_ID": os.getenv("LOREGUARD_WORKER_ID", os.getenv("WORKER_ID", "")),
         # LOREGUARD_TOKEN is preferred, WORKER_TOKEN kept for backwards compatibility
         "LOREGUARD_TOKEN": os.getenv("LOREGUARD_TOKEN", os.getenv("WORKER_TOKEN", "")),
@@ -232,20 +243,21 @@ def get_models_dir() -> Optional[Path]:
 def resolve_model_path(model_name: str, subdir: str = "") -> str:
-    """Resolve a model path, preferring pre-shipped models over HF downloads.
+    """Resolve a model path, preferring local models over HF downloads.
     Resolution order:
     1. LOREGUARD_MODELS_DIR/<subdir>  (explicit override)
-    2. Bundle models dir using manifest.txt  (HF name → manifest key → local dir)
-    3. Bundle models dir using HF name → org--model convention  (fallback)
-    4. Original HF model name  (download from HuggingFace)
+    2. Application Support models dir/<subdir>  (standard install location)
+    3. Bundle models dir using manifest.txt  (HF name → manifest key → local dir)
+    4. Bundle models dir using HF name → org--model convention  (fallback)
+    5. Download from HuggingFace to Application Support models dir
     Args:
         model_name: HuggingFace model name (e.g., 'vectara/hallucination_evaluation_model')
         subdir: Subdirectory within MODELS_DIR to check (e.g., 'hhem', 'deberta')
     Returns:
-        Local path if pre-shipped model found, otherwise the original HF model name.
+        Local path to the model directory.
     """
     # 1. Explicit LOREGUARD_MODELS_DIR/<subdir>
     explicit_dir = get_config_value("MODELS_DIR")
@@ -254,7 +266,14 @@ def resolve_model_path(model_name: str, subdir: str = "") -> str:
         if local_path.exists() and any(local_path.iterdir()):
             return str(local_path)
-    # 2 & 3. Bundle directory resolution
+    # 2. Application Support models dir/<subdir>
+    app_models = get_data_dir() / "models"
+    if subdir:
+        local_path = app_models / subdir
+        if local_path.exists() and any(local_path.iterdir()):
+            return str(local_path)
+    # 3 & 4. Bundle directory resolution
     bundle_dir = get_bundle_dir()
     if bundle_dir:
         bundle_models = bundle_dir / "models"
@@ -275,9 +294,37 @@ def resolve_model_path(model_name: str, subdir: str = "") -> str:
         if local_path.exists() and any(local_path.iterdir()):
             return str(local_path)
+    # 5. Download from HuggingFace to Application Support models dir
+    if subdir:
+        return _download_hf_model(model_name, app_models / subdir)
     return model_name
+def _download_hf_model(model_name: str, target_dir: Path) -> str:
+    """Download a HuggingFace model to the loreguard models directory.
+    Returns:
+        Path to the downloaded model directory.
+    """
+    import logging
+    logger = logging.getLogger(__name__)
+    try:
+        from huggingface_hub import snapshot_download
+        target_dir.mkdir(parents=True, exist_ok=True)
+        logger.info(f"Downloading {model_name} to {target_dir}")
+        snapshot_download(
+            model_name,
+            local_dir=str(target_dir),
+            local_dir_use_symlinks=False,
+        )
+        logger.info(f"Downloaded {model_name} to {target_dir}")
+        return str(target_dir)
+    except Exception as e:
+        logger.warning(f"Failed to download {model_name}: {e}")
+        return model_name
 def get_config_value(key: str, default: Optional[str] = None) -> Optional[str]:
     """Get a single configuration value."""
     config = load_config()

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/http_server.py RENAMED Viewed

@@ -367,7 +367,7 @@ class EmbeddedHTTPServer:
             # Derive HTTP base URL from WebSocket URL
             # ws://localhost:8090/workers → http://localhost:8090
-            # wss://api.loreguard.com/workers → https://api.loreguard.com
+            # wss://console.loreguard.com/workers → https://console.loreguard.com
             backend_ws = server.tunnel.backend_url
             if backend_ws.startswith("wss://"):
                 base_url = "https://" + backend_ws[6:].split("/")[0]
@@ -535,15 +535,15 @@ class EmbeddedHTTPServer:
                     content={"error": "Missing 'model' field"},
                 )
-            # Security: prevent path traversal
-            if "/" in model_name or "\\" in model_name or ".." in model_name:
+            # Security: resolve and verify path stays inside models_dir
+            model_path = (server.models_dir / model_name).resolve()
+            if model_path.parent != server.models_dir.resolve():
                 return JSONResponse(
                     status_code=400,
                     content={"error": "Invalid model name"},
                 )
-            model_path = server.models_dir / model_name
-            if not model_path.exists() or not model_path.suffix == ".gguf":
+            if not model_path.exists() or model_path.suffix != ".gguf":
                 return JSONResponse(
                     status_code=404,
                     content={"error": f"Model '{model_name}' not found"},
@@ -553,6 +553,9 @@ class EmbeddedHTTPServer:
             if hasattr(server.llama_process, "model_path") and server.llama_process.model_path.name == model_name:
                 return {"status": "already_active", "model": model_name}
+            # Save original model_path for rollback on failure
+            original_model_path = server.llama_process.model_path
             try:
                 # Stop current llama-server
                 server.llama_process.stop()
@@ -564,21 +567,35 @@ class EmbeddedHTTPServer:
                 # Wait for health check (llama-server takes a few seconds to load model)
                 import httpx
                 llama_url = f"http://127.0.0.1:{server.llama_process.port}/health"
-                for attempt in range(60):  # 60 attempts × 0.5s = 30s timeout
-                    await asyncio.sleep(0.5)
-                    try:
-                        async with httpx.AsyncClient(timeout=2.0) as client:
+                async with httpx.AsyncClient(timeout=2.0) as client:
+                    for attempt in range(60):  # 60 attempts × 0.5s = 30s timeout
+                        await asyncio.sleep(0.5)
+                        try:
                             resp = await client.get(llama_url)
                             if resp.status_code == 200:
+                                # Persist selection so it survives restarts
+                                try:
+                                    from .config import LoreguardConfig
+                                    cfg = LoreguardConfig.load()
+                                    cfg.set_model_path(model_path)
+                                    cfg.save()
+                                except Exception:
+                                    pass  # Best-effort persistence
                                 return {"status": "ok", "model": model_name}
-                    except Exception:
-                        continue
+                        except Exception:
+                            continue
                 return JSONResponse(
                     status_code=500,
                     content={"error": "Model loaded but health check timed out after 30s"},
                 )
             except Exception as e:
+                # Rollback: restore original model path and try to restart
+                server.llama_process.model_path = original_model_path
+                try:
+                    server.llama_process.start()
+                except Exception:
+                    pass  # Best-effort rollback
                 return JSONResponse(
                     status_code=500,
                     content={"error": f"Failed to reload model: {e}"},

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/llama_server.py RENAMED Viewed

@@ -30,7 +30,7 @@ def _get_templates_dir() -> Path:
     return Path(__file__).parent.parent / "templates"
-LLAMA_VERSION = "b7789"  # Must match loreguard-engine bundle version
+LLAMA_VERSION = "b8467"  # Must match loreguard-engine bundle version
 # Download URLs for each platform
 BINARIES = {
@@ -265,18 +265,21 @@ def make_executable(path: Path) -> None:
 async def download_llama_server(
     progress_callback: Optional[Callable[[str, DownloadProgress | None], None]] = None,
+    target_dir: Optional[Path] = None,
 ) -> Path:
     """Download and install llama-server for the current platform.
     Args:
         progress_callback: Called with (status_message, progress_or_none)
+        target_dir: If provided, install into this directory instead of the default.
+                    Used by the bundle tool to pre-ship llama-server.
     Returns:
         Path to the installed llama-server binary
     """
     plat = get_platform()
     config = BINARIES[plat]
-    bin_dir = get_bin_dir()
+    bin_dir = target_dir or get_bin_dir()
     def notify(msg: str, progress: DownloadProgress | None = None):
         if progress_callback:
@@ -355,12 +358,12 @@ async def download_llama_server(
                 make_executable(lib)
         # Write version marker file for future version checks
-        version_file = get_version_file_path()
+        version_file = bin_dir / ".llama_version" if target_dir else get_version_file_path()
         version_file.write_text(LLAMA_VERSION)
         notify(f"llama-server {LLAMA_VERSION} installed successfully!")
-    return get_llama_server_path()
+    return bin_dir / config["binary_name"]
 class LlamaServerProcess:

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/llm.py RENAMED Viewed

@@ -61,7 +61,9 @@ class LLMRequest:
     stop: list[str] = field(default_factory=lambda: DEFAULT_STOP_SEQUENCES.copy())
     # Thinking mode control (for Qwen3)
-    disable_thinking: bool = False
+    # Defaults to True: thinking wastes tokens and breaks pipelines.
+    # Only enable explicitly when extended reasoning is desired.
+    disable_thinking: bool = True
     # If true, error if content is empty instead of falling back to reasoning_content
     require_content: bool = False
@@ -257,9 +259,10 @@ class LLMProxy:
             payload["id_slot"] = 0
             logger.info("KV cache: cache_prompt=true, id_slot=0 (verify -np 1 on server)")
-        # Disable thinking mode if requested (for Qwen3)
+        # Disable thinking mode (for Qwen3/3.5).
+        # Must use chat_template_kwargs — top-level enable_thinking is ignored by llama.cpp b8467+.
         if req.disable_thinking:
-            payload["enable_thinking"] = False
+            payload.setdefault("chat_template_kwargs", {})["enable_thinking"] = False
         # Note: JSON mode is not compatible with streaming in llama.cpp
         # If force_json is requested, fall back to non-streaming
@@ -573,9 +576,10 @@ class LLMProxy:
             payload["id_slot"] = 0
             logger.info("KV cache: cache_prompt=true, id_slot=0 (verify -np 1 on server)")
-        # Disable thinking mode if requested (for Qwen3)
+        # Disable thinking mode (for Qwen3/3.5).
+        # Must use chat_template_kwargs — top-level enable_thinking is ignored by llama.cpp b8467+.
         if req.disable_thinking:
-            payload["enable_thinking"] = False
+            payload.setdefault("chat_template_kwargs", {})["enable_thinking"] = False
         # Force JSON output if requested
         if req.force_json:

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/main.py RENAMED Viewed

@@ -31,7 +31,7 @@ from rich.console import Console
 from .tunnel import BackendTunnel
 from .llm import LLMProxy
-from .config import get_config_value, resolve_model_path
+from .config import get_config_value, resolve_model_path, DEFAULT_BACKEND_URL
 from .nli import NLIService, is_nli_model_available
 from .intent_classifier import IntentClassifier, is_intent_model_available
 from .dialogue_act_classifier import (
@@ -118,7 +118,7 @@ async def startup():
         console.print("[yellow]Intent classifier disabled (set LOREGUARD_INTENT_ENABLED=true to enable)[/yellow]")
     # Initialize dialogue act classifier (optional, for filler selection)
-    enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "true").lower() == "true"
+    enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "false").lower() == "true"
     if enable_dialogue_act:
         console.print("[cyan]Initializing dialogue act classifier...[/cyan]")
         if is_dialogue_act_model_available():
@@ -170,7 +170,7 @@ async def startup():
             chunk_detector = None
     # Connect to remote backend
-    backend_url = get_config_value("BACKEND_URL", "wss://api.lorekeeper.ai/workers")
+    backend_url = get_config_value("BACKEND_URL", DEFAULT_BACKEND_URL)
     worker_id = get_config_value("WORKER_ID", "")
     worker_token = get_config_value("WORKER_TOKEN", "")
     model_id = get_config_value("MODEL_ID", "default")

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/models_registry.py RENAMED Viewed

@@ -92,6 +92,18 @@ SUPPORTED_MODELS: list[ModelInfo] = [
         hardware="32GB RAM • 20GB VRAM",
         recommended=False,
     ),
+    ModelInfo(
+        id="qwen3.5-9b-q4km",
+        name="Qwen 3.5 9B Q4_K_M",
+        filename="Qwen3.5-9B-Q4_K_M.gguf",
+        size_gb=5.2,
+        size_bytes=5_627_044_256,
+        context_length=32768,
+        url="https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf",
+        description="Strong general model. 32K context, good reasoning.",
+        hardware="12GB RAM • 8GB VRAM",
+        recommended=False,
+    ),
 ]

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/nli.py RENAMED Viewed

@@ -328,6 +328,26 @@ class NLIService:
             return results
+    def _resolve_model_dir(self) -> Optional[str]:
+        """Resolve the actual directory containing model files.
+        For local paths, returns as-is. For HuggingFace repo IDs (e.g.
+        'vectara/hallucination_evaluation_model'), resolves the cache snapshot dir.
+        """
+        if os.path.isdir(self._model_path):
+            return self._model_path
+        try:
+            from huggingface_hub import snapshot_download
+            return snapshot_download(self._model_path, local_files_only=True)
+        except Exception:
+            # Cache miss — trigger a download so the snapshot exists
+            try:
+                from huggingface_hub import snapshot_download
+                return snapshot_download(self._model_path)
+            except Exception as e:
+                logger.warning(f"Could not resolve model dir for {self._model_path}: {e}")
+                return None
     def _patch_hhem_model_files(self):
         """Patch vendored HHEM files for transformers 5.x compatibility.
@@ -336,8 +356,31 @@ class NLIService:
         2. Is stricter about model_type matching between config.json and config class
         Since trust_remote_code loads the .py files directly, we patch before loading.
         """
+        model_dir = self._resolve_model_dir()
+        if not model_dir:
+            logger.warning("Cannot resolve model directory for patching")
+            return
+        # Also patch the modules cache (transformers copies .py files there)
+        modules_dirs = [model_dir]
+        modules_cache = os.path.join(
+            os.path.expanduser("~"), ".cache", "huggingface", "modules",
+            "transformers_modules",
+        )
+        if os.path.isdir(modules_cache):
+            for root, dirs, files in os.walk(modules_cache):
+                if "modeling_hhem_v2.py" in files:
+                    modules_dirs.append(root)
+        for patch_dir in modules_dirs:
+            self._patch_dir(patch_dir)
+    def _patch_dir(self, patch_dir: str):
+        """Apply HHEM patches to a single directory."""
         # Patch 1: modeling_hhem_v2.py — add missing class attributes
-        model_file = os.path.join(self._model_path, "modeling_hhem_v2.py")
+        model_file = os.path.join(patch_dir, "modeling_hhem_v2.py")
+        if not os.path.exists(model_file):
+            return
         if os.path.exists(model_file):
             try:
                 content = open(model_file, "r").read()
@@ -354,13 +397,18 @@ class NLIService:
                     if patched != content:
                         with open(model_file, "w") as f:
                             f.write(patched)
-                        logger.info("Patched modeling_hhem_v2.py for transformers 5.x")
+                        # Clear __pycache__ so patched file is reloaded
+                        pycache = os.path.join(patch_dir, "__pycache__")
+                        if os.path.isdir(pycache):
+                            import shutil
+                            shutil.rmtree(pycache)
+                        logger.info(f"Patched {model_file} for transformers 5.x")
             except Exception as e:
                 logger.warning(f"Could not patch modeling_hhem_v2.py: {e}")
         # Patch 2: config.json — fix model_type mismatch
         # config.json has "HHEMv2Config" but the config class defines model_type = "HHEMv2"
-        config_file = os.path.join(self._model_path, "config.json")
+        config_file = os.path.join(patch_dir, "config.json")
         if os.path.exists(config_file):
             try:
                 content = open(config_file, "r").read()
@@ -375,24 +423,40 @@ class NLIService:
             except Exception as e:
                 logger.warning(f"Could not patch config.json: {e}")
-        # Patch 3: configuration_hhem_v2.py — use local flan-t5-base instead of HuggingFace
-        # The HHEM model downloads google/flan-t5-base config+tokenizer at init.
-        # If we've bundled those files locally, rewrite the foundation path.
-        config_py = os.path.join(self._model_path, "configuration_hhem_v2.py")
-        local_foundation = os.path.join(self._model_path, "flan-t5-base")
+        # Patch 3: configuration_hhem_v2.py, point the foundation at the bundled
+        # flan-t5-base. Resolve it RELATIVE to the config file at runtime (via
+        # __file__) so the path is portable and never a baked absolute machine path.
+        config_py = os.path.join(patch_dir, "configuration_hhem_v2.py")
+        local_foundation = os.path.join(patch_dir, "flan-t5-base")
         if os.path.exists(config_py) and os.path.isdir(local_foundation):
             try:
+                import re
                 content = open(config_py, "r").read()
-                if '"google/flan-t5-base"' in content:
-                    # Use absolute path to the bundled flan-t5-base files
-                    abs_path = os.path.abspath(local_foundation)
-                    patched = content.replace(
-                        '"google/flan-t5-base"',
-                        f'"{abs_path}"',
+                resolver = (
+                    'os.path.join(os.path.dirname(os.path.abspath(__file__)), '
+                    '"flan-t5-base")'
+                )
+                patched = content
+                if "import os" not in patched:
+                    patched = patched.replace(
+                        "from transformers import PretrainedConfig",
+                        "from transformers import PretrainedConfig\nimport os",
+                        1,
                     )
+                # Replace the class-level foundation assignment, whatever it is now
+                # (the HuggingFace default OR a previously-baked absolute path), with
+                # the self-resolving expression. count=1 leaves any __init__ default.
+                patched = re.sub(
+                    r'foundation\s*=\s*"[^"]*"',
+                    "foundation = " + resolver,
+                    patched,
+                    count=1,
+                )
+                if patched != content:
                     with open(config_py, "w") as f:
                         f.write(patched)
-                    logger.info(f"Patched foundation to local: {abs_path}")
+                    logger.info("Patched foundation to self-resolving relative path")
             except Exception as e:
                 logger.warning(f"Could not patch configuration_hhem_v2.py: {e}")

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/npc_chat.py RENAMED Viewed

@@ -15,6 +15,7 @@ Rate Limits (when using Player JWT):
 import asyncio
 import logging
+import os
 from dataclasses import dataclass
 from typing import Optional
@@ -35,8 +36,9 @@ from .term_ui import (
 # Configure module logger
 logger = logging.getLogger(__name__)
-# Loreguard API base URL
-LOREGUARD_API_URL = "https://api.loreguard.com"
+# Loreguard API base URL (configurable via LOREGUARD_API env var)
+from .config import get_api_url
+LOREGUARD_API_URL = get_api_url()
 @dataclass
@@ -407,7 +409,7 @@ class NPCChat:
                 raise Exception("Invalid API token - please check your authentication")
             elif e.response.status_code == 404:
                 logger.warning("No characters found for this account")
-                raise Exception("No characters found - register NPCs at loreguard.com first")
+                raise Exception("No characters found - register NPCs at console.loreguard.com first")
             logger.error("HTTP error fetching characters: %d", e.response.status_code)
             raise
         except httpx.RequestError as e:
@@ -428,7 +430,7 @@ class NPCChat:
             return None
         if not characters:
-            print_error("No NPCs registered. Create NPCs at loreguard.com first.")
+            print_error("No NPCs registered. Create NPCs at console.loreguard.com first.")
             return None
         items = [
@@ -642,7 +644,7 @@ async def run_npc_chat(
     Args:
         api_token: Loreguard API token for authentication (for server-side use)
         player_jwt: Player JWT from Steam exchange (for game clients)
-        base_url: Loreguard API base URL (default: https://api.loreguard.com)
+        base_url: Loreguard API base URL (default: https://console.loreguard.com)
         config: Optional client configuration for timeouts
         verbose: If True, show pipeline pass updates via WebSocket
         tunnel: BackendTunnel instance for receiving pass_update messages (required for verbose)

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/steam.py RENAMED Viewed

@@ -12,7 +12,7 @@ Usage:
         read_timeout=15.0,
         max_retries=3
     )
-    steam_auth = SteamAuth(api_url="https://api.loreguard.com", config=config)
+    steam_auth = SteamAuth(api_url="https://console.loreguard.com", config=config)
     # Exchange Steam ticket for Player JWT
     result = await steam_auth.exchange_ticket(
@@ -40,8 +40,9 @@ import httpx
 # Configure module logger
 logger = logging.getLogger(__name__)
-# Default Loreguard API URL
-LOREGUARD_API_URL = "https://api.loreguard.com"
+# Default Loreguard API URL (configurable via LOREGUARD_API env var)
+from .config import get_api_url
+LOREGUARD_API_URL = get_api_url()
 # Validation patterns
 STEAM_APP_ID_PATTERN = re.compile(r"^\d{1,10}$")

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/modals/auth_menu.py RENAMED Viewed

@@ -12,6 +12,7 @@ from textual.widgets import Input, Static, ListView, ListItem, Label
 from rich.text import Text
 from ..styles import PURPLE, CYAN, PINK, FG, FG_DIM, GREEN, RED
+from ...config import get_api_url
 from ..widgets.banner import get_gradient_color
 if TYPE_CHECKING:
@@ -227,7 +228,7 @@ class AuthMenuModal(ModalScreen[tuple | None]):
         # Update status
         status = self.query_one("#status-line", Static)
-        status.update(Text("Get your token at loreguard.com/dashboard", style=FG_DIM))
+        status.update(Text("Get your token at console.loreguard.com", style=FG_DIM))
     def _switch_to_menu(self) -> None:
         """Switch back to menu mode."""
@@ -270,7 +271,7 @@ class AuthMenuModal(ModalScreen[tuple | None]):
         try:
             async with httpx.AsyncClient(timeout=10.0) as client:
                 response = await client.get(
-                    "https://api.loreguard.com/api/auth/me",
+                    f"{get_api_url()}/api/auth/me",
                     headers={"Authorization": f"Bearer {token}"},
                 )
                 if response.status_code == 200:

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/modals/token_input.py RENAMED Viewed

@@ -59,7 +59,7 @@ class TokenInputModal(ModalScreen[str | None]):
         """Compose the modal layout."""
         with Vertical():
             yield Static("Enter API Token", classes="modal-title")
-            yield Static("Get your token at loreguard.com/dashboard", classes="modal-hint")
+            yield Static("Get your token at console.loreguard.com", classes="modal-hint")
             yield Input(placeholder="Paste your token here...", password=True, id="token-input")
             yield Static("enter submit • esc cancel", classes="modal-footer")

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/modals/unified_palette.py RENAMED Viewed

@@ -284,7 +284,7 @@ class UnifiedPaletteModal(ModalScreen[tuple[str, Any] | None]):
         # Add models
         if self._show_models:
             from ...llama_server import get_models_dir
-            from ...models_registry import SUPPORTED_MODELS
+            from ...models_registry import SUPPORTED_MODELS, ModelInfo
             from ...hf_discovery import discover_models
             from ..widgets.hardware_info import detect_hardware
@@ -302,6 +302,33 @@ class UnifiedPaletteModal(ModalScreen[tuple[str, Any] | None]):
             except Exception:
                 all_models = list(SUPPORTED_MODELS)
+            # Ensure all static registry models are included
+            known_filenames = {m.filename for m in all_models}
+            for model in SUPPORTED_MODELS:
+                if model.filename not in known_filenames:
+                    all_models.append(model)
+                    known_filenames.add(model.filename)
+            # Scan local models dir for GGUF files not in registry/discovery
+            if self._models_dir and self._models_dir.exists():
+                for gguf_file in self._models_dir.glob("*.gguf"):
+                    if gguf_file.name not in known_filenames:
+                        size_bytes = gguf_file.stat().st_size
+                        size_gb = size_bytes / (1024 ** 3)
+                        stem = gguf_file.stem
+                        all_models.append(ModelInfo(
+                            id=f"local-{stem.lower()}",
+                            name=stem.replace("-", " ").replace("_", " "),
+                            filename=gguf_file.name,
+                            size_gb=round(size_gb, 1),
+                            size_bytes=size_bytes,
+                            context_length=8192,
+                            url="",
+                            description="Local model",
+                            hardware="",
+                        ))
+                        known_filenames.add(gguf_file.name)
             # Sort: most recent first, then by size descending
             def model_sort_key(m):
                 # Primary: sort by recency (days_ago), None goes last

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/auth.py RENAMED Viewed

@@ -16,6 +16,7 @@ from ..widgets.banner import LoreguardBanner
 from ..widgets.hardware_info import HardwareInfo
 from ..widgets.footer import LoreguardFooter
 from ..styles import CYAN, PINK, GREEN, RED, FG_DIM
+from ...config import get_api_url
 if TYPE_CHECKING:
     from ..app import LoreguardApp
@@ -115,7 +116,7 @@ class AuthScreen(Screen):
         try:
             async with httpx.AsyncClient(timeout=10.0) as client:
                 response = await client.get(
-                    "https://api.loreguard.com/api/auth/me",
+                    f"{get_api_url()}/api/auth/me",
                     headers={"Authorization": f"Bearer {token}"},
                 )
                 if response.status_code == 200:

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/main.py RENAMED Viewed

@@ -16,7 +16,7 @@ from ..widgets.hardware_info import HardwareInfo
 from ..widgets.server_monitor import ServerMonitor
 from ..widgets.npc_chat import NPCChat
 from ..widgets.footer import LoreguardFooter
-from ...config import LoreguardConfig
+from ...config import LoreguardConfig, get_api_url, DEFAULT_BACKEND_URL
 if TYPE_CHECKING:
     from ..app import LoreguardApp
@@ -450,7 +450,7 @@ class MainScreen(Screen):
                     self._log(f"Still loading... ({elapsed}s)")
                 # Check if process died
-                if app._llama_process.process and app._llama_process.process.poll() is not None:
+                if app._llama_process and app._llama_process.process and app._llama_process.process.poll() is not None:
                     self._log("llama-server process died", "error")
                     break
@@ -548,7 +548,7 @@ class MainScreen(Screen):
             # Load dialogue act classifier (filler selection) - run in thread pool
             dialogue_act_classifier = None
-            enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "true").lower() == "true"
+            enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "false").lower() == "true"
             if not enable_dialogue_act:
                 self._log("Dialogue act classifier disabled via LOREGUARD_DIALOGUE_ACT_ENABLED")
             else:
@@ -649,7 +649,7 @@ class MainScreen(Screen):
                     self._update_connection_status("connecting")
             app._tunnel = BackendTunnel(
-                backend_url="wss://api.loreguard.com/workers",
+                backend_url=os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
                 llm_proxy=llm_proxy,
                 worker_id=app.worker_id,
                 worker_token=app.api_token,
@@ -851,7 +851,7 @@ class MainScreen(Screen):
         try:
             async with httpx.AsyncClient(timeout=10.0) as client:
                 response = await client.get(
-                    "https://api.loreguard.com/api/characters",
+                    f"{get_api_url()}/api/characters",
                     headers={"Authorization": f"Bearer {app.api_token}"},
                 )
@@ -862,7 +862,7 @@ class MainScreen(Screen):
                     npcs = [c for c in characters if c.get("type") != "world"]
                     if not npcs:
-                        self._update_status("No NPCs registered. Create NPCs at loreguard.com first.")
+                        self._update_status("No NPCs registered. Create NPCs at console.loreguard.com first.")
                         return
                     # Create NPC items

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/running.py RENAMED Viewed

@@ -17,6 +17,7 @@ from ..widgets.banner import LoreguardBanner
 from ..widgets.hardware_info import HardwareInfo
 from ..widgets.footer import LoreguardFooter
 from ..styles import CYAN, GREEN, YELLOW, RED, FG_DIM
+from ...config import DEFAULT_BACKEND_URL
 if TYPE_CHECKING:
     from ..app import LoreguardApp
@@ -256,7 +257,7 @@ class RunningScreen(Screen):
                 # Load Dialogue Act Classifier
                 dialogue_act_classifier = None
-                enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "true").lower() == "true"
+                enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "false").lower() == "true"
                 if not enable_dialogue_act:
                     self._update_status("dialogue_act", "Dialogue Act", "Disabled", "info")
                     self._log("Dialogue act classifier disabled via LOREGUARD_DIALOGUE_ACT_ENABLED", "info")
@@ -322,7 +323,7 @@ class RunningScreen(Screen):
                 model_id = app.model_path.stem
                 self._tunnel = BackendTunnel(
-                    backend_url="wss://api.loreguard.com/workers",
+                    backend_url=os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
                     llm_proxy=llm_proxy,
                     worker_id=app.worker_id,
                     worker_token=app.api_token,

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/npc_chat.py RENAMED Viewed

@@ -6,6 +6,7 @@ Uses the local proxy for NPC conversations with token streaming:
 import json
 import logging
+import os
 from typing import TYPE_CHECKING
 import httpx
@@ -23,8 +24,9 @@ from ...runtime import RuntimeInfo
 if TYPE_CHECKING:
     from ..app import LoreguardApp
-# Fallback to cloud API if local proxy unavailable
-LOREGUARD_API_URL = "https://api.loreguard.com"
+# Fallback to cloud API if local proxy unavailable (configurable via LOREGUARD_API env var)
+from ...config import get_api_url
+LOREGUARD_API_URL = get_api_url()
 def get_local_proxy_url() -> str | None:

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tunnel.py RENAMED Viewed

@@ -81,7 +81,8 @@ class BackendTunnel:
         self.registered = False
         self.backend_version = ""  # Populated from worker_ack
         self._reconnect_delay = 1  # Start with 1 second
-        self._max_reconnect_delay = 60  # Max 60 seconds
+        self._max_reconnect_delay = 3  # Cap at 3s until first successful connection
+        self._has_connected = False  # Tracks if we've ever connected successfully
         self._running = True
         self._shutdown_requested = False
         self._heartbeat_task: asyncio.Task | None = None
@@ -122,7 +123,7 @@ class BackendTunnel:
         """Establish and maintain connection to backend with auto-reconnect."""
         if not self.worker_id or not self.worker_token:
             self._log("Error: Worker ID and API token are required", "error")
-            self._log("Get an API token from loreguard.com dashboard", "warn")
+            self._log("Get an API token from console.loreguard.com", "warn")
             return
         last_error = ""
@@ -210,6 +211,11 @@ class BackendTunnel:
         connection_start = time.time()
         self._log("Connected to backend!", "success")
+        # After first successful connection, use longer backoff for reconnections
+        if not self._has_connected:
+            self._has_connected = True
+            self._max_reconnect_delay = 60
         # Register as worker
         success, error_reason = await self._register_worker()
         if not success:

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/wizard.py RENAMED Viewed

@@ -54,6 +54,8 @@ from rich.box import ROUNDED
 from rich.align import Align
 from rich.layout import Layout
+from .config import get_api_url, DEFAULT_BACKEND_URL
 # Logger instance
 log = logging.getLogger("loreguard")
@@ -1044,7 +1046,7 @@ async def step_authentication(app: Optional[TUIApp] = None) -> tuple[Optional[st
     try:
         async with httpx.AsyncClient(timeout=10.0) as client:
             response = await client.get(
-                "https://api.loreguard.com/api/auth/me",
+                f"{get_api_url()}/api/auth/me",
                 headers={"Authorization": f"Bearer {token}"},
             )
             if response.status_code == 200:
@@ -1510,7 +1512,7 @@ async def step_start(
             model_id = _resolve_backend_model_id(model_path.stem)
             tunnel = BackendTunnel(
-                backend_url="wss://api.loreguard.com/workers",
+                backend_url=os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
                 llm_proxy=llm_proxy,
                 worker_id=worker_id,
                 worker_token=token,

{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/uv.lock RENAMED Viewed

@@ -476,7 +476,7 @@ wheels = [
 [[package]]
 name = "huggingface-hub"
-version = "0.36.0"
+version = "0.36.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "filelock" },
@@ -488,9 +488,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/98/63/4910c5fa9128fdadf6a9c5ac138e8b1b6cee4ca44bf7915bbfbce4e355ee/huggingface_hub-0.36.0.tar.gz", hash = "sha256:47b3f0e2539c39bf5cde015d63b72ec49baff67b6931c3d97f3f84532e2b8d25", size = 463358, upload-time = "2025-10-23T12:12:01.413Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/7c/b7/8cb61d2eece5fb05a83271da168186721c450eb74e3c31f7ef3169fa475b/huggingface_hub-0.36.2.tar.gz", hash = "sha256:1934304d2fb224f8afa3b87007d58501acfda9215b334eed53072dd5e815ff7a", size = 649782, upload-time = "2026-02-06T09:24:13.098Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/cb/bd/1a875e0d592d447cbc02805fd3fe0f497714d6a2583f59d14fa9ebad96eb/huggingface_hub-0.36.0-py3-none-any.whl", hash = "sha256:7bcc9ad17d5b3f07b57c78e79d527102d08313caa278a641993acddcb894548d", size = 566094, upload-time = "2025-10-23T12:11:59.557Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/af/48ac8483240de756d2438c380746e7130d1c6f75802ef22f3c6d49982787/huggingface_hub-0.36.2-py3-none-any.whl", hash = "sha256:48f0c8eac16145dfce371e9d2d7772854a4f591bcb56c9cf548accf531d54270", size = 566395, upload-time = "2026-02-06T09:24:11.133Z" },
 ]
 [[package]]
@@ -600,7 +600,7 @@ wheels = [
 [[package]]
 name = "loreguard-cli"
-version = "0.14.5"
+version = "0.16.0"
 source = { editable = "." }
 dependencies = [
     { name = "aiofiles" },
@@ -649,7 +649,7 @@ requires-dist = [
     { name = "textual", specifier = ">=0.47.0" },
     { name = "tf-keras", specifier = ">=2.16.0" },
     { name = "torch", specifier = ">=2.0.0" },
-    { name = "transformers", specifier = ">=4.36.0" },
+    { name = "transformers", specifier = ">=4.36.0,<5" },
     { name = "uvicorn", specifier = ">=0.27.0" },
     { name = "websockets", specifier = ">=12.0" },
 ]
@@ -2226,7 +2226,7 @@ wheels = [
 [[package]]
 name = "transformers"
-version = "4.57.3"
+version = "4.57.6"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "filelock" },
@@ -2241,9 +2241,9 @@ dependencies = [
     { name = "tokenizers" },
     { name = "tqdm" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/dd/70/d42a739e8dfde3d92bb2fff5819cbf331fe9657323221e79415cd5eb65ee/transformers-4.57.3.tar.gz", hash = "sha256:df4945029aaddd7c09eec5cad851f30662f8bd1746721b34cc031d70c65afebc", size = 10139680, upload-time = "2025-11-25T15:51:30.139Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912, upload-time = "2026-01-16T10:38:39.284Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/6a/6b/2f416568b3c4c91c96e5a365d164f8a4a4a88030aa8ab4644181fdadce97/transformers-4.57.3-py3-none-any.whl", hash = "sha256:c77d353a4851b1880191603d36acb313411d3577f6e2897814f333841f7003f4", size = 11993463, upload-time = "2025-11-25T15:51:26.493Z" },
+    { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload-time = "2026-01-16T10:38:31.289Z" },
 ]
 [[package]]