PyPI - loreguard-cli - Versions diffs - 0.14.0__tar.gz → 0.14.1__tar.gz - Mend

loreguard-cli 0.14.0.tar.gz → 0.14.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

{loreguard_cli-0.14.0 → loreguard_cli-0.14.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: loreguard-cli
-Version: 0.14.0
+Version: 0.14.1
 Summary: Local inference client for Loreguard NPCs
 Project-URL: Homepage, https://loreguard.com
 Project-URL: Documentation, https://github.com/beyond-logic-labs/loreguard-cli#readme

loreguard_cli-0.14.1/loreguard.spec ADDED Viewed

@@ -0,0 +1,42 @@
+# -*- mode: python ; coding: utf-8 -*-
+from PyInstaller.utils.hooks import collect_submodules
+hiddenimports = ['src', 'src.config', 'src.llm', 'src.llama_server', 'src.tunnel', 'src.term_ui', 'src.models_registry', 'src.cli', 'src.npc_chat', 'httpx', 'websockets', 'aiofiles', 'pydantic']
+hiddenimports += collect_submodules('src')
+a = Analysis(
+    ['loreguard_entry.py'],
+    pathex=['.'],
+    binaries=[],
+    datas=[('templates', 'templates')],
+    hiddenimports=hiddenimports,
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    noarchive=False,
+    optimize=0,
+)
+pyz = PYZ(a.pure)
+exe = EXE(
+    pyz,
+    a.scripts,
+    a.binaries,
+    a.datas,
+    [],
+    name='loreguard',
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=True,
+    upx_exclude=[],
+    runtime_tmpdir=None,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+)

{loreguard_cli-0.14.0 → loreguard_cli-0.14.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "loreguard-cli"
-version = "0.14.0"
+version = "0.14.1"
 description = "Local inference client for Loreguard NPCs"
 readme = "README.md"
 license = "MIT"
@@ -62,3 +62,8 @@ packages = ["src"]
 [tool.ruff]
 line-length = 100
 target-version = "py310"
+[dependency-groups]
+dev = [
+    "pyinstaller>=6.17.0",
+]

{loreguard_cli-0.14.0 → loreguard_cli-0.14.1}/scripts/build.py RENAMED Viewed

@@ -36,6 +36,8 @@ def main():
         "--clean",
         # Add src to path so imports work
         "--paths", ".",
+        # Include templates directory (jinja chat templates for llama-server)
+        "--add-data", "templates:templates",
         # Collect all src submodules
         "--collect-submodules", "src",
         # Add hidden imports that PyInstaller might miss

{loreguard_cli-0.14.0 → loreguard_cli-0.14.1}/src/cli.py RENAMED Viewed

@@ -52,8 +52,11 @@ class LoreguardCLI:
         self.model_id = model_id
         self.port = port
         self.backend_url = backend_url
-        # Worker ID: use provided value, or default to hostname
-        self.worker_id = worker_id or socket.gethostname() or "worker"
+        # Worker ID: use provided value, or default to sanitized hostname.
+        # Validator requires ^[a-zA-Z0-9_-]{1,64}$ — replace dots with hyphens.
+        raw_id = worker_id or socket.gethostname() or "worker"
+        import re
+        self.worker_id = re.sub(r'[^a-zA-Z0-9_-]', '-', raw_id)[:64]
         self._llama = None
         self._tunnel = None
@@ -275,25 +278,8 @@ class LoreguardCLI:
             except Exception as e:
                 log.warning(f"Intent classifier error: {e}")
-            # Initialize dialogue act classifier
+            # Dialogue act classifier disabled
             dialogue_act_classifier = None
-            try:
-                from .dialogue_act_classifier import (
-                    DialogueActClassifier,
-                    is_dialogue_act_model_available,
-                )
-                if is_dialogue_act_model_available():
-                    log.info("Loading dialogue act classifier...")
-                    dialogue_act_classifier = DialogueActClassifier()
-                    if dialogue_act_classifier.load_model():
-                        log.info(f"Dialogue act classifier ready (device: {dialogue_act_classifier.device})")
-                    else:
-                        log.warning("Dialogue act classifier failed to load")
-                        dialogue_act_classifier = None
-                else:
-                    log.info("Dialogue act model not available, skipping")
-            except Exception as e:
-                log.warning(f"Dialogue act classifier error: {e}")
             # Initialize chunk detector (ADR-0023) - shares model with intent classifier
             chunk_detector = None
@@ -469,6 +455,11 @@ Available model IDs:
         action="store_true",
         help="Enable debug logging and show pipeline pass updates (in wizard mode)",
     )
+    parser.add_argument(
+        "--bundle-dir",
+        default=os.getenv("LOREGUARD_BUNDLE_DIR", ""),
+        help="Loreguard bundle directory. Auto-discovers models from manifest.txt.",
+    )
     parser.add_argument(
         "--dev",
         action="store_true",
@@ -485,6 +476,10 @@ Available model IDs:
     if args.verbose:
         logging.getLogger().setLevel(logging.DEBUG)
+    # Propagate --bundle-dir to env so config.py picks it up via get_bundle_dir()
+    if args.bundle_dir:
+        os.environ["LOREGUARD_BUNDLE_DIR"] = args.bundle_dir
     # Chat mode - test NPC chat directly via API (no model needed)
     if args.chat:
         if not args.token:
@@ -509,8 +504,17 @@ Available model IDs:
         args.token = "dev_mock_token"
         log.info("Running in DEV MODE - no backend connection")
     else:
-        # Validate token is present (server will validate format)
-        if not args.token:
+        # Local bundle backends (ws:// to localhost/127.0.0.1) run with RequireAuth=false,
+        # so any non-empty token is accepted. Only require a real token for cloud backends.
+        backend = args.backend
+        is_local_backend = (
+            backend.startswith("ws://") and
+            any(backend.startswith(f"ws://{h}") for h in ("localhost", "127.0.0.1", "[::1]"))
+        )
+        if not args.token and is_local_backend:
+            # Local bundle backends run with RequireAuth=false — any non-empty token works.
+            args.token = "local"
+        elif not args.token:
             log.error("Token required. Use --token or set LOREGUARD_TOKEN (or use --dev)")
             sys.exit(1)

{loreguard_cli-0.14.0 → loreguard_cli-0.14.1}/src/config.py RENAMED Viewed

@@ -161,25 +161,83 @@ def load_config() -> dict:
         # Pre-shipped llama-server binary path (enterprise bundles).
         # When set, skips auto-download and uses this binary directly.
         "LLAMA_SERVER_PATH": os.getenv("LOREGUARD_LLAMA_SERVER_PATH", ""),
+        # Bundle directory (set by game launchers that ship a loreguard bundle).
+        # When set, the client auto-discovers models from manifest.txt inside the bundle.
+        # This is the single env var a game needs to set — no per-model configuration required.
+        "BUNDLE_DIR": os.getenv("LOREGUARD_BUNDLE_DIR", ""),
     }
+def get_bundle_dir() -> Optional[Path]:
+    """Get the loreguard bundle directory, if configured via LOREGUARD_BUNDLE_DIR.
+    Game launchers set this to the bundle root so the client can auto-discover
+    models from manifest.txt without any per-model configuration.
+    """
+    bundle_dir = get_config_value("BUNDLE_DIR")
+    if bundle_dir:
+        path = Path(bundle_dir)
+        if path.exists() and path.is_dir():
+            return path
+    return None
+def get_bundle_manifest() -> dict:
+    """Parse the bundle's manifest.txt into a logical-name → dir-name mapping.
+    Returns an empty dict if no bundle dir is configured or manifest is missing.
+    Manifest format:
+        nli=vectara--hallucination_evaluation_model
+        embedding=BAAI--bge-small-en-v1.5
+        reranker=cross-encoder--ms-marco-MiniLM-L-6-v2
+    """
+    bundle_dir = get_bundle_dir()
+    if not bundle_dir:
+        return {}
+    manifest_path = bundle_dir / "models" / "manifest.txt"
+    if not manifest_path.exists():
+        return {}
+    result = {}
+    for line in manifest_path.read_text().splitlines():
+        line = line.strip()
+        if not line or line.startswith("#"):
+            continue
+        if "=" in line:
+            key, _, value = line.partition("=")
+            result[key.strip()] = value.strip()
+    return result
 def get_models_dir() -> Optional[Path]:
     """Get the pre-shipped models directory, if configured (ADR-0027).
-    Returns None if not set, meaning models should be auto-downloaded from HF.
+    Checks LOREGUARD_MODELS_DIR first, then falls back to the bundle's models dir.
+    Returns None if neither is set, meaning models should be auto-downloaded from HF.
     """
     models_dir = get_config_value("MODELS_DIR")
     if models_dir:
         path = Path(models_dir)
         if path.exists() and path.is_dir():
             return path
+    bundle_dir = get_bundle_dir()
+    if bundle_dir:
+        path = bundle_dir / "models"
+        if path.exists() and path.is_dir():
+            return path
     return None
 def resolve_model_path(model_name: str, subdir: str = "") -> str:
     """Resolve a model path, preferring pre-shipped models over HF downloads.
+    Resolution order:
+    1. LOREGUARD_MODELS_DIR/<subdir>  (explicit override)
+    2. Bundle models dir using manifest.txt  (HF name → manifest key → local dir)
+    3. Bundle models dir using HF name → org--model convention  (fallback)
+    4. Original HF model name  (download from HuggingFace)
     Args:
         model_name: HuggingFace model name (e.g., 'vectara/hallucination_evaluation_model')
         subdir: Subdirectory within MODELS_DIR to check (e.g., 'hhem', 'deberta')
@@ -187,11 +245,34 @@ def resolve_model_path(model_name: str, subdir: str = "") -> str:
     Returns:
         Local path if pre-shipped model found, otherwise the original HF model name.
     """
-    models_dir = get_models_dir()
-    if models_dir and subdir:
-        local_path = models_dir / subdir
+    # 1. Explicit LOREGUARD_MODELS_DIR/<subdir>
+    explicit_dir = get_config_value("MODELS_DIR")
+    if explicit_dir and subdir:
+        local_path = Path(explicit_dir) / subdir
         if local_path.exists() and any(local_path.iterdir()):
             return str(local_path)
+    # 2 & 3. Bundle directory resolution
+    bundle_dir = get_bundle_dir()
+    if bundle_dir:
+        bundle_models = bundle_dir / "models"
+        # Try manifest.txt: find the dir name for this HF model name
+        manifest = get_bundle_manifest()
+        # The manifest uses org--model naming (/ replaced by --)
+        hf_as_dir = model_name.replace("/", "--")
+        for _key, dir_name in manifest.items():
+            if dir_name == hf_as_dir:
+                local_path = bundle_models / dir_name
+                if local_path.exists() and any(local_path.iterdir()):
+                    return str(local_path)
+                break
+        # Fallback: check bundle models dir using org--model convention directly
+        local_path = bundle_models / hf_as_dir
+        if local_path.exists() and any(local_path.iterdir()):
+            return str(local_path)
     return model_name

{loreguard_cli-0.14.0 → loreguard_cli-0.14.1}/src/llama_server.py RENAMED Viewed

@@ -22,6 +22,14 @@ from typing import AsyncGenerator, Callable, Optional
 import httpx
+def _get_templates_dir() -> Path:
+    """Return the templates directory, handling PyInstaller onefile bundles."""
+    if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
+        return Path(sys._MEIPASS) / "templates"
+    return Path(__file__).parent.parent / "templates"
 LLAMA_VERSION = "b7789"  # Must match loreguard-engine bundle version
 # Download URLs for each platform
@@ -402,7 +410,7 @@ class LlamaServerProcess:
             # Llama 3.1's built-in template forces tool-calling format even without tools,
             # so we use a stripped-down template that only handles chat messages.
             "--jinja",
-            "--chat-template-file", str(Path(__file__).parent.parent / "templates" / "llama31-no-tools.jinja"),
+            "--chat-template-file", str(_get_templates_dir() / "llama31-no-tools.jinja"),
         ]
         # Add LoRA adapter if specified

{loreguard_cli-0.14.0 → loreguard_cli-0.14.1}/src/llm.py RENAMED Viewed

@@ -797,8 +797,8 @@ class LLMProxy:
             safe_filename = self._validate_cache_filename(filename)
             response = await self.client.post(
-                f"{self.endpoint}/slots/{slot_id}?action=save&filename={safe_filename}",
-                json={},  # llama-server expects JSON body
+                f"{self.endpoint}/slots/{slot_id}?action=save",
+                json={"filename": safe_filename},
                 timeout=30.0,
             )
             if response.status_code == 200:
@@ -889,8 +889,8 @@ class LLMProxy:
             safe_filename = self._validate_cache_filename(filename)
             response = await self.client.post(
-                f"{self.endpoint}/slots/{slot_id}?action=restore&filename={safe_filename}",
-                json={},  # llama-server expects JSON body
+                f"{self.endpoint}/slots/{slot_id}?action=restore",
+                json={"filename": safe_filename},
                 timeout=30.0,
             )
             if response.status_code == 200:

{loreguard_cli-0.14.0 → loreguard_cli-0.14.1}/src/models_registry.py RENAMED Viewed

@@ -52,58 +52,44 @@ class AdapterInfo:
 # Supported models for NPC inference
-# Fine-tuned Loreguard models with multi-pass pipeline training
-# Based on unsloth/Llama-3.1-8B-Instruct with Unsloth Dynamic (UD) quantization
-# UD = per-layer optimized quantization for better accuracy at same VRAM
+# Fine-tuned Loreguard NPC model based on Llama 3.1 8B Instruct
+# https://huggingface.co/beyond-logic-labs/loreguard-npc-llama3.1-8b-gguf
 # Ordered by recommendation (best first)
 SUPPORTED_MODELS: list[ModelInfo] = [
-    # GGUF models with Unsloth Dynamic quantization (cross-platform, uses llama-server)
     ModelInfo(
-        id="loreguard-vanilla-ud-q6k",
-        name="Loreguard Vanilla UD Q6_K",
-        filename="loreguard-vanilla-UD-Q6_K.gguf",
-        size_gb=6.6,
-        size_bytes=7_085_559_072,
+        id="loreguard-npc-q6k",
+        name="Loreguard NPC Llama 3.1 8B Q6_K",
+        filename="loreguard-npc-llama3.1-8b-Q6_K.gguf",
+        size_gb=6.1,
+        size_bytes=6_596_010_976,
         context_length=8192,
-        url=f"https://huggingface.co/{HF_ORG}/loreguard-vanilla-gguf/resolve/main/loreguard-vanilla-UD-Q6_K.gguf",
+        url=f"https://huggingface.co/{HF_ORG}/loreguard-npc-llama3.1-8b-gguf/resolve/main/loreguard-npc-llama3.1-8b-Q6_K.gguf",
         description="Recommended. Best quality/size balance.",
         hardware="12GB RAM • 8GB VRAM",
         recommended=True,
     ),
     ModelInfo(
-        id="loreguard-vanilla-ud-q5km",
-        name="Loreguard Vanilla UD Q5_K_M",
-        filename="loreguard-vanilla-UD-Q5_K_M.gguf",
-        size_gb=5.7,
-        size_bytes=6_145_035_552,
+        id="loreguard-npc-q8",
+        name="Loreguard NPC Llama 3.1 8B Q8_0",
+        filename="loreguard-npc-llama3.1-8b-Q8_0.gguf",
+        size_gb=7.9,
+        size_bytes=8_540_775_392,
         context_length=8192,
-        url=f"https://huggingface.co/{HF_ORG}/loreguard-vanilla-gguf/resolve/main/loreguard-vanilla-UD-Q5_K_M.gguf",
-        description="Good quality, smaller size.",
-        hardware="10GB RAM • 6-8GB VRAM",
-        recommended=False,
-    ),
-    ModelInfo(
-        id="loreguard-vanilla-ud-q4km",
-        name="Loreguard Vanilla UD Q4_K_M",
-        filename="loreguard-vanilla-UD-Q4_K_M.gguf",
-        size_gb=4.9,
-        size_bytes=5_282_912_544,
-        context_length=8192,
-        url=f"https://huggingface.co/{HF_ORG}/loreguard-vanilla-gguf/resolve/main/loreguard-vanilla-UD-Q4_K_M.gguf",
-        description="Best for 6GB VRAM. Smallest size.",
-        hardware="8GB RAM • 6GB VRAM",
+        url=f"https://huggingface.co/{HF_ORG}/loreguard-npc-llama3.1-8b-gguf/resolve/main/loreguard-npc-llama3.1-8b-Q8_0.gguf",
+        description="Maximum quality. Requires more VRAM.",
+        hardware="16GB RAM • 12GB VRAM",
         recommended=False,
     ),
     ModelInfo(
-        id="loreguard-vanilla-ud-q8",
-        name="Loreguard Vanilla UD Q8_0",
-        filename="loreguard-vanilla-UD-Q8_0.gguf",
-        size_gb=8.5,
-        size_bytes=9_177_550_624,
+        id="loreguard-npc-f16",
+        name="Loreguard NPC Llama 3.1 8B F16",
+        filename="loreguard-npc-llama3.1-8b-f16.gguf",
+        size_gb=14.9,
+        size_bytes=16_068_895_712,
         context_length=8192,
-        url=f"https://huggingface.co/{HF_ORG}/loreguard-vanilla-gguf/resolve/main/loreguard-vanilla-UD-Q8_0.gguf",
-        description="Maximum quality. Requires more VRAM.",
-        hardware="16GB RAM • 12GB VRAM",
+        url=f"https://huggingface.co/{HF_ORG}/loreguard-npc-llama3.1-8b-gguf/resolve/main/loreguard-npc-llama3.1-8b-f16.gguf",
+        description="Full precision. Research/fine-tuning use.",
+        hardware="32GB RAM • 20GB VRAM",
         recommended=False,
     ),
 ]

{loreguard_cli-0.14.0 → loreguard_cli-0.14.1}/uv.lock RENAMED Viewed

@@ -600,7 +600,7 @@ wheels = [
 [[package]]
 name = "loreguard-cli"
-version = "0.12.2"
+version = "0.14.0"
 source = { editable = "." }
 dependencies = [
     { name = "aiofiles" },
@@ -628,6 +628,11 @@ dev = [
     { name = "ruff" },
 ]
+[package.dev-dependencies]
+dev = [
+    { name = "pyinstaller" },
+]
 [package.metadata]
 requires-dist = [
     { name = "aiofiles", specifier = ">=24.1.0" },
@@ -650,6 +655,9 @@ requires-dist = [
 ]
 provides-extras = ["dev", "build"]
+[package.metadata.requires-dev]
+dev = [{ name = "pyinstaller", specifier = ">=6.17.0" }]
 [[package]]
 name = "macholib"
 version = "1.16.4"