loreguard-cli 0.16.0__tar.gz → 0.20.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/PKG-INFO +2 -2
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/pyproject.toml +2 -2
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/__main__.py +23 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/cli.py +11 -3
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/config.py +55 -8
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/http_server.py +28 -11
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/llama_server.py +7 -4
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/llm.py +9 -5
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/main.py +3 -3
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/models_registry.py +12 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/nli.py +79 -15
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/npc_chat.py +7 -5
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/steam.py +4 -3
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/modals/auth_menu.py +3 -2
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/modals/token_input.py +1 -1
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/modals/unified_palette.py +28 -1
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/auth.py +2 -1
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/main.py +6 -6
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/running.py +3 -2
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/npc_chat.py +4 -2
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tunnel.py +8 -2
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/wizard.py +4 -2
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/uv.lock +8 -8
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/.claude/skills/llama-cpp-troubleshooting/SKILL.md +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/.env.example +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/.github/workflows/release.yml +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/.gitignore +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/LICENSE +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/README.md +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/THIRD_PARTY_NOTICES.md +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/loreguard.spec +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/loreguard_entry.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/scripts/build.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/sdk/API.md +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/sdk/csharp/LoreguardSDK.cs +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/sdk/gdscript/LoreguardSDK.gd +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/sdk/javascript/loreguard-sdk.js +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/sdk/python/loreguard_sdk.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/__init__.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/chunk_detector.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/dialogue_act_classifier.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/hf_discovery.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/intent_classifier.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/model_families.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/runtime.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/term_ui.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/__init__.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/app.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/modals/__init__.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/modals/npc_chat.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/__init__.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/model_select.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/nli_setup.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/styles.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/__init__.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/banner.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/footer.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/hardware_info.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/server_monitor.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/status_panel.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/templates/llama31-no-tools.jinja +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/tests/test_intent_classifier.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/tests/test_nli_hhem.py +0 -0
- {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/tests/test_websocket_timeout.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: loreguard-cli
|
|
3
|
-
Version: 0.16.0
|
|
3
|
+
Version: 0.20.3
|
|
4
4
|
Summary: Local inference client for Loreguard NPCs
|
|
5
5
|
Project-URL: Homepage, https://loreguard.com
|
|
6
6
|
Project-URL: Documentation, https://github.com/beyond-logic-labs/loreguard-cli#readme
|
|
@@ -29,7 +29,7 @@ Requires-Dist: rich>=13.0.0
|
|
|
29
29
|
Requires-Dist: textual>=0.47.0
|
|
30
30
|
Requires-Dist: tf-keras>=2.16.0
|
|
31
31
|
Requires-Dist: torch>=2.0.0
|
|
32
|
-
Requires-Dist: transformers
|
|
32
|
+
Requires-Dist: transformers<5,>=4.36.0
|
|
33
33
|
Requires-Dist: uvicorn>=0.27.0
|
|
34
34
|
Requires-Dist: websockets>=12.0
|
|
35
35
|
Provides-Extra: build
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "loreguard-cli"
|
|
7
|
-
version = "0.16.0"
|
|
7
|
+
version = "0.20.3"
|
|
8
8
|
description = "Local inference client for Loreguard NPCs"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -28,7 +28,7 @@ dependencies = [
|
|
|
28
28
|
"aiofiles>=24.1.0",
|
|
29
29
|
"rich>=13.0.0",
|
|
30
30
|
"textual>=0.47.0",
|
|
31
|
-
"transformers>=
|
|
31
|
+
"transformers>=4.36.0,<5",
|
|
32
32
|
"torch>=2.0.0",
|
|
33
33
|
"fastapi>=0.109.0",
|
|
34
34
|
"uvicorn>=0.27.0",
|
|
@@ -28,6 +28,29 @@ def main():
|
|
|
28
28
|
print(json.dumps(status, indent=2))
|
|
29
29
|
sys.exit(0 if status.get("running") else 1)
|
|
30
30
|
|
|
31
|
+
# Handle 'download-llama-server' command - for bundle tool delegation (ADR-0027)
|
|
32
|
+
if args and args[0] == "download-llama-server":
|
|
33
|
+
import asyncio
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
from .llama_server import download_llama_server
|
|
36
|
+
|
|
37
|
+
output_dir = None
|
|
38
|
+
for i, a in enumerate(args):
|
|
39
|
+
if a == "--output-dir" and i + 1 < len(args):
|
|
40
|
+
output_dir = Path(args[i + 1])
|
|
41
|
+
|
|
42
|
+
if not output_dir:
|
|
43
|
+
print("Usage: loreguard download-llama-server --output-dir <path>", file=sys.stderr)
|
|
44
|
+
sys.exit(1)
|
|
45
|
+
|
|
46
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
|
47
|
+
|
|
48
|
+
def on_progress(msg, progress=None):
|
|
49
|
+
print(f" {msg}")
|
|
50
|
+
|
|
51
|
+
asyncio.run(download_llama_server(progress_callback=on_progress, target_dir=output_dir))
|
|
52
|
+
sys.exit(0)
|
|
53
|
+
|
|
31
54
|
# Filter out help flags - these should show CLI help
|
|
32
55
|
if any(a in ('-h', '--help') for a in args):
|
|
33
56
|
from .cli import main as cli_main
|
|
@@ -11,7 +11,8 @@ Environment variables (alternative to args):
|
|
|
11
11
|
LOREGUARD_MODEL Path to model file
|
|
12
12
|
LOREGUARD_MODEL_ID Model ID to download (if not using custom model)
|
|
13
13
|
LOREGUARD_PORT Local llama-server port (default: 8080)
|
|
14
|
-
LOREGUARD_BACKEND Backend URL (default: wss://
|
|
14
|
+
LOREGUARD_BACKEND Backend WebSocket URL (default: wss://console.loreguard.com/workers)
|
|
15
|
+
LOREGUARD_API API base URL (default: https://console.loreguard.com)
|
|
15
16
|
LOREGUARD_WORKER_ID Worker ID (default: hostname)
|
|
16
17
|
"""
|
|
17
18
|
|
|
@@ -26,6 +27,8 @@ from datetime import datetime
|
|
|
26
27
|
from pathlib import Path
|
|
27
28
|
from typing import Optional
|
|
28
29
|
|
|
30
|
+
from .config import DEFAULT_API_URL, DEFAULT_BACKEND_URL
|
|
31
|
+
|
|
29
32
|
# Setup logging
|
|
30
33
|
logging.basicConfig(
|
|
31
34
|
level=logging.INFO,
|
|
@@ -44,7 +47,7 @@ class LoreguardCLI:
|
|
|
44
47
|
model_path: Optional[Path] = None,
|
|
45
48
|
model_id: Optional[str] = None,
|
|
46
49
|
port: int = 8080,
|
|
47
|
-
backend_url: str =
|
|
50
|
+
backend_url: str = DEFAULT_BACKEND_URL,
|
|
48
51
|
worker_id: Optional[str] = None,
|
|
49
52
|
model_family: str = "llama3",
|
|
50
53
|
):
|
|
@@ -454,9 +457,14 @@ Available model IDs:
|
|
|
454
457
|
)
|
|
455
458
|
parser.add_argument(
|
|
456
459
|
"--backend",
|
|
457
|
-
default=os.getenv("LOREGUARD_BACKEND",
|
|
460
|
+
default=os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
|
|
458
461
|
help="Backend WebSocket URL",
|
|
459
462
|
)
|
|
463
|
+
parser.add_argument(
|
|
464
|
+
"--api-url",
|
|
465
|
+
default=os.getenv("LOREGUARD_API", DEFAULT_API_URL),
|
|
466
|
+
help=f"API base URL (default: {DEFAULT_API_URL})",
|
|
467
|
+
)
|
|
460
468
|
parser.add_argument(
|
|
461
469
|
"-v", "--verbose",
|
|
462
470
|
action="store_true",
|
|
@@ -50,6 +50,7 @@ class LoreguardConfig:
|
|
|
50
50
|
context_size: int = 16384 # llama-server context window size (configurable per game)
|
|
51
51
|
max_speech_tokens: int = 50 # Max tokens for NPC speech output (Pass 4). Default: 50 (~40 words)
|
|
52
52
|
model_family: str = "auto" # Model family profile (auto, llama3, qwen3, gemma, chatml)
|
|
53
|
+
dialogue_act_enabled: bool = False # Dialogue act classifier for filler selection
|
|
53
54
|
|
|
54
55
|
def save(self) -> None:
|
|
55
56
|
"""Save configuration to disk."""
|
|
@@ -73,6 +74,7 @@ class LoreguardConfig:
|
|
|
73
74
|
context_size=data.get("context_size", 16384),
|
|
74
75
|
max_speech_tokens=data.get("max_speech_tokens", 50),
|
|
75
76
|
model_family=data.get("model_family", "auto"),
|
|
77
|
+
dialogue_act_enabled=data.get("dialogue_act_enabled", False),
|
|
76
78
|
)
|
|
77
79
|
except (json.JSONDecodeError, KeyError):
|
|
78
80
|
pass
|
|
@@ -121,6 +123,14 @@ class LoreguardConfig:
|
|
|
121
123
|
# Environment Variable Configuration
|
|
122
124
|
# =============================================================================
|
|
123
125
|
|
|
126
|
+
DEFAULT_API_URL = "https://console.loreguard.com"
|
|
127
|
+
DEFAULT_BACKEND_URL = "wss://console.loreguard.com/workers"
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def get_api_url() -> str:
|
|
131
|
+
"""Get the Loreguard API base URL (configurable via LOREGUARD_API env var)."""
|
|
132
|
+
return os.getenv("LOREGUARD_API", DEFAULT_API_URL)
|
|
133
|
+
|
|
124
134
|
|
|
125
135
|
@lru_cache(maxsize=1)
|
|
126
136
|
def load_config() -> dict:
|
|
@@ -133,12 +143,13 @@ def load_config() -> dict:
|
|
|
133
143
|
return {
|
|
134
144
|
# Server settings
|
|
135
145
|
"LLM_ENDPOINT": os.getenv("LLM_ENDPOINT", "http://localhost:8080"),
|
|
136
|
-
"BACKEND_URL": os.getenv("LOREGUARD_BACKEND",
|
|
146
|
+
"BACKEND_URL": os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
|
|
147
|
+
"API_URL": os.getenv("LOREGUARD_API", DEFAULT_API_URL),
|
|
137
148
|
"HOST": os.getenv("HOST", "127.0.0.1"),
|
|
138
149
|
"PORT": os.getenv("PORT", "8081"),
|
|
139
150
|
|
|
140
151
|
# Worker authentication (required for backend connection)
|
|
141
|
-
# Get API token from loreguard.com
|
|
152
|
+
# Get API token from console.loreguard.com
|
|
142
153
|
"WORKER_ID": os.getenv("LOREGUARD_WORKER_ID", os.getenv("WORKER_ID", "")),
|
|
143
154
|
# LOREGUARD_TOKEN is preferred, WORKER_TOKEN kept for backwards compatibility
|
|
144
155
|
"LOREGUARD_TOKEN": os.getenv("LOREGUARD_TOKEN", os.getenv("WORKER_TOKEN", "")),
|
|
@@ -232,20 +243,21 @@ def get_models_dir() -> Optional[Path]:
|
|
|
232
243
|
|
|
233
244
|
|
|
234
245
|
def resolve_model_path(model_name: str, subdir: str = "") -> str:
|
|
235
|
-
"""Resolve a model path, preferring
|
|
246
|
+
"""Resolve a model path, preferring local models over HF downloads.
|
|
236
247
|
|
|
237
248
|
Resolution order:
|
|
238
249
|
1. LOREGUARD_MODELS_DIR/<subdir> (explicit override)
|
|
239
|
-
2.
|
|
240
|
-
3. Bundle models dir using HF name →
|
|
241
|
-
4.
|
|
250
|
+
2. Application Support models dir/<subdir> (standard install location)
|
|
251
|
+
3. Bundle models dir using manifest.txt (HF name → manifest key → local dir)
|
|
252
|
+
4. Bundle models dir using HF name → org--model convention (fallback)
|
|
253
|
+
5. Download from HuggingFace to Application Support models dir
|
|
242
254
|
|
|
243
255
|
Args:
|
|
244
256
|
model_name: HuggingFace model name (e.g., 'vectara/hallucination_evaluation_model')
|
|
245
257
|
subdir: Subdirectory within MODELS_DIR to check (e.g., 'hhem', 'deberta')
|
|
246
258
|
|
|
247
259
|
Returns:
|
|
248
|
-
Local path
|
|
260
|
+
Local path to the model directory.
|
|
249
261
|
"""
|
|
250
262
|
# 1. Explicit LOREGUARD_MODELS_DIR/<subdir>
|
|
251
263
|
explicit_dir = get_config_value("MODELS_DIR")
|
|
@@ -254,7 +266,14 @@ def resolve_model_path(model_name: str, subdir: str = "") -> str:
|
|
|
254
266
|
if local_path.exists() and any(local_path.iterdir()):
|
|
255
267
|
return str(local_path)
|
|
256
268
|
|
|
257
|
-
# 2
|
|
269
|
+
# 2. Application Support models dir/<subdir>
|
|
270
|
+
app_models = get_data_dir() / "models"
|
|
271
|
+
if subdir:
|
|
272
|
+
local_path = app_models / subdir
|
|
273
|
+
if local_path.exists() and any(local_path.iterdir()):
|
|
274
|
+
return str(local_path)
|
|
275
|
+
|
|
276
|
+
# 3 & 4. Bundle directory resolution
|
|
258
277
|
bundle_dir = get_bundle_dir()
|
|
259
278
|
if bundle_dir:
|
|
260
279
|
bundle_models = bundle_dir / "models"
|
|
@@ -275,9 +294,37 @@ def resolve_model_path(model_name: str, subdir: str = "") -> str:
|
|
|
275
294
|
if local_path.exists() and any(local_path.iterdir()):
|
|
276
295
|
return str(local_path)
|
|
277
296
|
|
|
297
|
+
# 5. Download from HuggingFace to Application Support models dir
|
|
298
|
+
if subdir:
|
|
299
|
+
return _download_hf_model(model_name, app_models / subdir)
|
|
300
|
+
|
|
278
301
|
return model_name
|
|
279
302
|
|
|
280
303
|
|
|
304
|
+
def _download_hf_model(model_name: str, target_dir: Path) -> str:
|
|
305
|
+
"""Download a HuggingFace model to the loreguard models directory.
|
|
306
|
+
|
|
307
|
+
Returns:
|
|
308
|
+
Path to the downloaded model directory.
|
|
309
|
+
"""
|
|
310
|
+
import logging
|
|
311
|
+
logger = logging.getLogger(__name__)
|
|
312
|
+
try:
|
|
313
|
+
from huggingface_hub import snapshot_download
|
|
314
|
+
target_dir.mkdir(parents=True, exist_ok=True)
|
|
315
|
+
logger.info(f"Downloading {model_name} to {target_dir}")
|
|
316
|
+
snapshot_download(
|
|
317
|
+
model_name,
|
|
318
|
+
local_dir=str(target_dir),
|
|
319
|
+
local_dir_use_symlinks=False,
|
|
320
|
+
)
|
|
321
|
+
logger.info(f"Downloaded {model_name} to {target_dir}")
|
|
322
|
+
return str(target_dir)
|
|
323
|
+
except Exception as e:
|
|
324
|
+
logger.warning(f"Failed to download {model_name}: {e}")
|
|
325
|
+
return model_name
|
|
326
|
+
|
|
327
|
+
|
|
281
328
|
def get_config_value(key: str, default: Optional[str] = None) -> Optional[str]:
|
|
282
329
|
"""Get a single configuration value."""
|
|
283
330
|
config = load_config()
|
|
@@ -367,7 +367,7 @@ class EmbeddedHTTPServer:
|
|
|
367
367
|
|
|
368
368
|
# Derive HTTP base URL from WebSocket URL
|
|
369
369
|
# ws://localhost:8090/workers → http://localhost:8090
|
|
370
|
-
# wss://
|
|
370
|
+
# wss://console.loreguard.com/workers → https://console.loreguard.com
|
|
371
371
|
backend_ws = server.tunnel.backend_url
|
|
372
372
|
if backend_ws.startswith("wss://"):
|
|
373
373
|
base_url = "https://" + backend_ws[6:].split("/")[0]
|
|
@@ -535,15 +535,15 @@ class EmbeddedHTTPServer:
|
|
|
535
535
|
content={"error": "Missing 'model' field"},
|
|
536
536
|
)
|
|
537
537
|
|
|
538
|
-
# Security:
|
|
539
|
-
|
|
538
|
+
# Security: resolve and verify path stays inside models_dir
|
|
539
|
+
model_path = (server.models_dir / model_name).resolve()
|
|
540
|
+
if model_path.parent != server.models_dir.resolve():
|
|
540
541
|
return JSONResponse(
|
|
541
542
|
status_code=400,
|
|
542
543
|
content={"error": "Invalid model name"},
|
|
543
544
|
)
|
|
544
545
|
|
|
545
|
-
model_path
|
|
546
|
-
if not model_path.exists() or not model_path.suffix == ".gguf":
|
|
546
|
+
if not model_path.exists() or model_path.suffix != ".gguf":
|
|
547
547
|
return JSONResponse(
|
|
548
548
|
status_code=404,
|
|
549
549
|
content={"error": f"Model '{model_name}' not found"},
|
|
@@ -553,6 +553,9 @@ class EmbeddedHTTPServer:
|
|
|
553
553
|
if hasattr(server.llama_process, "model_path") and server.llama_process.model_path.name == model_name:
|
|
554
554
|
return {"status": "already_active", "model": model_name}
|
|
555
555
|
|
|
556
|
+
# Save original model_path for rollback on failure
|
|
557
|
+
original_model_path = server.llama_process.model_path
|
|
558
|
+
|
|
556
559
|
try:
|
|
557
560
|
# Stop current llama-server
|
|
558
561
|
server.llama_process.stop()
|
|
@@ -564,21 +567,35 @@ class EmbeddedHTTPServer:
|
|
|
564
567
|
# Wait for health check (llama-server takes a few seconds to load model)
|
|
565
568
|
import httpx
|
|
566
569
|
llama_url = f"http://127.0.0.1:{server.llama_process.port}/health"
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
570
|
+
async with httpx.AsyncClient(timeout=2.0) as client:
|
|
571
|
+
for attempt in range(60): # 60 attempts × 0.5s = 30s timeout
|
|
572
|
+
await asyncio.sleep(0.5)
|
|
573
|
+
try:
|
|
571
574
|
resp = await client.get(llama_url)
|
|
572
575
|
if resp.status_code == 200:
|
|
576
|
+
# Persist selection so it survives restarts
|
|
577
|
+
try:
|
|
578
|
+
from .config import LoreguardConfig
|
|
579
|
+
cfg = LoreguardConfig.load()
|
|
580
|
+
cfg.set_model_path(model_path)
|
|
581
|
+
cfg.save()
|
|
582
|
+
except Exception:
|
|
583
|
+
pass # Best-effort persistence
|
|
573
584
|
return {"status": "ok", "model": model_name}
|
|
574
|
-
|
|
575
|
-
|
|
585
|
+
except Exception:
|
|
586
|
+
continue
|
|
576
587
|
|
|
577
588
|
return JSONResponse(
|
|
578
589
|
status_code=500,
|
|
579
590
|
content={"error": "Model loaded but health check timed out after 30s"},
|
|
580
591
|
)
|
|
581
592
|
except Exception as e:
|
|
593
|
+
# Rollback: restore original model path and try to restart
|
|
594
|
+
server.llama_process.model_path = original_model_path
|
|
595
|
+
try:
|
|
596
|
+
server.llama_process.start()
|
|
597
|
+
except Exception:
|
|
598
|
+
pass # Best-effort rollback
|
|
582
599
|
return JSONResponse(
|
|
583
600
|
status_code=500,
|
|
584
601
|
content={"error": f"Failed to reload model: {e}"},
|
|
@@ -30,7 +30,7 @@ def _get_templates_dir() -> Path:
|
|
|
30
30
|
return Path(__file__).parent.parent / "templates"
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
LLAMA_VERSION = "
|
|
33
|
+
LLAMA_VERSION = "b8467" # Must match loreguard-engine bundle version
|
|
34
34
|
|
|
35
35
|
# Download URLs for each platform
|
|
36
36
|
BINARIES = {
|
|
@@ -265,18 +265,21 @@ def make_executable(path: Path) -> None:
|
|
|
265
265
|
|
|
266
266
|
async def download_llama_server(
|
|
267
267
|
progress_callback: Optional[Callable[[str, DownloadProgress | None], None]] = None,
|
|
268
|
+
target_dir: Optional[Path] = None,
|
|
268
269
|
) -> Path:
|
|
269
270
|
"""Download and install llama-server for the current platform.
|
|
270
271
|
|
|
271
272
|
Args:
|
|
272
273
|
progress_callback: Called with (status_message, progress_or_none)
|
|
274
|
+
target_dir: If provided, install into this directory instead of the default.
|
|
275
|
+
Used by the bundle tool to pre-ship llama-server.
|
|
273
276
|
|
|
274
277
|
Returns:
|
|
275
278
|
Path to the installed llama-server binary
|
|
276
279
|
"""
|
|
277
280
|
plat = get_platform()
|
|
278
281
|
config = BINARIES[plat]
|
|
279
|
-
bin_dir = get_bin_dir()
|
|
282
|
+
bin_dir = target_dir or get_bin_dir()
|
|
280
283
|
|
|
281
284
|
def notify(msg: str, progress: DownloadProgress | None = None):
|
|
282
285
|
if progress_callback:
|
|
@@ -355,12 +358,12 @@ async def download_llama_server(
|
|
|
355
358
|
make_executable(lib)
|
|
356
359
|
|
|
357
360
|
# Write version marker file for future version checks
|
|
358
|
-
version_file = get_version_file_path()
|
|
361
|
+
version_file = bin_dir / ".llama_version" if target_dir else get_version_file_path()
|
|
359
362
|
version_file.write_text(LLAMA_VERSION)
|
|
360
363
|
|
|
361
364
|
notify(f"llama-server {LLAMA_VERSION} installed successfully!")
|
|
362
365
|
|
|
363
|
-
return
|
|
366
|
+
return bin_dir / config["binary_name"]
|
|
364
367
|
|
|
365
368
|
|
|
366
369
|
class LlamaServerProcess:
|
|
@@ -61,7 +61,9 @@ class LLMRequest:
|
|
|
61
61
|
stop: list[str] = field(default_factory=lambda: DEFAULT_STOP_SEQUENCES.copy())
|
|
62
62
|
|
|
63
63
|
# Thinking mode control (for Qwen3)
|
|
64
|
-
|
|
64
|
+
# Defaults to True: thinking wastes tokens and breaks pipelines.
|
|
65
|
+
# Only enable explicitly when extended reasoning is desired.
|
|
66
|
+
disable_thinking: bool = True
|
|
65
67
|
|
|
66
68
|
# If true, error if content is empty instead of falling back to reasoning_content
|
|
67
69
|
require_content: bool = False
|
|
@@ -257,9 +259,10 @@ class LLMProxy:
|
|
|
257
259
|
payload["id_slot"] = 0
|
|
258
260
|
logger.info("KV cache: cache_prompt=true, id_slot=0 (verify -np 1 on server)")
|
|
259
261
|
|
|
260
|
-
# Disable thinking mode
|
|
262
|
+
# Disable thinking mode (for Qwen3/3.5).
|
|
263
|
+
# Must use chat_template_kwargs — top-level enable_thinking is ignored by llama.cpp b8467+.
|
|
261
264
|
if req.disable_thinking:
|
|
262
|
-
payload["enable_thinking"] = False
|
|
265
|
+
payload.setdefault("chat_template_kwargs", {})["enable_thinking"] = False
|
|
263
266
|
|
|
264
267
|
# Note: JSON mode is not compatible with streaming in llama.cpp
|
|
265
268
|
# If force_json is requested, fall back to non-streaming
|
|
@@ -573,9 +576,10 @@ class LLMProxy:
|
|
|
573
576
|
payload["id_slot"] = 0
|
|
574
577
|
logger.info("KV cache: cache_prompt=true, id_slot=0 (verify -np 1 on server)")
|
|
575
578
|
|
|
576
|
-
# Disable thinking mode
|
|
579
|
+
# Disable thinking mode (for Qwen3/3.5).
|
|
580
|
+
# Must use chat_template_kwargs — top-level enable_thinking is ignored by llama.cpp b8467+.
|
|
577
581
|
if req.disable_thinking:
|
|
578
|
-
payload["enable_thinking"] = False
|
|
582
|
+
payload.setdefault("chat_template_kwargs", {})["enable_thinking"] = False
|
|
579
583
|
|
|
580
584
|
# Force JSON output if requested
|
|
581
585
|
if req.force_json:
|
|
@@ -31,7 +31,7 @@ from rich.console import Console
|
|
|
31
31
|
|
|
32
32
|
from .tunnel import BackendTunnel
|
|
33
33
|
from .llm import LLMProxy
|
|
34
|
-
from .config import get_config_value, resolve_model_path
|
|
34
|
+
from .config import get_config_value, resolve_model_path, DEFAULT_BACKEND_URL
|
|
35
35
|
from .nli import NLIService, is_nli_model_available
|
|
36
36
|
from .intent_classifier import IntentClassifier, is_intent_model_available
|
|
37
37
|
from .dialogue_act_classifier import (
|
|
@@ -118,7 +118,7 @@ async def startup():
|
|
|
118
118
|
console.print("[yellow]Intent classifier disabled (set LOREGUARD_INTENT_ENABLED=true to enable)[/yellow]")
|
|
119
119
|
|
|
120
120
|
# Initialize dialogue act classifier (optional, for filler selection)
|
|
121
|
-
enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "
|
|
121
|
+
enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "false").lower() == "true"
|
|
122
122
|
if enable_dialogue_act:
|
|
123
123
|
console.print("[cyan]Initializing dialogue act classifier...[/cyan]")
|
|
124
124
|
if is_dialogue_act_model_available():
|
|
@@ -170,7 +170,7 @@ async def startup():
|
|
|
170
170
|
chunk_detector = None
|
|
171
171
|
|
|
172
172
|
# Connect to remote backend
|
|
173
|
-
backend_url = get_config_value("BACKEND_URL",
|
|
173
|
+
backend_url = get_config_value("BACKEND_URL", DEFAULT_BACKEND_URL)
|
|
174
174
|
worker_id = get_config_value("WORKER_ID", "")
|
|
175
175
|
worker_token = get_config_value("WORKER_TOKEN", "")
|
|
176
176
|
model_id = get_config_value("MODEL_ID", "default")
|
|
@@ -92,6 +92,18 @@ SUPPORTED_MODELS: list[ModelInfo] = [
|
|
|
92
92
|
hardware="32GB RAM • 20GB VRAM",
|
|
93
93
|
recommended=False,
|
|
94
94
|
),
|
|
95
|
+
ModelInfo(
|
|
96
|
+
id="qwen3.5-9b-q4km",
|
|
97
|
+
name="Qwen 3.5 9B Q4_K_M",
|
|
98
|
+
filename="Qwen3.5-9B-Q4_K_M.gguf",
|
|
99
|
+
size_gb=5.2,
|
|
100
|
+
size_bytes=5_627_044_256,
|
|
101
|
+
context_length=32768,
|
|
102
|
+
url="https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf",
|
|
103
|
+
description="Strong general model. 32K context, good reasoning.",
|
|
104
|
+
hardware="12GB RAM • 8GB VRAM",
|
|
105
|
+
recommended=False,
|
|
106
|
+
),
|
|
95
107
|
]
|
|
96
108
|
|
|
97
109
|
|
|
@@ -328,6 +328,26 @@ class NLIService:
|
|
|
328
328
|
|
|
329
329
|
return results
|
|
330
330
|
|
|
331
|
+
def _resolve_model_dir(self) -> Optional[str]:
|
|
332
|
+
"""Resolve the actual directory containing model files.
|
|
333
|
+
|
|
334
|
+
For local paths, returns as-is. For HuggingFace repo IDs (e.g.
|
|
335
|
+
'vectara/hallucination_evaluation_model'), resolves the cache snapshot dir.
|
|
336
|
+
"""
|
|
337
|
+
if os.path.isdir(self._model_path):
|
|
338
|
+
return self._model_path
|
|
339
|
+
try:
|
|
340
|
+
from huggingface_hub import snapshot_download
|
|
341
|
+
return snapshot_download(self._model_path, local_files_only=True)
|
|
342
|
+
except Exception:
|
|
343
|
+
# Cache miss — trigger a download so the snapshot exists
|
|
344
|
+
try:
|
|
345
|
+
from huggingface_hub import snapshot_download
|
|
346
|
+
return snapshot_download(self._model_path)
|
|
347
|
+
except Exception as e:
|
|
348
|
+
logger.warning(f"Could not resolve model dir for {self._model_path}: {e}")
|
|
349
|
+
return None
|
|
350
|
+
|
|
331
351
|
def _patch_hhem_model_files(self):
|
|
332
352
|
"""Patch vendored HHEM files for transformers 5.x compatibility.
|
|
333
353
|
|
|
@@ -336,8 +356,31 @@ class NLIService:
|
|
|
336
356
|
2. Is stricter about model_type matching between config.json and config class
|
|
337
357
|
Since trust_remote_code loads the .py files directly, we patch before loading.
|
|
338
358
|
"""
|
|
359
|
+
model_dir = self._resolve_model_dir()
|
|
360
|
+
if not model_dir:
|
|
361
|
+
logger.warning("Cannot resolve model directory for patching")
|
|
362
|
+
return
|
|
363
|
+
|
|
364
|
+
# Also patch the modules cache (transformers copies .py files there)
|
|
365
|
+
modules_dirs = [model_dir]
|
|
366
|
+
modules_cache = os.path.join(
|
|
367
|
+
os.path.expanduser("~"), ".cache", "huggingface", "modules",
|
|
368
|
+
"transformers_modules",
|
|
369
|
+
)
|
|
370
|
+
if os.path.isdir(modules_cache):
|
|
371
|
+
for root, dirs, files in os.walk(modules_cache):
|
|
372
|
+
if "modeling_hhem_v2.py" in files:
|
|
373
|
+
modules_dirs.append(root)
|
|
374
|
+
|
|
375
|
+
for patch_dir in modules_dirs:
|
|
376
|
+
self._patch_dir(patch_dir)
|
|
377
|
+
|
|
378
|
+
def _patch_dir(self, patch_dir: str):
|
|
379
|
+
"""Apply HHEM patches to a single directory."""
|
|
339
380
|
# Patch 1: modeling_hhem_v2.py — add missing class attributes
|
|
340
|
-
model_file = os.path.join(
|
|
381
|
+
model_file = os.path.join(patch_dir, "modeling_hhem_v2.py")
|
|
382
|
+
if not os.path.exists(model_file):
|
|
383
|
+
return
|
|
341
384
|
if os.path.exists(model_file):
|
|
342
385
|
try:
|
|
343
386
|
content = open(model_file, "r").read()
|
|
@@ -354,13 +397,18 @@ class NLIService:
|
|
|
354
397
|
if patched != content:
|
|
355
398
|
with open(model_file, "w") as f:
|
|
356
399
|
f.write(patched)
|
|
357
|
-
|
|
400
|
+
# Clear __pycache__ so patched file is reloaded
|
|
401
|
+
pycache = os.path.join(patch_dir, "__pycache__")
|
|
402
|
+
if os.path.isdir(pycache):
|
|
403
|
+
import shutil
|
|
404
|
+
shutil.rmtree(pycache)
|
|
405
|
+
logger.info(f"Patched {model_file} for transformers 5.x")
|
|
358
406
|
except Exception as e:
|
|
359
407
|
logger.warning(f"Could not patch modeling_hhem_v2.py: {e}")
|
|
360
408
|
|
|
361
409
|
# Patch 2: config.json — fix model_type mismatch
|
|
362
410
|
# config.json has "HHEMv2Config" but the config class defines model_type = "HHEMv2"
|
|
363
|
-
config_file = os.path.join(
|
|
411
|
+
config_file = os.path.join(patch_dir, "config.json")
|
|
364
412
|
if os.path.exists(config_file):
|
|
365
413
|
try:
|
|
366
414
|
content = open(config_file, "r").read()
|
|
@@ -375,24 +423,40 @@ class NLIService:
|
|
|
375
423
|
except Exception as e:
|
|
376
424
|
logger.warning(f"Could not patch config.json: {e}")
|
|
377
425
|
|
|
378
|
-
# Patch 3: configuration_hhem_v2.py
|
|
379
|
-
#
|
|
380
|
-
#
|
|
381
|
-
config_py = os.path.join(
|
|
382
|
-
local_foundation = os.path.join(
|
|
426
|
+
# Patch 3: configuration_hhem_v2.py, point the foundation at the bundled
|
|
427
|
+
# flan-t5-base. Resolve it RELATIVE to the config file at runtime (via
|
|
428
|
+
# __file__) so the path is portable and never a baked absolute machine path.
|
|
429
|
+
config_py = os.path.join(patch_dir, "configuration_hhem_v2.py")
|
|
430
|
+
local_foundation = os.path.join(patch_dir, "flan-t5-base")
|
|
383
431
|
if os.path.exists(config_py) and os.path.isdir(local_foundation):
|
|
384
432
|
try:
|
|
433
|
+
import re
|
|
434
|
+
|
|
385
435
|
content = open(config_py, "r").read()
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
436
|
+
resolver = (
|
|
437
|
+
'os.path.join(os.path.dirname(os.path.abspath(__file__)), '
|
|
438
|
+
'"flan-t5-base")'
|
|
439
|
+
)
|
|
440
|
+
patched = content
|
|
441
|
+
if "import os" not in patched:
|
|
442
|
+
patched = patched.replace(
|
|
443
|
+
"from transformers import PretrainedConfig",
|
|
444
|
+
"from transformers import PretrainedConfig\nimport os",
|
|
445
|
+
1,
|
|
392
446
|
)
|
|
447
|
+
# Replace the class-level foundation assignment, whatever it is now
|
|
448
|
+
# (the HuggingFace default OR a previously-baked absolute path), with
|
|
449
|
+
# the self-resolving expression. count=1 leaves any __init__ default.
|
|
450
|
+
patched = re.sub(
|
|
451
|
+
r'foundation\s*=\s*"[^"]*"',
|
|
452
|
+
"foundation = " + resolver,
|
|
453
|
+
patched,
|
|
454
|
+
count=1,
|
|
455
|
+
)
|
|
456
|
+
if patched != content:
|
|
393
457
|
with open(config_py, "w") as f:
|
|
394
458
|
f.write(patched)
|
|
395
|
-
logger.info(
|
|
459
|
+
logger.info("Patched foundation to self-resolving relative path")
|
|
396
460
|
except Exception as e:
|
|
397
461
|
logger.warning(f"Could not patch configuration_hhem_v2.py: {e}")
|
|
398
462
|
|
|
@@ -15,6 +15,7 @@ Rate Limits (when using Player JWT):
|
|
|
15
15
|
|
|
16
16
|
import asyncio
|
|
17
17
|
import logging
|
|
18
|
+
import os
|
|
18
19
|
from dataclasses import dataclass
|
|
19
20
|
from typing import Optional
|
|
20
21
|
|
|
@@ -35,8 +36,9 @@ from .term_ui import (
|
|
|
35
36
|
# Configure module logger
|
|
36
37
|
logger = logging.getLogger(__name__)
|
|
37
38
|
|
|
38
|
-
# Loreguard API base URL
|
|
39
|
-
|
|
39
|
+
# Loreguard API base URL (configurable via LOREGUARD_API env var)
|
|
40
|
+
from .config import get_api_url
|
|
41
|
+
LOREGUARD_API_URL = get_api_url()
|
|
40
42
|
|
|
41
43
|
|
|
42
44
|
@dataclass
|
|
@@ -407,7 +409,7 @@ class NPCChat:
|
|
|
407
409
|
raise Exception("Invalid API token - please check your authentication")
|
|
408
410
|
elif e.response.status_code == 404:
|
|
409
411
|
logger.warning("No characters found for this account")
|
|
410
|
-
raise Exception("No characters found - register NPCs at loreguard.com first")
|
|
412
|
+
raise Exception("No characters found - register NPCs at console.loreguard.com first")
|
|
411
413
|
logger.error("HTTP error fetching characters: %d", e.response.status_code)
|
|
412
414
|
raise
|
|
413
415
|
except httpx.RequestError as e:
|
|
@@ -428,7 +430,7 @@ class NPCChat:
|
|
|
428
430
|
return None
|
|
429
431
|
|
|
430
432
|
if not characters:
|
|
431
|
-
print_error("No NPCs registered. Create NPCs at loreguard.com first.")
|
|
433
|
+
print_error("No NPCs registered. Create NPCs at console.loreguard.com first.")
|
|
432
434
|
return None
|
|
433
435
|
|
|
434
436
|
items = [
|
|
@@ -642,7 +644,7 @@ async def run_npc_chat(
|
|
|
642
644
|
Args:
|
|
643
645
|
api_token: Loreguard API token for authentication (for server-side use)
|
|
644
646
|
player_jwt: Player JWT from Steam exchange (for game clients)
|
|
645
|
-
base_url: Loreguard API base URL (default: https://
|
|
647
|
+
base_url: Loreguard API base URL (default: https://console.loreguard.com)
|
|
646
648
|
config: Optional client configuration for timeouts
|
|
647
649
|
verbose: If True, show pipeline pass updates via WebSocket
|
|
648
650
|
tunnel: BackendTunnel instance for receiving pass_update messages (required for verbose)
|
|
@@ -12,7 +12,7 @@ Usage:
|
|
|
12
12
|
read_timeout=15.0,
|
|
13
13
|
max_retries=3
|
|
14
14
|
)
|
|
15
|
-
steam_auth = SteamAuth(api_url="https://
|
|
15
|
+
steam_auth = SteamAuth(api_url="https://console.loreguard.com", config=config)
|
|
16
16
|
|
|
17
17
|
# Exchange Steam ticket for Player JWT
|
|
18
18
|
result = await steam_auth.exchange_ticket(
|
|
@@ -40,8 +40,9 @@ import httpx
|
|
|
40
40
|
# Configure module logger
|
|
41
41
|
logger = logging.getLogger(__name__)
|
|
42
42
|
|
|
43
|
-
# Default Loreguard API URL
|
|
44
|
-
|
|
43
|
+
# Default Loreguard API URL (configurable via LOREGUARD_API env var)
|
|
44
|
+
from .config import get_api_url
|
|
45
|
+
LOREGUARD_API_URL = get_api_url()
|
|
45
46
|
|
|
46
47
|
# Validation patterns
|
|
47
48
|
STEAM_APP_ID_PATTERN = re.compile(r"^\d{1,10}$")
|
|
@@ -12,6 +12,7 @@ from textual.widgets import Input, Static, ListView, ListItem, Label
|
|
|
12
12
|
from rich.text import Text
|
|
13
13
|
|
|
14
14
|
from ..styles import PURPLE, CYAN, PINK, FG, FG_DIM, GREEN, RED
|
|
15
|
+
from ...config import get_api_url
|
|
15
16
|
from ..widgets.banner import get_gradient_color
|
|
16
17
|
|
|
17
18
|
if TYPE_CHECKING:
|
|
@@ -227,7 +228,7 @@ class AuthMenuModal(ModalScreen[tuple | None]):
|
|
|
227
228
|
|
|
228
229
|
# Update status
|
|
229
230
|
status = self.query_one("#status-line", Static)
|
|
230
|
-
status.update(Text("Get your token at loreguard.com
|
|
231
|
+
status.update(Text("Get your token at console.loreguard.com", style=FG_DIM))
|
|
231
232
|
|
|
232
233
|
def _switch_to_menu(self) -> None:
|
|
233
234
|
"""Switch back to menu mode."""
|
|
@@ -270,7 +271,7 @@ class AuthMenuModal(ModalScreen[tuple | None]):
|
|
|
270
271
|
try:
|
|
271
272
|
async with httpx.AsyncClient(timeout=10.0) as client:
|
|
272
273
|
response = await client.get(
|
|
273
|
-
"
|
|
274
|
+
f"{get_api_url()}/api/auth/me",
|
|
274
275
|
headers={"Authorization": f"Bearer {token}"},
|
|
275
276
|
)
|
|
276
277
|
if response.status_code == 200:
|
|
@@ -59,7 +59,7 @@ class TokenInputModal(ModalScreen[str | None]):
|
|
|
59
59
|
"""Compose the modal layout."""
|
|
60
60
|
with Vertical():
|
|
61
61
|
yield Static("Enter API Token", classes="modal-title")
|
|
62
|
-
yield Static("Get your token at loreguard.com
|
|
62
|
+
yield Static("Get your token at console.loreguard.com", classes="modal-hint")
|
|
63
63
|
yield Input(placeholder="Paste your token here...", password=True, id="token-input")
|
|
64
64
|
yield Static("enter submit • esc cancel", classes="modal-footer")
|
|
65
65
|
|
|
@@ -284,7 +284,7 @@ class UnifiedPaletteModal(ModalScreen[tuple[str, Any] | None]):
|
|
|
284
284
|
# Add models
|
|
285
285
|
if self._show_models:
|
|
286
286
|
from ...llama_server import get_models_dir
|
|
287
|
-
from ...models_registry import SUPPORTED_MODELS
|
|
287
|
+
from ...models_registry import SUPPORTED_MODELS, ModelInfo
|
|
288
288
|
from ...hf_discovery import discover_models
|
|
289
289
|
from ..widgets.hardware_info import detect_hardware
|
|
290
290
|
|
|
@@ -302,6 +302,33 @@ class UnifiedPaletteModal(ModalScreen[tuple[str, Any] | None]):
|
|
|
302
302
|
except Exception:
|
|
303
303
|
all_models = list(SUPPORTED_MODELS)
|
|
304
304
|
|
|
305
|
+
# Ensure all static registry models are included
|
|
306
|
+
known_filenames = {m.filename for m in all_models}
|
|
307
|
+
for model in SUPPORTED_MODELS:
|
|
308
|
+
if model.filename not in known_filenames:
|
|
309
|
+
all_models.append(model)
|
|
310
|
+
known_filenames.add(model.filename)
|
|
311
|
+
|
|
312
|
+
# Scan local models dir for GGUF files not in registry/discovery
|
|
313
|
+
if self._models_dir and self._models_dir.exists():
|
|
314
|
+
for gguf_file in self._models_dir.glob("*.gguf"):
|
|
315
|
+
if gguf_file.name not in known_filenames:
|
|
316
|
+
size_bytes = gguf_file.stat().st_size
|
|
317
|
+
size_gb = size_bytes / (1024 ** 3)
|
|
318
|
+
stem = gguf_file.stem
|
|
319
|
+
all_models.append(ModelInfo(
|
|
320
|
+
id=f"local-{stem.lower()}",
|
|
321
|
+
name=stem.replace("-", " ").replace("_", " "),
|
|
322
|
+
filename=gguf_file.name,
|
|
323
|
+
size_gb=round(size_gb, 1),
|
|
324
|
+
size_bytes=size_bytes,
|
|
325
|
+
context_length=8192,
|
|
326
|
+
url="",
|
|
327
|
+
description="Local model",
|
|
328
|
+
hardware="",
|
|
329
|
+
))
|
|
330
|
+
known_filenames.add(gguf_file.name)
|
|
331
|
+
|
|
305
332
|
# Sort: most recent first, then by size descending
|
|
306
333
|
def model_sort_key(m):
|
|
307
334
|
# Primary: sort by recency (days_ago), None goes last
|
|
@@ -16,6 +16,7 @@ from ..widgets.banner import LoreguardBanner
|
|
|
16
16
|
from ..widgets.hardware_info import HardwareInfo
|
|
17
17
|
from ..widgets.footer import LoreguardFooter
|
|
18
18
|
from ..styles import CYAN, PINK, GREEN, RED, FG_DIM
|
|
19
|
+
from ...config import get_api_url
|
|
19
20
|
|
|
20
21
|
if TYPE_CHECKING:
|
|
21
22
|
from ..app import LoreguardApp
|
|
@@ -115,7 +116,7 @@ class AuthScreen(Screen):
|
|
|
115
116
|
try:
|
|
116
117
|
async with httpx.AsyncClient(timeout=10.0) as client:
|
|
117
118
|
response = await client.get(
|
|
118
|
-
"
|
|
119
|
+
f"{get_api_url()}/api/auth/me",
|
|
119
120
|
headers={"Authorization": f"Bearer {token}"},
|
|
120
121
|
)
|
|
121
122
|
if response.status_code == 200:
|
|
@@ -16,7 +16,7 @@ from ..widgets.hardware_info import HardwareInfo
|
|
|
16
16
|
from ..widgets.server_monitor import ServerMonitor
|
|
17
17
|
from ..widgets.npc_chat import NPCChat
|
|
18
18
|
from ..widgets.footer import LoreguardFooter
|
|
19
|
-
from ...config import LoreguardConfig
|
|
19
|
+
from ...config import LoreguardConfig, get_api_url, DEFAULT_BACKEND_URL
|
|
20
20
|
|
|
21
21
|
if TYPE_CHECKING:
|
|
22
22
|
from ..app import LoreguardApp
|
|
@@ -450,7 +450,7 @@ class MainScreen(Screen):
|
|
|
450
450
|
self._log(f"Still loading... ({elapsed}s)")
|
|
451
451
|
|
|
452
452
|
# Check if process died
|
|
453
|
-
if app._llama_process.process and app._llama_process.process.poll() is not None:
|
|
453
|
+
if app._llama_process and app._llama_process.process and app._llama_process.process.poll() is not None:
|
|
454
454
|
self._log("llama-server process died", "error")
|
|
455
455
|
break
|
|
456
456
|
|
|
@@ -548,7 +548,7 @@ class MainScreen(Screen):
|
|
|
548
548
|
|
|
549
549
|
# Load dialogue act classifier (filler selection) - run in thread pool
|
|
550
550
|
dialogue_act_classifier = None
|
|
551
|
-
enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "
|
|
551
|
+
enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "false").lower() == "true"
|
|
552
552
|
if not enable_dialogue_act:
|
|
553
553
|
self._log("Dialogue act classifier disabled via LOREGUARD_DIALOGUE_ACT_ENABLED")
|
|
554
554
|
else:
|
|
@@ -649,7 +649,7 @@ class MainScreen(Screen):
|
|
|
649
649
|
self._update_connection_status("connecting")
|
|
650
650
|
|
|
651
651
|
app._tunnel = BackendTunnel(
|
|
652
|
-
backend_url=
|
|
652
|
+
backend_url=os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
|
|
653
653
|
llm_proxy=llm_proxy,
|
|
654
654
|
worker_id=app.worker_id,
|
|
655
655
|
worker_token=app.api_token,
|
|
@@ -851,7 +851,7 @@ class MainScreen(Screen):
|
|
|
851
851
|
try:
|
|
852
852
|
async with httpx.AsyncClient(timeout=10.0) as client:
|
|
853
853
|
response = await client.get(
|
|
854
|
-
"
|
|
854
|
+
f"{get_api_url()}/api/characters",
|
|
855
855
|
headers={"Authorization": f"Bearer {app.api_token}"},
|
|
856
856
|
)
|
|
857
857
|
|
|
@@ -862,7 +862,7 @@ class MainScreen(Screen):
|
|
|
862
862
|
npcs = [c for c in characters if c.get("type") != "world"]
|
|
863
863
|
|
|
864
864
|
if not npcs:
|
|
865
|
-
self._update_status("No NPCs registered. Create NPCs at loreguard.com first.")
|
|
865
|
+
self._update_status("No NPCs registered. Create NPCs at console.loreguard.com first.")
|
|
866
866
|
return
|
|
867
867
|
|
|
868
868
|
# Create NPC items
|
|
@@ -17,6 +17,7 @@ from ..widgets.banner import LoreguardBanner
|
|
|
17
17
|
from ..widgets.hardware_info import HardwareInfo
|
|
18
18
|
from ..widgets.footer import LoreguardFooter
|
|
19
19
|
from ..styles import CYAN, GREEN, YELLOW, RED, FG_DIM
|
|
20
|
+
from ...config import DEFAULT_BACKEND_URL
|
|
20
21
|
|
|
21
22
|
if TYPE_CHECKING:
|
|
22
23
|
from ..app import LoreguardApp
|
|
@@ -256,7 +257,7 @@ class RunningScreen(Screen):
|
|
|
256
257
|
|
|
257
258
|
# Load Dialogue Act Classifier
|
|
258
259
|
dialogue_act_classifier = None
|
|
259
|
-
enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "
|
|
260
|
+
enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "false").lower() == "true"
|
|
260
261
|
if not enable_dialogue_act:
|
|
261
262
|
self._update_status("dialogue_act", "Dialogue Act", "Disabled", "info")
|
|
262
263
|
self._log("Dialogue act classifier disabled via LOREGUARD_DIALOGUE_ACT_ENABLED", "info")
|
|
@@ -322,7 +323,7 @@ class RunningScreen(Screen):
|
|
|
322
323
|
model_id = app.model_path.stem
|
|
323
324
|
|
|
324
325
|
self._tunnel = BackendTunnel(
|
|
325
|
-
backend_url=
|
|
326
|
+
backend_url=os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
|
|
326
327
|
llm_proxy=llm_proxy,
|
|
327
328
|
worker_id=app.worker_id,
|
|
328
329
|
worker_token=app.api_token,
|
|
@@ -6,6 +6,7 @@ Uses the local proxy for NPC conversations with token streaming:
|
|
|
6
6
|
|
|
7
7
|
import json
|
|
8
8
|
import logging
|
|
9
|
+
import os
|
|
9
10
|
from typing import TYPE_CHECKING
|
|
10
11
|
|
|
11
12
|
import httpx
|
|
@@ -23,8 +24,9 @@ from ...runtime import RuntimeInfo
|
|
|
23
24
|
if TYPE_CHECKING:
|
|
24
25
|
from ..app import LoreguardApp
|
|
25
26
|
|
|
26
|
-
# Fallback to cloud API if local proxy unavailable
|
|
27
|
-
|
|
27
|
+
# Fallback to cloud API if local proxy unavailable (configurable via LOREGUARD_API env var)
|
|
28
|
+
from ...config import get_api_url
|
|
29
|
+
LOREGUARD_API_URL = get_api_url()
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
def get_local_proxy_url() -> str | None:
|
|
@@ -81,7 +81,8 @@ class BackendTunnel:
|
|
|
81
81
|
self.registered = False
|
|
82
82
|
self.backend_version = "" # Populated from worker_ack
|
|
83
83
|
self._reconnect_delay = 1 # Start with 1 second
|
|
84
|
-
self._max_reconnect_delay =
|
|
84
|
+
self._max_reconnect_delay = 3 # Cap at 3s until first successful connection
|
|
85
|
+
self._has_connected = False # Tracks if we've ever connected successfully
|
|
85
86
|
self._running = True
|
|
86
87
|
self._shutdown_requested = False
|
|
87
88
|
self._heartbeat_task: asyncio.Task | None = None
|
|
@@ -122,7 +123,7 @@ class BackendTunnel:
|
|
|
122
123
|
"""Establish and maintain connection to backend with auto-reconnect."""
|
|
123
124
|
if not self.worker_id or not self.worker_token:
|
|
124
125
|
self._log("Error: Worker ID and API token are required", "error")
|
|
125
|
-
self._log("Get an API token from loreguard.com
|
|
126
|
+
self._log("Get an API token from console.loreguard.com", "warn")
|
|
126
127
|
return
|
|
127
128
|
|
|
128
129
|
last_error = ""
|
|
@@ -210,6 +211,11 @@ class BackendTunnel:
|
|
|
210
211
|
connection_start = time.time()
|
|
211
212
|
self._log("Connected to backend!", "success")
|
|
212
213
|
|
|
214
|
+
# After first successful connection, use longer backoff for reconnections
|
|
215
|
+
if not self._has_connected:
|
|
216
|
+
self._has_connected = True
|
|
217
|
+
self._max_reconnect_delay = 60
|
|
218
|
+
|
|
213
219
|
# Register as worker
|
|
214
220
|
success, error_reason = await self._register_worker()
|
|
215
221
|
if not success:
|
|
@@ -54,6 +54,8 @@ from rich.box import ROUNDED
|
|
|
54
54
|
from rich.align import Align
|
|
55
55
|
from rich.layout import Layout
|
|
56
56
|
|
|
57
|
+
from .config import get_api_url, DEFAULT_BACKEND_URL
|
|
58
|
+
|
|
57
59
|
# Logger instance
|
|
58
60
|
log = logging.getLogger("loreguard")
|
|
59
61
|
|
|
@@ -1044,7 +1046,7 @@ async def step_authentication(app: Optional[TUIApp] = None) -> tuple[Optional[st
|
|
|
1044
1046
|
try:
|
|
1045
1047
|
async with httpx.AsyncClient(timeout=10.0) as client:
|
|
1046
1048
|
response = await client.get(
|
|
1047
|
-
"
|
|
1049
|
+
f"{get_api_url()}/api/auth/me",
|
|
1048
1050
|
headers={"Authorization": f"Bearer {token}"},
|
|
1049
1051
|
)
|
|
1050
1052
|
if response.status_code == 200:
|
|
@@ -1510,7 +1512,7 @@ async def step_start(
|
|
|
1510
1512
|
|
|
1511
1513
|
model_id = _resolve_backend_model_id(model_path.stem)
|
|
1512
1514
|
tunnel = BackendTunnel(
|
|
1513
|
-
backend_url=
|
|
1515
|
+
backend_url=os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
|
|
1514
1516
|
llm_proxy=llm_proxy,
|
|
1515
1517
|
worker_id=worker_id,
|
|
1516
1518
|
worker_token=token,
|
|
@@ -476,7 +476,7 @@ wheels = [
|
|
|
476
476
|
|
|
477
477
|
[[package]]
|
|
478
478
|
name = "huggingface-hub"
|
|
479
|
-
version = "0.36.
|
|
479
|
+
version = "0.36.2"
|
|
480
480
|
source = { registry = "https://pypi.org/simple" }
|
|
481
481
|
dependencies = [
|
|
482
482
|
{ name = "filelock" },
|
|
@@ -488,9 +488,9 @@ dependencies = [
|
|
|
488
488
|
{ name = "tqdm" },
|
|
489
489
|
{ name = "typing-extensions" },
|
|
490
490
|
]
|
|
491
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
491
|
+
sdist = { url = "https://files.pythonhosted.org/packages/7c/b7/8cb61d2eece5fb05a83271da168186721c450eb74e3c31f7ef3169fa475b/huggingface_hub-0.36.2.tar.gz", hash = "sha256:1934304d2fb224f8afa3b87007d58501acfda9215b334eed53072dd5e815ff7a", size = 649782, upload-time = "2026-02-06T09:24:13.098Z" }
|
|
492
492
|
wheels = [
|
|
493
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
493
|
+
{ url = "https://files.pythonhosted.org/packages/a8/af/48ac8483240de756d2438c380746e7130d1c6f75802ef22f3c6d49982787/huggingface_hub-0.36.2-py3-none-any.whl", hash = "sha256:48f0c8eac16145dfce371e9d2d7772854a4f591bcb56c9cf548accf531d54270", size = 566395, upload-time = "2026-02-06T09:24:11.133Z" },
|
|
494
494
|
]
|
|
495
495
|
|
|
496
496
|
[[package]]
|
|
@@ -600,7 +600,7 @@ wheels = [
|
|
|
600
600
|
|
|
601
601
|
[[package]]
|
|
602
602
|
name = "loreguard-cli"
|
|
603
|
-
version = "0.
|
|
603
|
+
version = "0.16.0"
|
|
604
604
|
source = { editable = "." }
|
|
605
605
|
dependencies = [
|
|
606
606
|
{ name = "aiofiles" },
|
|
@@ -649,7 +649,7 @@ requires-dist = [
|
|
|
649
649
|
{ name = "textual", specifier = ">=0.47.0" },
|
|
650
650
|
{ name = "tf-keras", specifier = ">=2.16.0" },
|
|
651
651
|
{ name = "torch", specifier = ">=2.0.0" },
|
|
652
|
-
{ name = "transformers", specifier = ">=4.36.0" },
|
|
652
|
+
{ name = "transformers", specifier = ">=4.36.0,<5" },
|
|
653
653
|
{ name = "uvicorn", specifier = ">=0.27.0" },
|
|
654
654
|
{ name = "websockets", specifier = ">=12.0" },
|
|
655
655
|
]
|
|
@@ -2226,7 +2226,7 @@ wheels = [
|
|
|
2226
2226
|
|
|
2227
2227
|
[[package]]
|
|
2228
2228
|
name = "transformers"
|
|
2229
|
-
version = "4.57.
|
|
2229
|
+
version = "4.57.6"
|
|
2230
2230
|
source = { registry = "https://pypi.org/simple" }
|
|
2231
2231
|
dependencies = [
|
|
2232
2232
|
{ name = "filelock" },
|
|
@@ -2241,9 +2241,9 @@ dependencies = [
|
|
|
2241
2241
|
{ name = "tokenizers" },
|
|
2242
2242
|
{ name = "tqdm" },
|
|
2243
2243
|
]
|
|
2244
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
2244
|
+
sdist = { url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912, upload-time = "2026-01-16T10:38:39.284Z" }
|
|
2245
2245
|
wheels = [
|
|
2246
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
2246
|
+
{ url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload-time = "2026-01-16T10:38:31.289Z" },
|
|
2247
2247
|
]
|
|
2248
2248
|
|
|
2249
2249
|
[[package]]
|
{loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/.claude/skills/llama-cpp-troubleshooting/SKILL.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|