loreguard-cli 0.16.0__tar.gz → 0.20.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/PKG-INFO +2 -2
  2. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/pyproject.toml +2 -2
  3. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/__main__.py +23 -0
  4. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/cli.py +11 -3
  5. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/config.py +55 -8
  6. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/http_server.py +28 -11
  7. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/llama_server.py +7 -4
  8. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/llm.py +9 -5
  9. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/main.py +3 -3
  10. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/models_registry.py +12 -0
  11. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/nli.py +79 -15
  12. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/npc_chat.py +7 -5
  13. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/steam.py +4 -3
  14. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/modals/auth_menu.py +3 -2
  15. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/modals/token_input.py +1 -1
  16. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/modals/unified_palette.py +28 -1
  17. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/auth.py +2 -1
  18. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/main.py +6 -6
  19. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/running.py +3 -2
  20. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/npc_chat.py +4 -2
  21. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tunnel.py +8 -2
  22. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/wizard.py +4 -2
  23. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/uv.lock +8 -8
  24. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/.claude/skills/llama-cpp-troubleshooting/SKILL.md +0 -0
  25. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/.env.example +0 -0
  26. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/.github/workflows/release.yml +0 -0
  27. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/.gitignore +0 -0
  28. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/LICENSE +0 -0
  29. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/README.md +0 -0
  30. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/THIRD_PARTY_NOTICES.md +0 -0
  31. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/loreguard.spec +0 -0
  32. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/loreguard_entry.py +0 -0
  33. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/scripts/build.py +0 -0
  34. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/sdk/API.md +0 -0
  35. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/sdk/csharp/LoreguardSDK.cs +0 -0
  36. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/sdk/gdscript/LoreguardSDK.gd +0 -0
  37. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/sdk/javascript/loreguard-sdk.js +0 -0
  38. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/sdk/python/loreguard_sdk.py +0 -0
  39. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/__init__.py +0 -0
  40. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/chunk_detector.py +0 -0
  41. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/dialogue_act_classifier.py +0 -0
  42. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/hf_discovery.py +0 -0
  43. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/intent_classifier.py +0 -0
  44. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/model_families.py +0 -0
  45. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/runtime.py +0 -0
  46. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/term_ui.py +0 -0
  47. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/__init__.py +0 -0
  48. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/app.py +0 -0
  49. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/modals/__init__.py +0 -0
  50. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/modals/npc_chat.py +0 -0
  51. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/__init__.py +0 -0
  52. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/model_select.py +0 -0
  53. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/screens/nli_setup.py +0 -0
  54. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/styles.py +0 -0
  55. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/__init__.py +0 -0
  56. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/banner.py +0 -0
  57. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/footer.py +0 -0
  58. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/hardware_info.py +0 -0
  59. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/server_monitor.py +0 -0
  60. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/src/tui/widgets/status_panel.py +0 -0
  61. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/templates/llama31-no-tools.jinja +0 -0
  62. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/tests/test_intent_classifier.py +0 -0
  63. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/tests/test_nli_hhem.py +0 -0
  64. {loreguard_cli-0.16.0 → loreguard_cli-0.20.3}/tests/test_websocket_timeout.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: loreguard-cli
3
- Version: 0.16.0
3
+ Version: 0.20.3
4
4
  Summary: Local inference client for Loreguard NPCs
5
5
  Project-URL: Homepage, https://loreguard.com
6
6
  Project-URL: Documentation, https://github.com/beyond-logic-labs/loreguard-cli#readme
@@ -29,7 +29,7 @@ Requires-Dist: rich>=13.0.0
29
29
  Requires-Dist: textual>=0.47.0
30
30
  Requires-Dist: tf-keras>=2.16.0
31
31
  Requires-Dist: torch>=2.0.0
32
- Requires-Dist: transformers>=5.0.0
32
+ Requires-Dist: transformers<5,>=4.36.0
33
33
  Requires-Dist: uvicorn>=0.27.0
34
34
  Requires-Dist: websockets>=12.0
35
35
  Provides-Extra: build
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "loreguard-cli"
7
- version = "0.16.0"
7
+ version = "0.20.3"
8
8
  description = "Local inference client for Loreguard NPCs"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -28,7 +28,7 @@ dependencies = [
28
28
  "aiofiles>=24.1.0",
29
29
  "rich>=13.0.0",
30
30
  "textual>=0.47.0",
31
- "transformers>=5.0.0",
31
+ "transformers>=4.36.0,<5",
32
32
  "torch>=2.0.0",
33
33
  "fastapi>=0.109.0",
34
34
  "uvicorn>=0.27.0",
@@ -28,6 +28,29 @@ def main():
28
28
  print(json.dumps(status, indent=2))
29
29
  sys.exit(0 if status.get("running") else 1)
30
30
 
31
+ # Handle 'download-llama-server' command - for bundle tool delegation (ADR-0027)
32
+ if args and args[0] == "download-llama-server":
33
+ import asyncio
34
+ from pathlib import Path
35
+ from .llama_server import download_llama_server
36
+
37
+ output_dir = None
38
+ for i, a in enumerate(args):
39
+ if a == "--output-dir" and i + 1 < len(args):
40
+ output_dir = Path(args[i + 1])
41
+
42
+ if not output_dir:
43
+ print("Usage: loreguard download-llama-server --output-dir <path>", file=sys.stderr)
44
+ sys.exit(1)
45
+
46
+ output_dir.mkdir(parents=True, exist_ok=True)
47
+
48
+ def on_progress(msg, progress=None):
49
+ print(f" {msg}")
50
+
51
+ asyncio.run(download_llama_server(progress_callback=on_progress, target_dir=output_dir))
52
+ sys.exit(0)
53
+
31
54
  # Filter out help flags - these should show CLI help
32
55
  if any(a in ('-h', '--help') for a in args):
33
56
  from .cli import main as cli_main
@@ -11,7 +11,8 @@ Environment variables (alternative to args):
11
11
  LOREGUARD_MODEL Path to model file
12
12
  LOREGUARD_MODEL_ID Model ID to download (if not using custom model)
13
13
  LOREGUARD_PORT Local llama-server port (default: 8080)
14
- LOREGUARD_BACKEND Backend URL (default: wss://api.loreguard.com/workers)
14
+ LOREGUARD_BACKEND Backend WebSocket URL (default: wss://console.loreguard.com/workers)
15
+ LOREGUARD_API API base URL (default: https://console.loreguard.com)
15
16
  LOREGUARD_WORKER_ID Worker ID (default: hostname)
16
17
  """
17
18
 
@@ -26,6 +27,8 @@ from datetime import datetime
26
27
  from pathlib import Path
27
28
  from typing import Optional
28
29
 
30
+ from .config import DEFAULT_API_URL, DEFAULT_BACKEND_URL
31
+
29
32
  # Setup logging
30
33
  logging.basicConfig(
31
34
  level=logging.INFO,
@@ -44,7 +47,7 @@ class LoreguardCLI:
44
47
  model_path: Optional[Path] = None,
45
48
  model_id: Optional[str] = None,
46
49
  port: int = 8080,
47
- backend_url: str = "wss://api.loreguard.com/workers",
50
+ backend_url: str = DEFAULT_BACKEND_URL,
48
51
  worker_id: Optional[str] = None,
49
52
  model_family: str = "llama3",
50
53
  ):
@@ -454,9 +457,14 @@ Available model IDs:
454
457
  )
455
458
  parser.add_argument(
456
459
  "--backend",
457
- default=os.getenv("LOREGUARD_BACKEND", "wss://api.loreguard.com/workers"),
460
+ default=os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
458
461
  help="Backend WebSocket URL",
459
462
  )
463
+ parser.add_argument(
464
+ "--api-url",
465
+ default=os.getenv("LOREGUARD_API", DEFAULT_API_URL),
466
+ help=f"API base URL (default: {DEFAULT_API_URL})",
467
+ )
460
468
  parser.add_argument(
461
469
  "-v", "--verbose",
462
470
  action="store_true",
@@ -50,6 +50,7 @@ class LoreguardConfig:
50
50
  context_size: int = 16384 # llama-server context window size (configurable per game)
51
51
  max_speech_tokens: int = 50 # Max tokens for NPC speech output (Pass 4). Default: 50 (~40 words)
52
52
  model_family: str = "auto" # Model family profile (auto, llama3, qwen3, gemma, chatml)
53
+ dialogue_act_enabled: bool = False # Dialogue act classifier for filler selection
53
54
 
54
55
  def save(self) -> None:
55
56
  """Save configuration to disk."""
@@ -73,6 +74,7 @@ class LoreguardConfig:
73
74
  context_size=data.get("context_size", 16384),
74
75
  max_speech_tokens=data.get("max_speech_tokens", 50),
75
76
  model_family=data.get("model_family", "auto"),
77
+ dialogue_act_enabled=data.get("dialogue_act_enabled", False),
76
78
  )
77
79
  except (json.JSONDecodeError, KeyError):
78
80
  pass
@@ -121,6 +123,14 @@ class LoreguardConfig:
121
123
  # Environment Variable Configuration
122
124
  # =============================================================================
123
125
 
126
+ DEFAULT_API_URL = "https://console.loreguard.com"
127
+ DEFAULT_BACKEND_URL = "wss://console.loreguard.com/workers"
128
+
129
+
130
+ def get_api_url() -> str:
131
+ """Get the Loreguard API base URL (configurable via LOREGUARD_API env var)."""
132
+ return os.getenv("LOREGUARD_API", DEFAULT_API_URL)
133
+
124
134
 
125
135
  @lru_cache(maxsize=1)
126
136
  def load_config() -> dict:
@@ -133,12 +143,13 @@ def load_config() -> dict:
133
143
  return {
134
144
  # Server settings
135
145
  "LLM_ENDPOINT": os.getenv("LLM_ENDPOINT", "http://localhost:8080"),
136
- "BACKEND_URL": os.getenv("LOREGUARD_BACKEND", "wss://api.loreguard.com/workers"),
146
+ "BACKEND_URL": os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
147
+ "API_URL": os.getenv("LOREGUARD_API", DEFAULT_API_URL),
137
148
  "HOST": os.getenv("HOST", "127.0.0.1"),
138
149
  "PORT": os.getenv("PORT", "8081"),
139
150
 
140
151
  # Worker authentication (required for backend connection)
141
- # Get API token from loreguard.com dashboard
152
+ # Get API token from console.loreguard.com
142
153
  "WORKER_ID": os.getenv("LOREGUARD_WORKER_ID", os.getenv("WORKER_ID", "")),
143
154
  # LOREGUARD_TOKEN is preferred, WORKER_TOKEN kept for backwards compatibility
144
155
  "LOREGUARD_TOKEN": os.getenv("LOREGUARD_TOKEN", os.getenv("WORKER_TOKEN", "")),
@@ -232,20 +243,21 @@ def get_models_dir() -> Optional[Path]:
232
243
 
233
244
 
234
245
  def resolve_model_path(model_name: str, subdir: str = "") -> str:
235
- """Resolve a model path, preferring pre-shipped models over HF downloads.
246
+ """Resolve a model path, preferring local models over HF downloads.
236
247
 
237
248
  Resolution order:
238
249
  1. LOREGUARD_MODELS_DIR/<subdir> (explicit override)
239
- 2. Bundle models dir using manifest.txt (HF name → manifest key → local dir)
240
- 3. Bundle models dir using HF name → org--model convention (fallback)
241
- 4. Original HF model name (download from HuggingFace)
250
+ 2. Application Support models dir/<subdir> (standard install location)
251
+ 3. Bundle models dir using manifest.txt (HF name → manifest key → local dir)
252
+ 4. Bundle models dir using HF name → org--model convention (fallback)
253
+ 5. Download from HuggingFace to Application Support models dir
242
254
 
243
255
  Args:
244
256
  model_name: HuggingFace model name (e.g., 'vectara/hallucination_evaluation_model')
245
257
  subdir: Subdirectory within MODELS_DIR to check (e.g., 'hhem', 'deberta')
246
258
 
247
259
  Returns:
248
- Local path if pre-shipped model found, otherwise the original HF model name.
260
+ Local path to the model directory.
249
261
  """
250
262
  # 1. Explicit LOREGUARD_MODELS_DIR/<subdir>
251
263
  explicit_dir = get_config_value("MODELS_DIR")
@@ -254,7 +266,14 @@ def resolve_model_path(model_name: str, subdir: str = "") -> str:
254
266
  if local_path.exists() and any(local_path.iterdir()):
255
267
  return str(local_path)
256
268
 
257
- # 2 & 3. Bundle directory resolution
269
+ # 2. Application Support models dir/<subdir>
270
+ app_models = get_data_dir() / "models"
271
+ if subdir:
272
+ local_path = app_models / subdir
273
+ if local_path.exists() and any(local_path.iterdir()):
274
+ return str(local_path)
275
+
276
+ # 3 & 4. Bundle directory resolution
258
277
  bundle_dir = get_bundle_dir()
259
278
  if bundle_dir:
260
279
  bundle_models = bundle_dir / "models"
@@ -275,9 +294,37 @@ def resolve_model_path(model_name: str, subdir: str = "") -> str:
275
294
  if local_path.exists() and any(local_path.iterdir()):
276
295
  return str(local_path)
277
296
 
297
+ # 5. Download from HuggingFace to Application Support models dir
298
+ if subdir:
299
+ return _download_hf_model(model_name, app_models / subdir)
300
+
278
301
  return model_name
279
302
 
280
303
 
304
+ def _download_hf_model(model_name: str, target_dir: Path) -> str:
305
+ """Download a HuggingFace model to the loreguard models directory.
306
+
307
+ Returns:
308
+ Path to the downloaded model directory.
309
+ """
310
+ import logging
311
+ logger = logging.getLogger(__name__)
312
+ try:
313
+ from huggingface_hub import snapshot_download
314
+ target_dir.mkdir(parents=True, exist_ok=True)
315
+ logger.info(f"Downloading {model_name} to {target_dir}")
316
+ snapshot_download(
317
+ model_name,
318
+ local_dir=str(target_dir),
319
+ local_dir_use_symlinks=False,
320
+ )
321
+ logger.info(f"Downloaded {model_name} to {target_dir}")
322
+ return str(target_dir)
323
+ except Exception as e:
324
+ logger.warning(f"Failed to download {model_name}: {e}")
325
+ return model_name
326
+
327
+
281
328
  def get_config_value(key: str, default: Optional[str] = None) -> Optional[str]:
282
329
  """Get a single configuration value."""
283
330
  config = load_config()
@@ -367,7 +367,7 @@ class EmbeddedHTTPServer:
367
367
 
368
368
  # Derive HTTP base URL from WebSocket URL
369
369
  # ws://localhost:8090/workers → http://localhost:8090
370
- # wss://api.loreguard.com/workers → https://api.loreguard.com
370
+ # wss://console.loreguard.com/workers → https://console.loreguard.com
371
371
  backend_ws = server.tunnel.backend_url
372
372
  if backend_ws.startswith("wss://"):
373
373
  base_url = "https://" + backend_ws[6:].split("/")[0]
@@ -535,15 +535,15 @@ class EmbeddedHTTPServer:
535
535
  content={"error": "Missing 'model' field"},
536
536
  )
537
537
 
538
- # Security: prevent path traversal
539
- if "/" in model_name or "\\" in model_name or ".." in model_name:
538
+ # Security: resolve and verify path stays inside models_dir
539
+ model_path = (server.models_dir / model_name).resolve()
540
+ if model_path.parent != server.models_dir.resolve():
540
541
  return JSONResponse(
541
542
  status_code=400,
542
543
  content={"error": "Invalid model name"},
543
544
  )
544
545
 
545
- model_path = server.models_dir / model_name
546
- if not model_path.exists() or not model_path.suffix == ".gguf":
546
+ if not model_path.exists() or model_path.suffix != ".gguf":
547
547
  return JSONResponse(
548
548
  status_code=404,
549
549
  content={"error": f"Model '{model_name}' not found"},
@@ -553,6 +553,9 @@ class EmbeddedHTTPServer:
553
553
  if hasattr(server.llama_process, "model_path") and server.llama_process.model_path.name == model_name:
554
554
  return {"status": "already_active", "model": model_name}
555
555
 
556
+ # Save original model_path for rollback on failure
557
+ original_model_path = server.llama_process.model_path
558
+
556
559
  try:
557
560
  # Stop current llama-server
558
561
  server.llama_process.stop()
@@ -564,21 +567,35 @@ class EmbeddedHTTPServer:
564
567
  # Wait for health check (llama-server takes a few seconds to load model)
565
568
  import httpx
566
569
  llama_url = f"http://127.0.0.1:{server.llama_process.port}/health"
567
- for attempt in range(60): # 60 attempts × 0.5s = 30s timeout
568
- await asyncio.sleep(0.5)
569
- try:
570
- async with httpx.AsyncClient(timeout=2.0) as client:
570
+ async with httpx.AsyncClient(timeout=2.0) as client:
571
+ for attempt in range(60): # 60 attempts × 0.5s = 30s timeout
572
+ await asyncio.sleep(0.5)
573
+ try:
571
574
  resp = await client.get(llama_url)
572
575
  if resp.status_code == 200:
576
+ # Persist selection so it survives restarts
577
+ try:
578
+ from .config import LoreguardConfig
579
+ cfg = LoreguardConfig.load()
580
+ cfg.set_model_path(model_path)
581
+ cfg.save()
582
+ except Exception:
583
+ pass # Best-effort persistence
573
584
  return {"status": "ok", "model": model_name}
574
- except Exception:
575
- continue
585
+ except Exception:
586
+ continue
576
587
 
577
588
  return JSONResponse(
578
589
  status_code=500,
579
590
  content={"error": "Model loaded but health check timed out after 30s"},
580
591
  )
581
592
  except Exception as e:
593
+ # Rollback: restore original model path and try to restart
594
+ server.llama_process.model_path = original_model_path
595
+ try:
596
+ server.llama_process.start()
597
+ except Exception:
598
+ pass # Best-effort rollback
582
599
  return JSONResponse(
583
600
  status_code=500,
584
601
  content={"error": f"Failed to reload model: {e}"},
@@ -30,7 +30,7 @@ def _get_templates_dir() -> Path:
30
30
  return Path(__file__).parent.parent / "templates"
31
31
 
32
32
 
33
- LLAMA_VERSION = "b7789" # Must match loreguard-engine bundle version
33
+ LLAMA_VERSION = "b8467" # Must match loreguard-engine bundle version
34
34
 
35
35
  # Download URLs for each platform
36
36
  BINARIES = {
@@ -265,18 +265,21 @@ def make_executable(path: Path) -> None:
265
265
 
266
266
  async def download_llama_server(
267
267
  progress_callback: Optional[Callable[[str, DownloadProgress | None], None]] = None,
268
+ target_dir: Optional[Path] = None,
268
269
  ) -> Path:
269
270
  """Download and install llama-server for the current platform.
270
271
 
271
272
  Args:
272
273
  progress_callback: Called with (status_message, progress_or_none)
274
+ target_dir: If provided, install into this directory instead of the default.
275
+ Used by the bundle tool to pre-ship llama-server.
273
276
 
274
277
  Returns:
275
278
  Path to the installed llama-server binary
276
279
  """
277
280
  plat = get_platform()
278
281
  config = BINARIES[plat]
279
- bin_dir = get_bin_dir()
282
+ bin_dir = target_dir or get_bin_dir()
280
283
 
281
284
  def notify(msg: str, progress: DownloadProgress | None = None):
282
285
  if progress_callback:
@@ -355,12 +358,12 @@ async def download_llama_server(
355
358
  make_executable(lib)
356
359
 
357
360
  # Write version marker file for future version checks
358
- version_file = get_version_file_path()
361
+ version_file = bin_dir / ".llama_version" if target_dir else get_version_file_path()
359
362
  version_file.write_text(LLAMA_VERSION)
360
363
 
361
364
  notify(f"llama-server {LLAMA_VERSION} installed successfully!")
362
365
 
363
- return get_llama_server_path()
366
+ return bin_dir / config["binary_name"]
364
367
 
365
368
 
366
369
  class LlamaServerProcess:
@@ -61,7 +61,9 @@ class LLMRequest:
61
61
  stop: list[str] = field(default_factory=lambda: DEFAULT_STOP_SEQUENCES.copy())
62
62
 
63
63
  # Thinking mode control (for Qwen3)
64
- disable_thinking: bool = False
64
+ # Defaults to True: thinking wastes tokens and breaks pipelines.
65
+ # Only enable explicitly when extended reasoning is desired.
66
+ disable_thinking: bool = True
65
67
 
66
68
  # If true, error if content is empty instead of falling back to reasoning_content
67
69
  require_content: bool = False
@@ -257,9 +259,10 @@ class LLMProxy:
257
259
  payload["id_slot"] = 0
258
260
  logger.info("KV cache: cache_prompt=true, id_slot=0 (verify -np 1 on server)")
259
261
 
260
- # Disable thinking mode if requested (for Qwen3)
262
+ # Disable thinking mode (for Qwen3/3.5).
263
+ # Must use chat_template_kwargs — top-level enable_thinking is ignored by llama.cpp b8467+.
261
264
  if req.disable_thinking:
262
- payload["enable_thinking"] = False
265
+ payload.setdefault("chat_template_kwargs", {})["enable_thinking"] = False
263
266
 
264
267
  # Note: JSON mode is not compatible with streaming in llama.cpp
265
268
  # If force_json is requested, fall back to non-streaming
@@ -573,9 +576,10 @@ class LLMProxy:
573
576
  payload["id_slot"] = 0
574
577
  logger.info("KV cache: cache_prompt=true, id_slot=0 (verify -np 1 on server)")
575
578
 
576
- # Disable thinking mode if requested (for Qwen3)
579
+ # Disable thinking mode (for Qwen3/3.5).
580
+ # Must use chat_template_kwargs — top-level enable_thinking is ignored by llama.cpp b8467+.
577
581
  if req.disable_thinking:
578
- payload["enable_thinking"] = False
582
+ payload.setdefault("chat_template_kwargs", {})["enable_thinking"] = False
579
583
 
580
584
  # Force JSON output if requested
581
585
  if req.force_json:
@@ -31,7 +31,7 @@ from rich.console import Console
31
31
 
32
32
  from .tunnel import BackendTunnel
33
33
  from .llm import LLMProxy
34
- from .config import get_config_value, resolve_model_path
34
+ from .config import get_config_value, resolve_model_path, DEFAULT_BACKEND_URL
35
35
  from .nli import NLIService, is_nli_model_available
36
36
  from .intent_classifier import IntentClassifier, is_intent_model_available
37
37
  from .dialogue_act_classifier import (
@@ -118,7 +118,7 @@ async def startup():
118
118
  console.print("[yellow]Intent classifier disabled (set LOREGUARD_INTENT_ENABLED=true to enable)[/yellow]")
119
119
 
120
120
  # Initialize dialogue act classifier (optional, for filler selection)
121
- enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "true").lower() == "true"
121
+ enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "false").lower() == "true"
122
122
  if enable_dialogue_act:
123
123
  console.print("[cyan]Initializing dialogue act classifier...[/cyan]")
124
124
  if is_dialogue_act_model_available():
@@ -170,7 +170,7 @@ async def startup():
170
170
  chunk_detector = None
171
171
 
172
172
  # Connect to remote backend
173
- backend_url = get_config_value("BACKEND_URL", "wss://api.lorekeeper.ai/workers")
173
+ backend_url = get_config_value("BACKEND_URL", DEFAULT_BACKEND_URL)
174
174
  worker_id = get_config_value("WORKER_ID", "")
175
175
  worker_token = get_config_value("WORKER_TOKEN", "")
176
176
  model_id = get_config_value("MODEL_ID", "default")
@@ -92,6 +92,18 @@ SUPPORTED_MODELS: list[ModelInfo] = [
92
92
  hardware="32GB RAM • 20GB VRAM",
93
93
  recommended=False,
94
94
  ),
95
+ ModelInfo(
96
+ id="qwen3.5-9b-q4km",
97
+ name="Qwen 3.5 9B Q4_K_M",
98
+ filename="Qwen3.5-9B-Q4_K_M.gguf",
99
+ size_gb=5.2,
100
+ size_bytes=5_627_044_256,
101
+ context_length=32768,
102
+ url="https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf",
103
+ description="Strong general model. 32K context, good reasoning.",
104
+ hardware="12GB RAM • 8GB VRAM",
105
+ recommended=False,
106
+ ),
95
107
  ]
96
108
 
97
109
 
@@ -328,6 +328,26 @@ class NLIService:
328
328
 
329
329
  return results
330
330
 
331
+ def _resolve_model_dir(self) -> Optional[str]:
332
+ """Resolve the actual directory containing model files.
333
+
334
+ For local paths, returns as-is. For HuggingFace repo IDs (e.g.
335
+ 'vectara/hallucination_evaluation_model'), resolves the cache snapshot dir.
336
+ """
337
+ if os.path.isdir(self._model_path):
338
+ return self._model_path
339
+ try:
340
+ from huggingface_hub import snapshot_download
341
+ return snapshot_download(self._model_path, local_files_only=True)
342
+ except Exception:
343
+ # Cache miss — trigger a download so the snapshot exists
344
+ try:
345
+ from huggingface_hub import snapshot_download
346
+ return snapshot_download(self._model_path)
347
+ except Exception as e:
348
+ logger.warning(f"Could not resolve model dir for {self._model_path}: {e}")
349
+ return None
350
+
331
351
  def _patch_hhem_model_files(self):
332
352
  """Patch vendored HHEM files for transformers 5.x compatibility.
333
353
 
@@ -336,8 +356,31 @@ class NLIService:
336
356
  2. Is stricter about model_type matching between config.json and config class
337
357
  Since trust_remote_code loads the .py files directly, we patch before loading.
338
358
  """
359
+ model_dir = self._resolve_model_dir()
360
+ if not model_dir:
361
+ logger.warning("Cannot resolve model directory for patching")
362
+ return
363
+
364
+ # Also patch the modules cache (transformers copies .py files there)
365
+ modules_dirs = [model_dir]
366
+ modules_cache = os.path.join(
367
+ os.path.expanduser("~"), ".cache", "huggingface", "modules",
368
+ "transformers_modules",
369
+ )
370
+ if os.path.isdir(modules_cache):
371
+ for root, dirs, files in os.walk(modules_cache):
372
+ if "modeling_hhem_v2.py" in files:
373
+ modules_dirs.append(root)
374
+
375
+ for patch_dir in modules_dirs:
376
+ self._patch_dir(patch_dir)
377
+
378
+ def _patch_dir(self, patch_dir: str):
379
+ """Apply HHEM patches to a single directory."""
339
380
  # Patch 1: modeling_hhem_v2.py — add missing class attributes
340
- model_file = os.path.join(self._model_path, "modeling_hhem_v2.py")
381
+ model_file = os.path.join(patch_dir, "modeling_hhem_v2.py")
382
+ if not os.path.exists(model_file):
383
+ return
341
384
  if os.path.exists(model_file):
342
385
  try:
343
386
  content = open(model_file, "r").read()
@@ -354,13 +397,18 @@ class NLIService:
354
397
  if patched != content:
355
398
  with open(model_file, "w") as f:
356
399
  f.write(patched)
357
- logger.info("Patched modeling_hhem_v2.py for transformers 5.x")
400
+ # Clear __pycache__ so patched file is reloaded
401
+ pycache = os.path.join(patch_dir, "__pycache__")
402
+ if os.path.isdir(pycache):
403
+ import shutil
404
+ shutil.rmtree(pycache)
405
+ logger.info(f"Patched {model_file} for transformers 5.x")
358
406
  except Exception as e:
359
407
  logger.warning(f"Could not patch modeling_hhem_v2.py: {e}")
360
408
 
361
409
  # Patch 2: config.json — fix model_type mismatch
362
410
  # config.json has "HHEMv2Config" but the config class defines model_type = "HHEMv2"
363
- config_file = os.path.join(self._model_path, "config.json")
411
+ config_file = os.path.join(patch_dir, "config.json")
364
412
  if os.path.exists(config_file):
365
413
  try:
366
414
  content = open(config_file, "r").read()
@@ -375,24 +423,40 @@ class NLIService:
375
423
  except Exception as e:
376
424
  logger.warning(f"Could not patch config.json: {e}")
377
425
 
378
- # Patch 3: configuration_hhem_v2.py use local flan-t5-base instead of HuggingFace
379
- # The HHEM model downloads google/flan-t5-base config+tokenizer at init.
380
- # If we've bundled those files locally, rewrite the foundation path.
381
- config_py = os.path.join(self._model_path, "configuration_hhem_v2.py")
382
- local_foundation = os.path.join(self._model_path, "flan-t5-base")
426
+ # Patch 3: configuration_hhem_v2.py, point the foundation at the bundled
427
+ # flan-t5-base. Resolve it RELATIVE to the config file at runtime (via
428
+ # __file__) so the path is portable and never a baked absolute machine path.
429
+ config_py = os.path.join(patch_dir, "configuration_hhem_v2.py")
430
+ local_foundation = os.path.join(patch_dir, "flan-t5-base")
383
431
  if os.path.exists(config_py) and os.path.isdir(local_foundation):
384
432
  try:
433
+ import re
434
+
385
435
  content = open(config_py, "r").read()
386
- if '"google/flan-t5-base"' in content:
387
- # Use absolute path to the bundled flan-t5-base files
388
- abs_path = os.path.abspath(local_foundation)
389
- patched = content.replace(
390
- '"google/flan-t5-base"',
391
- f'"{abs_path}"',
436
+ resolver = (
437
+ 'os.path.join(os.path.dirname(os.path.abspath(__file__)), '
438
+ '"flan-t5-base")'
439
+ )
440
+ patched = content
441
+ if "import os" not in patched:
442
+ patched = patched.replace(
443
+ "from transformers import PretrainedConfig",
444
+ "from transformers import PretrainedConfig\nimport os",
445
+ 1,
392
446
  )
447
+ # Replace the class-level foundation assignment, whatever it is now
448
+ # (the HuggingFace default OR a previously-baked absolute path), with
449
+ # the self-resolving expression. count=1 leaves any __init__ default.
450
+ patched = re.sub(
451
+ r'foundation\s*=\s*"[^"]*"',
452
+ "foundation = " + resolver,
453
+ patched,
454
+ count=1,
455
+ )
456
+ if patched != content:
393
457
  with open(config_py, "w") as f:
394
458
  f.write(patched)
395
- logger.info(f"Patched foundation to local: {abs_path}")
459
+ logger.info("Patched foundation to self-resolving relative path")
396
460
  except Exception as e:
397
461
  logger.warning(f"Could not patch configuration_hhem_v2.py: {e}")
398
462
 
@@ -15,6 +15,7 @@ Rate Limits (when using Player JWT):
15
15
 
16
16
  import asyncio
17
17
  import logging
18
+ import os
18
19
  from dataclasses import dataclass
19
20
  from typing import Optional
20
21
 
@@ -35,8 +36,9 @@ from .term_ui import (
35
36
  # Configure module logger
36
37
  logger = logging.getLogger(__name__)
37
38
 
38
- # Loreguard API base URL
39
- LOREGUARD_API_URL = "https://api.loreguard.com"
39
+ # Loreguard API base URL (configurable via LOREGUARD_API env var)
40
+ from .config import get_api_url
41
+ LOREGUARD_API_URL = get_api_url()
40
42
 
41
43
 
42
44
  @dataclass
@@ -407,7 +409,7 @@ class NPCChat:
407
409
  raise Exception("Invalid API token - please check your authentication")
408
410
  elif e.response.status_code == 404:
409
411
  logger.warning("No characters found for this account")
410
- raise Exception("No characters found - register NPCs at loreguard.com first")
412
+ raise Exception("No characters found - register NPCs at console.loreguard.com first")
411
413
  logger.error("HTTP error fetching characters: %d", e.response.status_code)
412
414
  raise
413
415
  except httpx.RequestError as e:
@@ -428,7 +430,7 @@ class NPCChat:
428
430
  return None
429
431
 
430
432
  if not characters:
431
- print_error("No NPCs registered. Create NPCs at loreguard.com first.")
433
+ print_error("No NPCs registered. Create NPCs at console.loreguard.com first.")
432
434
  return None
433
435
 
434
436
  items = [
@@ -642,7 +644,7 @@ async def run_npc_chat(
642
644
  Args:
643
645
  api_token: Loreguard API token for authentication (for server-side use)
644
646
  player_jwt: Player JWT from Steam exchange (for game clients)
645
- base_url: Loreguard API base URL (default: https://api.loreguard.com)
647
+ base_url: Loreguard API base URL (default: https://console.loreguard.com)
646
648
  config: Optional client configuration for timeouts
647
649
  verbose: If True, show pipeline pass updates via WebSocket
648
650
  tunnel: BackendTunnel instance for receiving pass_update messages (required for verbose)
@@ -12,7 +12,7 @@ Usage:
12
12
  read_timeout=15.0,
13
13
  max_retries=3
14
14
  )
15
- steam_auth = SteamAuth(api_url="https://api.loreguard.com", config=config)
15
+ steam_auth = SteamAuth(api_url="https://console.loreguard.com", config=config)
16
16
 
17
17
  # Exchange Steam ticket for Player JWT
18
18
  result = await steam_auth.exchange_ticket(
@@ -40,8 +40,9 @@ import httpx
40
40
  # Configure module logger
41
41
  logger = logging.getLogger(__name__)
42
42
 
43
- # Default Loreguard API URL
44
- LOREGUARD_API_URL = "https://api.loreguard.com"
43
+ # Default Loreguard API URL (configurable via LOREGUARD_API env var)
44
+ from .config import get_api_url
45
+ LOREGUARD_API_URL = get_api_url()
45
46
 
46
47
  # Validation patterns
47
48
  STEAM_APP_ID_PATTERN = re.compile(r"^\d{1,10}$")
@@ -12,6 +12,7 @@ from textual.widgets import Input, Static, ListView, ListItem, Label
12
12
  from rich.text import Text
13
13
 
14
14
  from ..styles import PURPLE, CYAN, PINK, FG, FG_DIM, GREEN, RED
15
+ from ...config import get_api_url
15
16
  from ..widgets.banner import get_gradient_color
16
17
 
17
18
  if TYPE_CHECKING:
@@ -227,7 +228,7 @@ class AuthMenuModal(ModalScreen[tuple | None]):
227
228
 
228
229
  # Update status
229
230
  status = self.query_one("#status-line", Static)
230
- status.update(Text("Get your token at loreguard.com/dashboard", style=FG_DIM))
231
+ status.update(Text("Get your token at console.loreguard.com", style=FG_DIM))
231
232
 
232
233
  def _switch_to_menu(self) -> None:
233
234
  """Switch back to menu mode."""
@@ -270,7 +271,7 @@ class AuthMenuModal(ModalScreen[tuple | None]):
270
271
  try:
271
272
  async with httpx.AsyncClient(timeout=10.0) as client:
272
273
  response = await client.get(
273
- "https://api.loreguard.com/api/auth/me",
274
+ f"{get_api_url()}/api/auth/me",
274
275
  headers={"Authorization": f"Bearer {token}"},
275
276
  )
276
277
  if response.status_code == 200:
@@ -59,7 +59,7 @@ class TokenInputModal(ModalScreen[str | None]):
59
59
  """Compose the modal layout."""
60
60
  with Vertical():
61
61
  yield Static("Enter API Token", classes="modal-title")
62
- yield Static("Get your token at loreguard.com/dashboard", classes="modal-hint")
62
+ yield Static("Get your token at console.loreguard.com", classes="modal-hint")
63
63
  yield Input(placeholder="Paste your token here...", password=True, id="token-input")
64
64
  yield Static("enter submit • esc cancel", classes="modal-footer")
65
65
 
@@ -284,7 +284,7 @@ class UnifiedPaletteModal(ModalScreen[tuple[str, Any] | None]):
284
284
  # Add models
285
285
  if self._show_models:
286
286
  from ...llama_server import get_models_dir
287
- from ...models_registry import SUPPORTED_MODELS
287
+ from ...models_registry import SUPPORTED_MODELS, ModelInfo
288
288
  from ...hf_discovery import discover_models
289
289
  from ..widgets.hardware_info import detect_hardware
290
290
 
@@ -302,6 +302,33 @@ class UnifiedPaletteModal(ModalScreen[tuple[str, Any] | None]):
302
302
  except Exception:
303
303
  all_models = list(SUPPORTED_MODELS)
304
304
 
305
+ # Ensure all static registry models are included
306
+ known_filenames = {m.filename for m in all_models}
307
+ for model in SUPPORTED_MODELS:
308
+ if model.filename not in known_filenames:
309
+ all_models.append(model)
310
+ known_filenames.add(model.filename)
311
+
312
+ # Scan local models dir for GGUF files not in registry/discovery
313
+ if self._models_dir and self._models_dir.exists():
314
+ for gguf_file in self._models_dir.glob("*.gguf"):
315
+ if gguf_file.name not in known_filenames:
316
+ size_bytes = gguf_file.stat().st_size
317
+ size_gb = size_bytes / (1024 ** 3)
318
+ stem = gguf_file.stem
319
+ all_models.append(ModelInfo(
320
+ id=f"local-{stem.lower()}",
321
+ name=stem.replace("-", " ").replace("_", " "),
322
+ filename=gguf_file.name,
323
+ size_gb=round(size_gb, 1),
324
+ size_bytes=size_bytes,
325
+ context_length=8192,
326
+ url="",
327
+ description="Local model",
328
+ hardware="",
329
+ ))
330
+ known_filenames.add(gguf_file.name)
331
+
305
332
  # Sort: most recent first, then by size descending
306
333
  def model_sort_key(m):
307
334
  # Primary: sort by recency (days_ago), None goes last
@@ -16,6 +16,7 @@ from ..widgets.banner import LoreguardBanner
16
16
  from ..widgets.hardware_info import HardwareInfo
17
17
  from ..widgets.footer import LoreguardFooter
18
18
  from ..styles import CYAN, PINK, GREEN, RED, FG_DIM
19
+ from ...config import get_api_url
19
20
 
20
21
  if TYPE_CHECKING:
21
22
  from ..app import LoreguardApp
@@ -115,7 +116,7 @@ class AuthScreen(Screen):
115
116
  try:
116
117
  async with httpx.AsyncClient(timeout=10.0) as client:
117
118
  response = await client.get(
118
- "https://api.loreguard.com/api/auth/me",
119
+ f"{get_api_url()}/api/auth/me",
119
120
  headers={"Authorization": f"Bearer {token}"},
120
121
  )
121
122
  if response.status_code == 200:
@@ -16,7 +16,7 @@ from ..widgets.hardware_info import HardwareInfo
16
16
  from ..widgets.server_monitor import ServerMonitor
17
17
  from ..widgets.npc_chat import NPCChat
18
18
  from ..widgets.footer import LoreguardFooter
19
- from ...config import LoreguardConfig
19
+ from ...config import LoreguardConfig, get_api_url, DEFAULT_BACKEND_URL
20
20
 
21
21
  if TYPE_CHECKING:
22
22
  from ..app import LoreguardApp
@@ -450,7 +450,7 @@ class MainScreen(Screen):
450
450
  self._log(f"Still loading... ({elapsed}s)")
451
451
 
452
452
  # Check if process died
453
- if app._llama_process.process and app._llama_process.process.poll() is not None:
453
+ if app._llama_process and app._llama_process.process and app._llama_process.process.poll() is not None:
454
454
  self._log("llama-server process died", "error")
455
455
  break
456
456
 
@@ -548,7 +548,7 @@ class MainScreen(Screen):
548
548
 
549
549
  # Load dialogue act classifier (filler selection) - run in thread pool
550
550
  dialogue_act_classifier = None
551
- enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "true").lower() == "true"
551
+ enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "false").lower() == "true"
552
552
  if not enable_dialogue_act:
553
553
  self._log("Dialogue act classifier disabled via LOREGUARD_DIALOGUE_ACT_ENABLED")
554
554
  else:
@@ -649,7 +649,7 @@ class MainScreen(Screen):
649
649
  self._update_connection_status("connecting")
650
650
 
651
651
  app._tunnel = BackendTunnel(
652
- backend_url="wss://api.loreguard.com/workers",
652
+ backend_url=os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
653
653
  llm_proxy=llm_proxy,
654
654
  worker_id=app.worker_id,
655
655
  worker_token=app.api_token,
@@ -851,7 +851,7 @@ class MainScreen(Screen):
851
851
  try:
852
852
  async with httpx.AsyncClient(timeout=10.0) as client:
853
853
  response = await client.get(
854
- "https://api.loreguard.com/api/characters",
854
+ f"{get_api_url()}/api/characters",
855
855
  headers={"Authorization": f"Bearer {app.api_token}"},
856
856
  )
857
857
 
@@ -862,7 +862,7 @@ class MainScreen(Screen):
862
862
  npcs = [c for c in characters if c.get("type") != "world"]
863
863
 
864
864
  if not npcs:
865
- self._update_status("No NPCs registered. Create NPCs at loreguard.com first.")
865
+ self._update_status("No NPCs registered. Create NPCs at console.loreguard.com first.")
866
866
  return
867
867
 
868
868
  # Create NPC items
@@ -17,6 +17,7 @@ from ..widgets.banner import LoreguardBanner
17
17
  from ..widgets.hardware_info import HardwareInfo
18
18
  from ..widgets.footer import LoreguardFooter
19
19
  from ..styles import CYAN, GREEN, YELLOW, RED, FG_DIM
20
+ from ...config import DEFAULT_BACKEND_URL
20
21
 
21
22
  if TYPE_CHECKING:
22
23
  from ..app import LoreguardApp
@@ -256,7 +257,7 @@ class RunningScreen(Screen):
256
257
 
257
258
  # Load Dialogue Act Classifier
258
259
  dialogue_act_classifier = None
259
- enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "true").lower() == "true"
260
+ enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "false").lower() == "true"
260
261
  if not enable_dialogue_act:
261
262
  self._update_status("dialogue_act", "Dialogue Act", "Disabled", "info")
262
263
  self._log("Dialogue act classifier disabled via LOREGUARD_DIALOGUE_ACT_ENABLED", "info")
@@ -322,7 +323,7 @@ class RunningScreen(Screen):
322
323
  model_id = app.model_path.stem
323
324
 
324
325
  self._tunnel = BackendTunnel(
325
- backend_url="wss://api.loreguard.com/workers",
326
+ backend_url=os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
326
327
  llm_proxy=llm_proxy,
327
328
  worker_id=app.worker_id,
328
329
  worker_token=app.api_token,
@@ -6,6 +6,7 @@ Uses the local proxy for NPC conversations with token streaming:
6
6
 
7
7
  import json
8
8
  import logging
9
+ import os
9
10
  from typing import TYPE_CHECKING
10
11
 
11
12
  import httpx
@@ -23,8 +24,9 @@ from ...runtime import RuntimeInfo
23
24
  if TYPE_CHECKING:
24
25
  from ..app import LoreguardApp
25
26
 
26
- # Fallback to cloud API if local proxy unavailable
27
- LOREGUARD_API_URL = "https://api.loreguard.com"
27
+ # Fallback to cloud API if local proxy unavailable (configurable via LOREGUARD_API env var)
28
+ from ...config import get_api_url
29
+ LOREGUARD_API_URL = get_api_url()
28
30
 
29
31
 
30
32
  def get_local_proxy_url() -> str | None:
@@ -81,7 +81,8 @@ class BackendTunnel:
81
81
  self.registered = False
82
82
  self.backend_version = "" # Populated from worker_ack
83
83
  self._reconnect_delay = 1 # Start with 1 second
84
- self._max_reconnect_delay = 60 # Max 60 seconds
84
+ self._max_reconnect_delay = 3 # Cap at 3s until first successful connection
85
+ self._has_connected = False # Tracks if we've ever connected successfully
85
86
  self._running = True
86
87
  self._shutdown_requested = False
87
88
  self._heartbeat_task: asyncio.Task | None = None
@@ -122,7 +123,7 @@ class BackendTunnel:
122
123
  """Establish and maintain connection to backend with auto-reconnect."""
123
124
  if not self.worker_id or not self.worker_token:
124
125
  self._log("Error: Worker ID and API token are required", "error")
125
- self._log("Get an API token from loreguard.com dashboard", "warn")
126
+ self._log("Get an API token from console.loreguard.com", "warn")
126
127
  return
127
128
 
128
129
  last_error = ""
@@ -210,6 +211,11 @@ class BackendTunnel:
210
211
  connection_start = time.time()
211
212
  self._log("Connected to backend!", "success")
212
213
 
214
+ # After first successful connection, use longer backoff for reconnections
215
+ if not self._has_connected:
216
+ self._has_connected = True
217
+ self._max_reconnect_delay = 60
218
+
213
219
  # Register as worker
214
220
  success, error_reason = await self._register_worker()
215
221
  if not success:
@@ -54,6 +54,8 @@ from rich.box import ROUNDED
54
54
  from rich.align import Align
55
55
  from rich.layout import Layout
56
56
 
57
+ from .config import get_api_url, DEFAULT_BACKEND_URL
58
+
57
59
  # Logger instance
58
60
  log = logging.getLogger("loreguard")
59
61
 
@@ -1044,7 +1046,7 @@ async def step_authentication(app: Optional[TUIApp] = None) -> tuple[Optional[st
1044
1046
  try:
1045
1047
  async with httpx.AsyncClient(timeout=10.0) as client:
1046
1048
  response = await client.get(
1047
- "https://api.loreguard.com/api/auth/me",
1049
+ f"{get_api_url()}/api/auth/me",
1048
1050
  headers={"Authorization": f"Bearer {token}"},
1049
1051
  )
1050
1052
  if response.status_code == 200:
@@ -1510,7 +1512,7 @@ async def step_start(
1510
1512
 
1511
1513
  model_id = _resolve_backend_model_id(model_path.stem)
1512
1514
  tunnel = BackendTunnel(
1513
- backend_url="wss://api.loreguard.com/workers",
1515
+ backend_url=os.getenv("LOREGUARD_BACKEND", DEFAULT_BACKEND_URL),
1514
1516
  llm_proxy=llm_proxy,
1515
1517
  worker_id=worker_id,
1516
1518
  worker_token=token,
@@ -476,7 +476,7 @@ wheels = [
476
476
 
477
477
  [[package]]
478
478
  name = "huggingface-hub"
479
- version = "0.36.0"
479
+ version = "0.36.2"
480
480
  source = { registry = "https://pypi.org/simple" }
481
481
  dependencies = [
482
482
  { name = "filelock" },
@@ -488,9 +488,9 @@ dependencies = [
488
488
  { name = "tqdm" },
489
489
  { name = "typing-extensions" },
490
490
  ]
491
- sdist = { url = "https://files.pythonhosted.org/packages/98/63/4910c5fa9128fdadf6a9c5ac138e8b1b6cee4ca44bf7915bbfbce4e355ee/huggingface_hub-0.36.0.tar.gz", hash = "sha256:47b3f0e2539c39bf5cde015d63b72ec49baff67b6931c3d97f3f84532e2b8d25", size = 463358, upload-time = "2025-10-23T12:12:01.413Z" }
491
+ sdist = { url = "https://files.pythonhosted.org/packages/7c/b7/8cb61d2eece5fb05a83271da168186721c450eb74e3c31f7ef3169fa475b/huggingface_hub-0.36.2.tar.gz", hash = "sha256:1934304d2fb224f8afa3b87007d58501acfda9215b334eed53072dd5e815ff7a", size = 649782, upload-time = "2026-02-06T09:24:13.098Z" }
492
492
  wheels = [
493
- { url = "https://files.pythonhosted.org/packages/cb/bd/1a875e0d592d447cbc02805fd3fe0f497714d6a2583f59d14fa9ebad96eb/huggingface_hub-0.36.0-py3-none-any.whl", hash = "sha256:7bcc9ad17d5b3f07b57c78e79d527102d08313caa278a641993acddcb894548d", size = 566094, upload-time = "2025-10-23T12:11:59.557Z" },
493
+ { url = "https://files.pythonhosted.org/packages/a8/af/48ac8483240de756d2438c380746e7130d1c6f75802ef22f3c6d49982787/huggingface_hub-0.36.2-py3-none-any.whl", hash = "sha256:48f0c8eac16145dfce371e9d2d7772854a4f591bcb56c9cf548accf531d54270", size = 566395, upload-time = "2026-02-06T09:24:11.133Z" },
494
494
  ]
495
495
 
496
496
  [[package]]
@@ -600,7 +600,7 @@ wheels = [
600
600
 
601
601
  [[package]]
602
602
  name = "loreguard-cli"
603
- version = "0.14.5"
603
+ version = "0.16.0"
604
604
  source = { editable = "." }
605
605
  dependencies = [
606
606
  { name = "aiofiles" },
@@ -649,7 +649,7 @@ requires-dist = [
649
649
  { name = "textual", specifier = ">=0.47.0" },
650
650
  { name = "tf-keras", specifier = ">=2.16.0" },
651
651
  { name = "torch", specifier = ">=2.0.0" },
652
- { name = "transformers", specifier = ">=4.36.0" },
652
+ { name = "transformers", specifier = ">=4.36.0,<5" },
653
653
  { name = "uvicorn", specifier = ">=0.27.0" },
654
654
  { name = "websockets", specifier = ">=12.0" },
655
655
  ]
@@ -2226,7 +2226,7 @@ wheels = [
2226
2226
 
2227
2227
  [[package]]
2228
2228
  name = "transformers"
2229
- version = "4.57.3"
2229
+ version = "4.57.6"
2230
2230
  source = { registry = "https://pypi.org/simple" }
2231
2231
  dependencies = [
2232
2232
  { name = "filelock" },
@@ -2241,9 +2241,9 @@ dependencies = [
2241
2241
  { name = "tokenizers" },
2242
2242
  { name = "tqdm" },
2243
2243
  ]
2244
- sdist = { url = "https://files.pythonhosted.org/packages/dd/70/d42a739e8dfde3d92bb2fff5819cbf331fe9657323221e79415cd5eb65ee/transformers-4.57.3.tar.gz", hash = "sha256:df4945029aaddd7c09eec5cad851f30662f8bd1746721b34cc031d70c65afebc", size = 10139680, upload-time = "2025-11-25T15:51:30.139Z" }
2244
+ sdist = { url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912, upload-time = "2026-01-16T10:38:39.284Z" }
2245
2245
  wheels = [
2246
- { url = "https://files.pythonhosted.org/packages/6a/6b/2f416568b3c4c91c96e5a365d164f8a4a4a88030aa8ab4644181fdadce97/transformers-4.57.3-py3-none-any.whl", hash = "sha256:c77d353a4851b1880191603d36acb313411d3577f6e2897814f333841f7003f4", size = 11993463, upload-time = "2025-11-25T15:51:26.493Z" },
2246
+ { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload-time = "2026-01-16T10:38:31.289Z" },
2247
2247
  ]
2248
2248
 
2249
2249
  [[package]]
File without changes
File without changes