loreguard-cli 0.14.6__tar.gz → 0.15.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/PKG-INFO +1 -1
  2. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/pyproject.toml +1 -1
  3. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/intent_classifier.py +5 -2
  4. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/llm.py +5 -0
  5. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/nli.py +72 -4
  6. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tunnel.py +3 -0
  7. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/.claude/skills/llama-cpp-troubleshooting/SKILL.md +0 -0
  8. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/.env.example +0 -0
  9. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/.github/workflows/release.yml +0 -0
  10. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/.gitignore +0 -0
  11. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/LICENSE +0 -0
  12. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/README.md +0 -0
  13. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/THIRD_PARTY_NOTICES.md +0 -0
  14. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/loreguard.spec +0 -0
  15. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/loreguard_entry.py +0 -0
  16. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/scripts/build.py +0 -0
  17. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/sdk/API.md +0 -0
  18. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/sdk/csharp/LoreguardSDK.cs +0 -0
  19. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/sdk/gdscript/LoreguardSDK.gd +0 -0
  20. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/sdk/javascript/loreguard-sdk.js +0 -0
  21. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/sdk/python/loreguard_sdk.py +0 -0
  22. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/__init__.py +0 -0
  23. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/__main__.py +0 -0
  24. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/chunk_detector.py +0 -0
  25. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/cli.py +0 -0
  26. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/config.py +0 -0
  27. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/dialogue_act_classifier.py +0 -0
  28. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/hf_discovery.py +0 -0
  29. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/http_server.py +0 -0
  30. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/llama_server.py +0 -0
  31. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/main.py +0 -0
  32. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/models_registry.py +0 -0
  33. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/npc_chat.py +0 -0
  34. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/runtime.py +0 -0
  35. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/steam.py +0 -0
  36. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/term_ui.py +0 -0
  37. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/__init__.py +0 -0
  38. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/app.py +0 -0
  39. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/modals/__init__.py +0 -0
  40. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/modals/auth_menu.py +0 -0
  41. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/modals/npc_chat.py +0 -0
  42. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/modals/token_input.py +0 -0
  43. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/modals/unified_palette.py +0 -0
  44. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/screens/__init__.py +0 -0
  45. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/screens/auth.py +0 -0
  46. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/screens/main.py +0 -0
  47. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/screens/model_select.py +0 -0
  48. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/screens/nli_setup.py +0 -0
  49. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/screens/running.py +0 -0
  50. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/styles.py +0 -0
  51. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/widgets/__init__.py +0 -0
  52. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/widgets/banner.py +0 -0
  53. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/widgets/footer.py +0 -0
  54. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/widgets/hardware_info.py +0 -0
  55. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/widgets/npc_chat.py +0 -0
  56. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/widgets/server_monitor.py +0 -0
  57. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tui/widgets/status_panel.py +0 -0
  58. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/wizard.py +0 -0
  59. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/templates/llama31-no-tools.jinja +0 -0
  60. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/tests/test_intent_classifier.py +0 -0
  61. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/tests/test_nli_hhem.py +0 -0
  62. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/tests/test_websocket_timeout.py +0 -0
  63. {loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: loreguard-cli
3
- Version: 0.14.6
3
+ Version: 0.15.2
4
4
  Summary: Local inference client for Loreguard NPCs
5
5
  Project-URL: Homepage, https://loreguard.com
6
6
  Project-URL: Documentation, https://github.com/beyond-logic-labs/loreguard-cli#readme
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "loreguard-cli"
7
- version = "0.14.6"
7
+ version = "0.15.2"
8
8
  description = "Local inference client for Loreguard NPCs"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -52,7 +52,9 @@ INTENT_LABEL_DESCRIPTIONS = {
52
52
  ),
53
53
  IntentLabel.WORKING_MEMORY: (
54
54
  "a question about the NPC's own current state, feelings, recent "
55
- "experiences, or personal memory that can be answered from working memory"
55
+ "experiences, or personal memory that can be answered from working memory; "
56
+ "this also includes clarification questions about what the NPC just said "
57
+ "(examples: what do you mean, what do you mean by that, clarify that)"
56
58
  ),
57
59
  IntentLabel.LIGHT_RETRIEVAL: (
58
60
  "a request for one specific factual detail (number, fee, date, name, "
@@ -60,7 +62,8 @@ INTENT_LABEL_DESCRIPTIONS = {
60
62
  ),
61
63
  IntentLabel.FULL_RETRIEVAL: (
62
64
  "a complex request requiring multiple facts, synthesis, planning, "
63
- "comparison, or multi-step reasoning across sources"
65
+ "comparison, or multi-step reasoning across sources; not a brief clarification "
66
+ "of the NPC's own wording"
64
67
  ),
65
68
  }
66
69
 
@@ -289,6 +289,7 @@ class LLMProxy:
289
289
  token_index = 0
290
290
  usage = {}
291
291
  line_count = 0 # Track SSE lines for debugging
292
+ final_finish_reason = None
292
293
 
293
294
  try:
294
295
  # Use a custom timeout for streaming:
@@ -355,6 +356,8 @@ class LLMProxy:
355
356
 
356
357
  # Check for finish_reason
357
358
  finish_reason = choices[0].get("finish_reason")
359
+ if finish_reason:
360
+ final_finish_reason = finish_reason
358
361
 
359
362
  # Extract usage if present (some servers send it with final chunk)
360
363
  if "usage" in chunk_data:
@@ -402,6 +405,7 @@ class LLMProxy:
402
405
  "usage": usage,
403
406
  "model": req.model,
404
407
  "token_count": token_index,
408
+ "finish_reason": final_finish_reason,
405
409
  }
406
410
 
407
411
  def _validate_messages(self, messages: list[dict]) -> list[dict]:
@@ -641,6 +645,7 @@ class LLMProxy:
641
645
  "thinking": thinking,
642
646
  "model": data.get("model", req.model),
643
647
  "usage": data.get("usage", {}),
648
+ "finish_reason": data["choices"][0].get("finish_reason"),
644
649
  }
645
650
 
646
651
  def _extract_thinking(self, content: str) -> tuple[str, str]:
@@ -119,14 +119,14 @@ class NLIService:
119
119
  logger.info(f"Loading NLI model: {self._model_path} (device={self._device})")
120
120
 
121
121
  if self._use_hhem:
122
+ # HHEMv2 custom class (built for transformers 4.39) lacks
123
+ # all_tied_weights_keys required by transformers 5.x.
124
+ # Patch the vendored modeling file before loading.
125
+ self._patch_hhem_model_files()
122
126
  self._model = AutoModelForSequenceClassification.from_pretrained(
123
127
  self._model_path,
124
128
  trust_remote_code=True,
125
129
  )
126
- # HHEMv2 custom class may lack all_tied_weights_keys (needed by
127
- # newer transformers for .to() / .eval()). Patch if missing.
128
- if not hasattr(self._model, "_tied_weights_keys"):
129
- self._model._tied_weights_keys = []
130
130
  self._model.to(self._device)
131
131
  self._model.eval()
132
132
 
@@ -328,6 +328,74 @@ class NLIService:
328
328
 
329
329
  return results
330
330
 
331
def _patch_hhem_model_files(self):
    """Patch vendored HHEM files for transformers 5.x compatibility.

    The HHEM model was built for transformers 4.39. Transformers 5.x:
      1. Requires `all_tied_weights_keys` during PreTrainedModel.__init__()
      2. Is stricter about model_type matching between config.json and the
         config class
    Since trust_remote_code loads the .py files directly, the files under
    ``self._model_path`` are rewritten in place before the model is loaded.

    Three independent, best-effort patches are applied. Each one is skipped
    when its target file is missing or already patched, and any failure is
    logged as a warning instead of being raised, so a read-only model
    directory does not abort loading here.

    Returns:
        None. The only effects are the in-place file rewrites and log lines.
    """
    import re  # local import: only needed by this one-off patch step

    def _read_text(path):
        # Context manager closes the handle deterministically; explicit
        # UTF-8 avoids depending on the platform's locale encoding.
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    def _write_text(path, text):
        with open(path, "w", encoding="utf-8") as f:
            f.write(text)

    # Patch 1: modeling_hhem_v2.py — add missing class attributes
    model_file = os.path.join(self._model_path, "modeling_hhem_v2.py")
    if os.path.exists(model_file):
        try:
            content = _read_text(model_file)
            # The attribute name doubles as the "already patched" marker.
            if "all_tied_weights_keys" not in content:
                # Capture the class body's indentation instead of assuming
                # it, so the inserted lines always line up with
                # `config_class` whatever indent the vendored file uses.
                class_header = re.compile(
                    r"class HHEMv2ForSequenceClassification\(PreTrainedModel\):\n"
                    r"(?P<indent>[ \t]+)config_class = HHEMv2Config"
                )

                def _with_compat_attrs(match):
                    indent = match.group("indent")
                    return (
                        match.group(0)
                        + "\n"
                        + indent
                        + "# Compatibility: transformers 5.x requires these attributes\n"
                        + indent
                        + "_tied_weights_keys = []\n"
                        + indent
                        + "all_tied_weights_keys = {}"
                    )

                patched = class_header.sub(_with_compat_attrs, content)
                # Only touch the file (and log) when the anchor was found.
                if patched != content:
                    _write_text(model_file, patched)
                    logger.info("Patched modeling_hhem_v2.py for transformers 5.x")
        except Exception as e:
            logger.warning(f"Could not patch modeling_hhem_v2.py: {e}")

    # Patch 2: config.json — fix model_type mismatch
    # config.json has "HHEMv2Config" but the config class defines model_type = "HHEMv2"
    config_file = os.path.join(self._model_path, "config.json")
    if os.path.exists(config_file):
        try:
            content = _read_text(config_file)
            if '"model_type": "HHEMv2Config"' in content:
                patched = content.replace(
                    '"model_type": "HHEMv2Config"',
                    '"model_type": "HHEMv2"',
                )
                _write_text(config_file, patched)
                logger.info("Patched config.json: model_type HHEMv2Config -> HHEMv2")
        except Exception as e:
            logger.warning(f"Could not patch config.json: {e}")

    # Patch 3: configuration_hhem_v2.py — use local flan-t5-base instead of HuggingFace
    # The HHEM model downloads google/flan-t5-base config+tokenizer at init.
    # If we've bundled those files locally, rewrite the foundation path.
    config_py = os.path.join(self._model_path, "configuration_hhem_v2.py")
    local_foundation = os.path.join(self._model_path, "flan-t5-base")
    if os.path.exists(config_py) and os.path.isdir(local_foundation):
        try:
            content = _read_text(config_py)
            if '"google/flan-t5-base"' in content:
                # Use absolute path to the bundled flan-t5-base files
                abs_path = os.path.abspath(local_foundation)
                # The path is spliced into Python *source*, so it must be a
                # valid string literal: a raw Windows path such as
                # C:\Users\... would otherwise produce a broken `\U` escape
                # and make the vendored module unimportable.
                literal = abs_path.replace("\\", "\\\\").replace('"', '\\"')
                patched = content.replace(
                    '"google/flan-t5-base"',
                    f'"{literal}"',
                )
                _write_text(config_py, patched)
                logger.info(f"Patched foundation to local: {abs_path}")
        except Exception as e:
            logger.warning(f"Could not patch configuration_hhem_v2.py: {e}")
398
+
331
399
  def _predict_hhem(self, pairs: List[Tuple[str, str]]) -> List[float]:
332
400
  """Run HHEM prediction and normalize output to list of floats."""
333
401
  import torch
@@ -539,6 +539,7 @@ class BackendTunnel:
539
539
  "workerId": self.worker_id,
540
540
  "success": "error" not in result or not result["error"],
541
541
  "content": result.get("content", ""),
542
+ "finishReason": result.get("finish_reason", ""),
542
543
  "tokensUsed": result.get("usage", {}).get("total_tokens", 0),
543
544
  "generationMs": generation_ms,
544
545
  "errorMessage": result.get("error", ""),
@@ -724,6 +725,7 @@ class BackendTunnel:
724
725
  usage = chunk.get("usage", {})
725
726
  # Use the processed content from the done chunk
726
727
  final_content = chunk.get("content", "".join(content_parts))
728
+ finish_reason = chunk.get("finish_reason", "")
727
729
 
728
730
  latency_ms = int((time.time() - start_time) * 1000)
729
731
 
@@ -742,6 +744,7 @@ class BackendTunnel:
742
744
  "success": True,
743
745
  "content": final_content,
744
746
  "thinking": thinking,
747
+ "finishReason": finish_reason,
745
748
  "tokenCount": token_count,
746
749
  "latencyMs": latency_ms,
747
750
  },
File without changes
File without changes
File without changes