loreguard-cli 0.14.6__tar.gz → 0.15.1__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/PKG-INFO +1 -1
  2. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/pyproject.toml +1 -1
  3. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/llm.py +5 -0
  4. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/nli.py +38 -4
  5. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tunnel.py +3 -0
  6. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/.claude/skills/llama-cpp-troubleshooting/SKILL.md +0 -0
  7. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/.env.example +0 -0
  8. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/.github/workflows/release.yml +0 -0
  9. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/.gitignore +0 -0
  10. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/LICENSE +0 -0
  11. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/README.md +0 -0
  12. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/THIRD_PARTY_NOTICES.md +0 -0
  13. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/loreguard.spec +0 -0
  14. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/loreguard_entry.py +0 -0
  15. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/scripts/build.py +0 -0
  16. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/sdk/API.md +0 -0
  17. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/sdk/csharp/LoreguardSDK.cs +0 -0
  18. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/sdk/gdscript/LoreguardSDK.gd +0 -0
  19. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/sdk/javascript/loreguard-sdk.js +0 -0
  20. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/sdk/python/loreguard_sdk.py +0 -0
  21. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/__init__.py +0 -0
  22. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/__main__.py +0 -0
  23. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/chunk_detector.py +0 -0
  24. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/cli.py +0 -0
  25. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/config.py +0 -0
  26. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/dialogue_act_classifier.py +0 -0
  27. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/hf_discovery.py +0 -0
  28. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/http_server.py +0 -0
  29. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/intent_classifier.py +0 -0
  30. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/llama_server.py +0 -0
  31. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/main.py +0 -0
  32. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/models_registry.py +0 -0
  33. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/npc_chat.py +0 -0
  34. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/runtime.py +0 -0
  35. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/steam.py +0 -0
  36. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/term_ui.py +0 -0
  37. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/__init__.py +0 -0
  38. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/app.py +0 -0
  39. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/modals/__init__.py +0 -0
  40. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/modals/auth_menu.py +0 -0
  41. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/modals/npc_chat.py +0 -0
  42. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/modals/token_input.py +0 -0
  43. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/modals/unified_palette.py +0 -0
  44. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/screens/__init__.py +0 -0
  45. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/screens/auth.py +0 -0
  46. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/screens/main.py +0 -0
  47. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/screens/model_select.py +0 -0
  48. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/screens/nli_setup.py +0 -0
  49. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/screens/running.py +0 -0
  50. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/styles.py +0 -0
  51. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/__init__.py +0 -0
  52. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/banner.py +0 -0
  53. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/footer.py +0 -0
  54. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/hardware_info.py +0 -0
  55. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/npc_chat.py +0 -0
  56. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/server_monitor.py +0 -0
  57. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/status_panel.py +0 -0
  58. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/wizard.py +0 -0
  59. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/templates/llama31-no-tools.jinja +0 -0
  60. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/tests/test_intent_classifier.py +0 -0
  61. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/tests/test_nli_hhem.py +0 -0
  62. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/tests/test_websocket_timeout.py +0 -0
  63. {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: loreguard-cli
3
- Version: 0.14.6
3
+ Version: 0.15.1
4
4
  Summary: Local inference client for Loreguard NPCs
5
5
  Project-URL: Homepage, https://loreguard.com
6
6
  Project-URL: Documentation, https://github.com/beyond-logic-labs/loreguard-cli#readme
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "loreguard-cli"
7
- version = "0.14.6"
7
+ version = "0.15.1"
8
8
  description = "Local inference client for Loreguard NPCs"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -289,6 +289,7 @@ class LLMProxy:
289
289
  token_index = 0
290
290
  usage = {}
291
291
  line_count = 0 # Track SSE lines for debugging
292
+ final_finish_reason = None
292
293
 
293
294
  try:
294
295
  # Use a custom timeout for streaming:
@@ -355,6 +356,8 @@ class LLMProxy:
355
356
 
356
357
  # Check for finish_reason
357
358
  finish_reason = choices[0].get("finish_reason")
359
+ if finish_reason:
360
+ final_finish_reason = finish_reason
358
361
 
359
362
  # Extract usage if present (some servers send it with final chunk)
360
363
  if "usage" in chunk_data:
@@ -402,6 +405,7 @@ class LLMProxy:
402
405
  "usage": usage,
403
406
  "model": req.model,
404
407
  "token_count": token_index,
408
+ "finish_reason": final_finish_reason,
405
409
  }
406
410
 
407
411
  def _validate_messages(self, messages: list[dict]) -> list[dict]:
@@ -641,6 +645,7 @@ class LLMProxy:
641
645
  "thinking": thinking,
642
646
  "model": data.get("model", req.model),
643
647
  "usage": data.get("usage", {}),
648
+ "finish_reason": data["choices"][0].get("finish_reason"),
644
649
  }
645
650
 
646
651
  def _extract_thinking(self, content: str) -> tuple[str, str]:
@@ -119,14 +119,14 @@ class NLIService:
119
119
  logger.info(f"Loading NLI model: {self._model_path} (device={self._device})")
120
120
 
121
121
  if self._use_hhem:
122
+ # HHEMv2 custom class (built for transformers 4.39) lacks
123
+ # all_tied_weights_keys required by transformers 5.x.
124
+ # Patch the vendored modeling file before loading.
125
+ self._patch_hhem_model_file()
122
126
  self._model = AutoModelForSequenceClassification.from_pretrained(
123
127
  self._model_path,
124
128
  trust_remote_code=True,
125
129
  )
126
- # HHEMv2 custom class may lack all_tied_weights_keys (needed by
127
- # newer transformers for .to() / .eval()). Patch if missing.
128
- if not hasattr(self._model, "_tied_weights_keys"):
129
- self._model._tied_weights_keys = []
130
130
  self._model.to(self._device)
131
131
  self._model.eval()
132
132
 
@@ -328,6 +328,40 @@ class NLIService:
328
328
 
329
329
  return results
330
330
 
331
+ def _patch_hhem_model_file(self):
332
+ """Patch vendored modeling_hhem_v2.py for transformers 5.x compatibility.
333
+
334
+ The HHEM model was built for transformers 4.39. Transformers 5.x requires
335
+ `all_tied_weights_keys` during PreTrainedModel.__init__(), which the
336
+ custom class doesn't define. Since trust_remote_code loads the .py file
337
+ directly, we patch the file before from_pretrained reads it.
338
+ """
339
+ model_file = os.path.join(self._model_path, "modeling_hhem_v2.py")
340
+ if not os.path.exists(model_file):
341
+ return
342
+
343
+ try:
344
+ content = open(model_file, "r").read()
345
+ if "all_tied_weights_keys" in content:
346
+ return # Already patched
347
+
348
+ # Add the missing attribute as a class variable
349
+ patched = content.replace(
350
+ "class HHEMv2ForSequenceClassification(PreTrainedModel):\n"
351
+ " config_class = HHEMv2Config",
352
+ "class HHEMv2ForSequenceClassification(PreTrainedModel):\n"
353
+ " config_class = HHEMv2Config\n"
354
+ " # Compatibility: transformers 5.x requires these attributes\n"
355
+ " _tied_weights_keys = []\n"
356
+ " all_tied_weights_keys = {}",
357
+ )
358
+ if patched != content:
359
+ with open(model_file, "w") as f:
360
+ f.write(patched)
361
+ logger.info("Patched modeling_hhem_v2.py for transformers 5.x compatibility")
362
+ except Exception as e:
363
+ logger.warning(f"Could not patch HHEM model file: {e}")
364
+
331
365
  def _predict_hhem(self, pairs: List[Tuple[str, str]]) -> List[float]:
332
366
  """Run HHEM prediction and normalize output to list of floats."""
333
367
  import torch
@@ -539,6 +539,7 @@ class BackendTunnel:
539
539
  "workerId": self.worker_id,
540
540
  "success": "error" not in result or not result["error"],
541
541
  "content": result.get("content", ""),
542
+ "finishReason": result.get("finish_reason", ""),
542
543
  "tokensUsed": result.get("usage", {}).get("total_tokens", 0),
543
544
  "generationMs": generation_ms,
544
545
  "errorMessage": result.get("error", ""),
@@ -724,6 +725,7 @@ class BackendTunnel:
724
725
  usage = chunk.get("usage", {})
725
726
  # Use the processed content from the done chunk
726
727
  final_content = chunk.get("content", "".join(content_parts))
728
+ finish_reason = chunk.get("finish_reason", "")
727
729
 
728
730
  latency_ms = int((time.time() - start_time) * 1000)
729
731
 
@@ -742,6 +744,7 @@ class BackendTunnel:
742
744
  "success": True,
743
745
  "content": final_content,
744
746
  "thinking": thinking,
747
+ "finishReason": finish_reason,
745
748
  "tokenCount": token_count,
746
749
  "latencyMs": latency_ms,
747
750
  },
File without changes
File without changes
File without changes