loreguard-cli 0.14.6__tar.gz → 0.15.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/PKG-INFO +1 -1
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/pyproject.toml +1 -1
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/llm.py +5 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/nli.py +38 -4
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tunnel.py +3 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/.claude/skills/llama-cpp-troubleshooting/SKILL.md +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/.env.example +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/.github/workflows/release.yml +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/.gitignore +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/LICENSE +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/README.md +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/THIRD_PARTY_NOTICES.md +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/loreguard.spec +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/loreguard_entry.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/scripts/build.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/sdk/API.md +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/sdk/csharp/LoreguardSDK.cs +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/sdk/gdscript/LoreguardSDK.gd +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/sdk/javascript/loreguard-sdk.js +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/sdk/python/loreguard_sdk.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/__init__.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/__main__.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/chunk_detector.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/cli.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/config.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/dialogue_act_classifier.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/hf_discovery.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/http_server.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/intent_classifier.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/llama_server.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/main.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/models_registry.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/npc_chat.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/runtime.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/steam.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/term_ui.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/__init__.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/app.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/modals/__init__.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/modals/auth_menu.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/modals/npc_chat.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/modals/token_input.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/modals/unified_palette.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/screens/__init__.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/screens/auth.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/screens/main.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/screens/model_select.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/screens/nli_setup.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/screens/running.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/styles.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/__init__.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/banner.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/footer.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/hardware_info.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/npc_chat.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/server_monitor.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tui/widgets/status_panel.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/wizard.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/templates/llama31-no-tools.jinja +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/tests/test_intent_classifier.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/tests/test_nli_hhem.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/tests/test_websocket_timeout.py +0 -0
- {loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/uv.lock +0 -0
|
@@ -289,6 +289,7 @@ class LLMProxy:
|
|
|
289
289
|
token_index = 0
|
|
290
290
|
usage = {}
|
|
291
291
|
line_count = 0 # Track SSE lines for debugging
|
|
292
|
+
final_finish_reason = None
|
|
292
293
|
|
|
293
294
|
try:
|
|
294
295
|
# Use a custom timeout for streaming:
|
|
@@ -355,6 +356,8 @@ class LLMProxy:
|
|
|
355
356
|
|
|
356
357
|
# Check for finish_reason
|
|
357
358
|
finish_reason = choices[0].get("finish_reason")
|
|
359
|
+
if finish_reason:
|
|
360
|
+
final_finish_reason = finish_reason
|
|
358
361
|
|
|
359
362
|
# Extract usage if present (some servers send it with final chunk)
|
|
360
363
|
if "usage" in chunk_data:
|
|
@@ -402,6 +405,7 @@ class LLMProxy:
|
|
|
402
405
|
"usage": usage,
|
|
403
406
|
"model": req.model,
|
|
404
407
|
"token_count": token_index,
|
|
408
|
+
"finish_reason": final_finish_reason,
|
|
405
409
|
}
|
|
406
410
|
|
|
407
411
|
def _validate_messages(self, messages: list[dict]) -> list[dict]:
|
|
@@ -641,6 +645,7 @@ class LLMProxy:
|
|
|
641
645
|
"thinking": thinking,
|
|
642
646
|
"model": data.get("model", req.model),
|
|
643
647
|
"usage": data.get("usage", {}),
|
|
648
|
+
"finish_reason": data["choices"][0].get("finish_reason"),
|
|
644
649
|
}
|
|
645
650
|
|
|
646
651
|
def _extract_thinking(self, content: str) -> tuple[str, str]:
|
|
@@ -119,14 +119,14 @@ class NLIService:
|
|
|
119
119
|
logger.info(f"Loading NLI model: {self._model_path} (device={self._device})")
|
|
120
120
|
|
|
121
121
|
if self._use_hhem:
|
|
122
|
+
# HHEMv2 custom class (built for transformers 4.39) lacks
|
|
123
|
+
# all_tied_weights_keys required by transformers 5.x.
|
|
124
|
+
# Patch the vendored modeling file before loading.
|
|
125
|
+
self._patch_hhem_model_file()
|
|
122
126
|
self._model = AutoModelForSequenceClassification.from_pretrained(
|
|
123
127
|
self._model_path,
|
|
124
128
|
trust_remote_code=True,
|
|
125
129
|
)
|
|
126
|
-
# HHEMv2 custom class may lack all_tied_weights_keys (needed by
|
|
127
|
-
# newer transformers for .to() / .eval()). Patch if missing.
|
|
128
|
-
if not hasattr(self._model, "_tied_weights_keys"):
|
|
129
|
-
self._model._tied_weights_keys = []
|
|
130
130
|
self._model.to(self._device)
|
|
131
131
|
self._model.eval()
|
|
132
132
|
|
|
@@ -328,6 +328,40 @@ class NLIService:
|
|
|
328
328
|
|
|
329
329
|
return results
|
|
330
330
|
|
|
331
|
+
def _patch_hhem_model_file(self):
|
|
332
|
+
"""Patch vendored modeling_hhem_v2.py for transformers 5.x compatibility.
|
|
333
|
+
|
|
334
|
+
The HHEM model was built for transformers 4.39. Transformers 5.x requires
|
|
335
|
+
`all_tied_weights_keys` during PreTrainedModel.__init__(), which the
|
|
336
|
+
custom class doesn't define. Since trust_remote_code loads the .py file
|
|
337
|
+
directly, we patch the file before from_pretrained reads it.
|
|
338
|
+
"""
|
|
339
|
+
model_file = os.path.join(self._model_path, "modeling_hhem_v2.py")
|
|
340
|
+
if not os.path.exists(model_file):
|
|
341
|
+
return
|
|
342
|
+
|
|
343
|
+
try:
|
|
344
|
+
content = open(model_file, "r").read()
|
|
345
|
+
if "all_tied_weights_keys" in content:
|
|
346
|
+
return # Already patched
|
|
347
|
+
|
|
348
|
+
# Add the missing attribute as a class variable
|
|
349
|
+
patched = content.replace(
|
|
350
|
+
"class HHEMv2ForSequenceClassification(PreTrainedModel):\n"
|
|
351
|
+
" config_class = HHEMv2Config",
|
|
352
|
+
"class HHEMv2ForSequenceClassification(PreTrainedModel):\n"
|
|
353
|
+
" config_class = HHEMv2Config\n"
|
|
354
|
+
" # Compatibility: transformers 5.x requires these attributes\n"
|
|
355
|
+
" _tied_weights_keys = []\n"
|
|
356
|
+
" all_tied_weights_keys = {}",
|
|
357
|
+
)
|
|
358
|
+
if patched != content:
|
|
359
|
+
with open(model_file, "w") as f:
|
|
360
|
+
f.write(patched)
|
|
361
|
+
logger.info("Patched modeling_hhem_v2.py for transformers 5.x compatibility")
|
|
362
|
+
except Exception as e:
|
|
363
|
+
logger.warning(f"Could not patch HHEM model file: {e}")
|
|
364
|
+
|
|
331
365
|
def _predict_hhem(self, pairs: List[Tuple[str, str]]) -> List[float]:
|
|
332
366
|
"""Run HHEM prediction and normalize output to list of floats."""
|
|
333
367
|
import torch
|
|
@@ -539,6 +539,7 @@ class BackendTunnel:
|
|
|
539
539
|
"workerId": self.worker_id,
|
|
540
540
|
"success": "error" not in result or not result["error"],
|
|
541
541
|
"content": result.get("content", ""),
|
|
542
|
+
"finishReason": result.get("finish_reason", ""),
|
|
542
543
|
"tokensUsed": result.get("usage", {}).get("total_tokens", 0),
|
|
543
544
|
"generationMs": generation_ms,
|
|
544
545
|
"errorMessage": result.get("error", ""),
|
|
@@ -724,6 +725,7 @@ class BackendTunnel:
|
|
|
724
725
|
usage = chunk.get("usage", {})
|
|
725
726
|
# Use the processed content from the done chunk
|
|
726
727
|
final_content = chunk.get("content", "".join(content_parts))
|
|
728
|
+
finish_reason = chunk.get("finish_reason", "")
|
|
727
729
|
|
|
728
730
|
latency_ms = int((time.time() - start_time) * 1000)
|
|
729
731
|
|
|
@@ -742,6 +744,7 @@ class BackendTunnel:
|
|
|
742
744
|
"success": True,
|
|
743
745
|
"content": final_content,
|
|
744
746
|
"thinking": thinking,
|
|
747
|
+
"finishReason": finish_reason,
|
|
745
748
|
"tokenCount": token_count,
|
|
746
749
|
"latencyMs": latency_ms,
|
|
747
750
|
},
|
{loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/.claude/skills/llama-cpp-troubleshooting/SKILL.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|