PyPI - loreguard-cli - Versions diffs - 0.14.6__tar.gz → 0.15.1__tar.gz - Mend

loreguard-cli 0.14.6 (tar.gz) → 0.15.1 (tar.gz)

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (63)

{loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: loreguard-cli
-Version: 0.14.6
+Version: 0.15.1
 Summary: Local inference client for Loreguard NPCs
 Project-URL: Homepage, https://loreguard.com
 Project-URL: Documentation, https://github.com/beyond-logic-labs/loreguard-cli#readme

{loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "loreguard-cli"
-version = "0.14.6"
+version = "0.15.1"
 description = "Local inference client for Loreguard NPCs"
 readme = "README.md"
 license = "MIT"

{loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/llm.py RENAMED Viewed

@@ -289,6 +289,7 @@ class LLMProxy:
         token_index = 0
         usage = {}
         line_count = 0  # Track SSE lines for debugging
+        final_finish_reason = None
         try:
             # Use a custom timeout for streaming:
@@ -355,6 +356,8 @@ class LLMProxy:
                     # Check for finish_reason
                     finish_reason = choices[0].get("finish_reason")
+                    if finish_reason:
+                        final_finish_reason = finish_reason
                     # Extract usage if present (some servers send it with final chunk)
                     if "usage" in chunk_data:
@@ -402,6 +405,7 @@ class LLMProxy:
             "usage": usage,
             "model": req.model,
             "token_count": token_index,
+            "finish_reason": final_finish_reason,
         }
     def _validate_messages(self, messages: list[dict]) -> list[dict]:
@@ -641,6 +645,7 @@ class LLMProxy:
             "thinking": thinking,
             "model": data.get("model", req.model),
             "usage": data.get("usage", {}),
+            "finish_reason": data["choices"][0].get("finish_reason"),
         }
     def _extract_thinking(self, content: str) -> tuple[str, str]:

{loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/nli.py RENAMED Viewed

@@ -119,14 +119,14 @@ class NLIService:
             logger.info(f"Loading NLI model: {self._model_path} (device={self._device})")
             if self._use_hhem:
+                # HHEMv2 custom class (built for transformers 4.39) lacks
+                # all_tied_weights_keys required by transformers 5.x.
+                # Patch the vendored modeling file before loading.
+                self._patch_hhem_model_file()
                 self._model = AutoModelForSequenceClassification.from_pretrained(
                     self._model_path,
                     trust_remote_code=True,
                 )
-                # HHEMv2 custom class may lack all_tied_weights_keys (needed by
-                # newer transformers for .to() / .eval()). Patch if missing.
-                if not hasattr(self._model, "_tied_weights_keys"):
-                    self._model._tied_weights_keys = []
                 self._model.to(self._device)
                 self._model.eval()
@@ -328,6 +328,40 @@ class NLIService:
             return results
+    def _patch_hhem_model_file(self):
+        """Patch vendored modeling_hhem_v2.py for transformers 5.x compatibility.
+        The HHEM model was built for transformers 4.39. Transformers 5.x requires
+        `all_tied_weights_keys` during PreTrainedModel.__init__(), which the
+        custom class doesn't define. Since trust_remote_code loads the .py file
+        directly, we patch the file before from_pretrained reads it.
+        """
+        model_file = os.path.join(self._model_path, "modeling_hhem_v2.py")
+        if not os.path.exists(model_file):
+            return
+        try:
+            content = open(model_file, "r").read()
+            if "all_tied_weights_keys" in content:
+                return  # Already patched
+            # Add the missing attribute as a class variable
+            patched = content.replace(
+                "class HHEMv2ForSequenceClassification(PreTrainedModel):\n"
+                "    config_class = HHEMv2Config",
+                "class HHEMv2ForSequenceClassification(PreTrainedModel):\n"
+                "    config_class = HHEMv2Config\n"
+                "    # Compatibility: transformers 5.x requires these attributes\n"
+                "    _tied_weights_keys = []\n"
+                "    all_tied_weights_keys = {}",
+            )
+            if patched != content:
+                with open(model_file, "w") as f:
+                    f.write(patched)
+                logger.info("Patched modeling_hhem_v2.py for transformers 5.x compatibility")
+        except Exception as e:
+            logger.warning(f"Could not patch HHEM model file: {e}")
     def _predict_hhem(self, pairs: List[Tuple[str, str]]) -> List[float]:
         """Run HHEM prediction and normalize output to list of floats."""
         import torch

{loreguard_cli-0.14.6 → loreguard_cli-0.15.1}/src/tunnel.py RENAMED Viewed

@@ -539,6 +539,7 @@ class BackendTunnel:
                     "workerId": self.worker_id,
                     "success": "error" not in result or not result["error"],
                     "content": result.get("content", ""),
+                    "finishReason": result.get("finish_reason", ""),
                     "tokensUsed": result.get("usage", {}).get("total_tokens", 0),
                     "generationMs": generation_ms,
                     "errorMessage": result.get("error", ""),
@@ -724,6 +725,7 @@ class BackendTunnel:
                     usage = chunk.get("usage", {})
                     # Use the processed content from the done chunk
                     final_content = chunk.get("content", "".join(content_parts))
+                    finish_reason = chunk.get("finish_reason", "")
                     latency_ms = int((time.time() - start_time) * 1000)
@@ -742,6 +744,7 @@ class BackendTunnel:
                             "success": True,
                             "content": final_content,
                             "thinking": thinking,
+                            "finishReason": finish_reason,
                             "tokenCount": token_count,
                             "latencyMs": latency_ms,
                         },