PyPI - loreguard-cli - Versions diffs - 0.14.6__tar.gz → 0.15.2__tar.gz - Mend

loreguard-cli 0.14.6.tar.gz → 0.15.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

{loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: loreguard-cli
-Version: 0.14.6
+Version: 0.15.2
 Summary: Local inference client for Loreguard NPCs
 Project-URL: Homepage, https://loreguard.com
 Project-URL: Documentation, https://github.com/beyond-logic-labs/loreguard-cli#readme

{loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "loreguard-cli"
-version = "0.14.6"
+version = "0.15.2"
 description = "Local inference client for Loreguard NPCs"
 readme = "README.md"
 license = "MIT"

{loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/intent_classifier.py RENAMED Viewed

@@ -52,7 +52,9 @@ INTENT_LABEL_DESCRIPTIONS = {
     ),
     IntentLabel.WORKING_MEMORY: (
         "a question about the NPC's own current state, feelings, recent "
-        "experiences, or personal memory that can be answered from working memory"
+        "experiences, or personal memory that can be answered from working memory; "
+        "this also includes clarification questions about what the NPC just said "
+        "(examples: what do you mean, what do you mean by that, clarify that)"
     ),
     IntentLabel.LIGHT_RETRIEVAL: (
         "a request for one specific factual detail (number, fee, date, name, "
@@ -60,7 +62,8 @@ INTENT_LABEL_DESCRIPTIONS = {
     ),
     IntentLabel.FULL_RETRIEVAL: (
         "a complex request requiring multiple facts, synthesis, planning, "
-        "comparison, or multi-step reasoning across sources"
+        "comparison, or multi-step reasoning across sources; not a brief clarification "
+        "of the NPC's own wording"
     ),
 }

{loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/llm.py RENAMED Viewed

@@ -289,6 +289,7 @@ class LLMProxy:
         token_index = 0
         usage = {}
         line_count = 0  # Track SSE lines for debugging
+        final_finish_reason = None
         try:
             # Use a custom timeout for streaming:
@@ -355,6 +356,8 @@ class LLMProxy:
                     # Check for finish_reason
                     finish_reason = choices[0].get("finish_reason")
+                    if finish_reason:
+                        final_finish_reason = finish_reason
                     # Extract usage if present (some servers send it with final chunk)
                     if "usage" in chunk_data:
@@ -402,6 +405,7 @@ class LLMProxy:
             "usage": usage,
             "model": req.model,
             "token_count": token_index,
+            "finish_reason": final_finish_reason,
         }
     def _validate_messages(self, messages: list[dict]) -> list[dict]:
@@ -641,6 +645,7 @@ class LLMProxy:
             "thinking": thinking,
             "model": data.get("model", req.model),
             "usage": data.get("usage", {}),
+            "finish_reason": data["choices"][0].get("finish_reason"),
         }
     def _extract_thinking(self, content: str) -> tuple[str, str]:

{loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/nli.py RENAMED Viewed

@@ -119,14 +119,14 @@ class NLIService:
             logger.info(f"Loading NLI model: {self._model_path} (device={self._device})")
             if self._use_hhem:
+                # HHEMv2 custom class (built for transformers 4.39) lacks
+                # all_tied_weights_keys required by transformers 5.x.
+                # Patch the vendored modeling file before loading.
+                self._patch_hhem_model_files()
                 self._model = AutoModelForSequenceClassification.from_pretrained(
                     self._model_path,
                     trust_remote_code=True,
                 )
-                # HHEMv2 custom class may lack all_tied_weights_keys (needed by
-                # newer transformers for .to() / .eval()). Patch if missing.
-                if not hasattr(self._model, "_tied_weights_keys"):
-                    self._model._tied_weights_keys = []
                 self._model.to(self._device)
                 self._model.eval()
@@ -328,6 +328,74 @@ class NLIService:
             return results
+    def _patch_hhem_model_files(self):
+        """Patch vendored HHEM files for transformers 5.x compatibility.
+        The HHEM model was built for transformers 4.39. Transformers 5.x:
+        1. Requires `all_tied_weights_keys` during PreTrainedModel.__init__()
+        2. Is stricter about model_type matching between config.json and config class
+        Since trust_remote_code loads the .py files directly, we patch before loading.
+        """
+        # Patch 1: modeling_hhem_v2.py — add missing class attributes
+        model_file = os.path.join(self._model_path, "modeling_hhem_v2.py")
+        if os.path.exists(model_file):
+            try:
+                content = open(model_file, "r").read()
+                if "all_tied_weights_keys" not in content:
+                    patched = content.replace(
+                        "class HHEMv2ForSequenceClassification(PreTrainedModel):\n"
+                        "    config_class = HHEMv2Config",
+                        "class HHEMv2ForSequenceClassification(PreTrainedModel):\n"
+                        "    config_class = HHEMv2Config\n"
+                        "    # Compatibility: transformers 5.x requires these attributes\n"
+                        "    _tied_weights_keys = []\n"
+                        "    all_tied_weights_keys = {}",
+                    )
+                    if patched != content:
+                        with open(model_file, "w") as f:
+                            f.write(patched)
+                        logger.info("Patched modeling_hhem_v2.py for transformers 5.x")
+            except Exception as e:
+                logger.warning(f"Could not patch modeling_hhem_v2.py: {e}")
+        # Patch 2: config.json — fix model_type mismatch
+        # config.json has "HHEMv2Config" but the config class defines model_type = "HHEMv2"
+        config_file = os.path.join(self._model_path, "config.json")
+        if os.path.exists(config_file):
+            try:
+                content = open(config_file, "r").read()
+                if '"model_type": "HHEMv2Config"' in content:
+                    patched = content.replace(
+                        '"model_type": "HHEMv2Config"',
+                        '"model_type": "HHEMv2"',
+                    )
+                    with open(config_file, "w") as f:
+                        f.write(patched)
+                    logger.info("Patched config.json: model_type HHEMv2Config -> HHEMv2")
+            except Exception as e:
+                logger.warning(f"Could not patch config.json: {e}")
+        # Patch 3: configuration_hhem_v2.py — use local flan-t5-base instead of HuggingFace
+        # The HHEM model downloads google/flan-t5-base config+tokenizer at init.
+        # If we've bundled those files locally, rewrite the foundation path.
+        config_py = os.path.join(self._model_path, "configuration_hhem_v2.py")
+        local_foundation = os.path.join(self._model_path, "flan-t5-base")
+        if os.path.exists(config_py) and os.path.isdir(local_foundation):
+            try:
+                content = open(config_py, "r").read()
+                if '"google/flan-t5-base"' in content:
+                    # Use absolute path to the bundled flan-t5-base files
+                    abs_path = os.path.abspath(local_foundation)
+                    patched = content.replace(
+                        '"google/flan-t5-base"',
+                        f'"{abs_path}"',
+                    )
+                    with open(config_py, "w") as f:
+                        f.write(patched)
+                    logger.info(f"Patched foundation to local: {abs_path}")
+            except Exception as e:
+                logger.warning(f"Could not patch configuration_hhem_v2.py: {e}")
     def _predict_hhem(self, pairs: List[Tuple[str, str]]) -> List[float]:
         """Run HHEM prediction and normalize output to list of floats."""
         import torch

{loreguard_cli-0.14.6 → loreguard_cli-0.15.2}/src/tunnel.py RENAMED Viewed

@@ -539,6 +539,7 @@ class BackendTunnel:
                     "workerId": self.worker_id,
                     "success": "error" not in result or not result["error"],
                     "content": result.get("content", ""),
+                    "finishReason": result.get("finish_reason", ""),
                     "tokensUsed": result.get("usage", {}).get("total_tokens", 0),
                     "generationMs": generation_ms,
                     "errorMessage": result.get("error", ""),
@@ -724,6 +725,7 @@ class BackendTunnel:
                     usage = chunk.get("usage", {})
                     # Use the processed content from the done chunk
                     final_content = chunk.get("content", "".join(content_parts))
+                    finish_reason = chunk.get("finish_reason", "")
                     latency_ms = int((time.time() - start_time) * 1000)
@@ -742,6 +744,7 @@ class BackendTunnel:
                             "success": True,
                             "content": final_content,
                             "thinking": thinking,
+                            "finishReason": finish_reason,
                             "tokenCount": token_count,
                             "latencyMs": latency_ms,
                         },