PyPI - loreguard-cli - Versions diffs - 0.14.1__tar.gz → 0.14.5__tar.gz - Mend

loreguard-cli 0.14.1 (tar.gz) → 0.14.5 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63)

{loreguard_cli-0.14.1 → loreguard_cli-0.14.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: loreguard-cli
-Version: 0.14.1
+Version: 0.14.5
 Summary: Local inference client for Loreguard NPCs
 Project-URL: Homepage, https://loreguard.com
 Project-URL: Documentation, https://github.com/beyond-logic-labs/loreguard-cli#readme

{loreguard_cli-0.14.1 → loreguard_cli-0.14.5}/loreguard_entry.py RENAMED Viewed

@@ -3,6 +3,9 @@
 Imports src as a proper package so relative imports inside src/__main__.py work.
 """
+import multiprocessing
+multiprocessing.freeze_support()  # Required for PyInstaller on macOS/Windows
 from src.__main__ import main
 main()

{loreguard_cli-0.14.1 → loreguard_cli-0.14.5}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "loreguard-cli"
-version = "0.14.1"
+version = "0.14.5"
 description = "Local inference client for Loreguard NPCs"
 readme = "README.md"
 license = "MIT"

{loreguard_cli-0.14.1 → loreguard_cli-0.14.5}/src/intent_classifier.py RENAMED Viewed

@@ -1,4 +1,4 @@
-"""Intent Classification service for adaptive retrieval (ADR-0010).
+ """Intent Classification service for adaptive retrieval (ADR-0010).
 This module provides zero-shot intent classification for the NPC dialogue pipeline.
 It uses DeBERTa-v3-large-zeroshot to classify user messages into retrieval strategy categories:
@@ -40,16 +40,28 @@ class IntentResult:
 # DeBERTa-v3-large is state-of-the-art for zero-shot classification
 DEFAULT_INTENT_MODEL = "MoritzLaurer/DeBERTa-v3-large-zeroshot-v2.0"
-# Intent hypothesis templates for zero-shot classification
-# Each intent maps to a hypothesis that DeBERTa will evaluate
-# NOTE: Hypotheses must be specific to avoid misclassification of mixed-intent messages
-# (e.g., "hey, how are you? what's the ISP fee?" should match LIGHT_RETRIEVAL, not WORKING_MEMORY)
-# TODO: Move hypotheses to backend for centralized control (see loreguard-engine issue)
-INTENT_HYPOTHESES = {
-    IntentLabel.NO_RETRIEVAL: "This is a greeting, farewell, or a vague question about availability without asking for any specific information.",
-    IntentLabel.WORKING_MEMORY: "This asks about the person's current life, recent experiences, what they've been up to, or how things are going for them.",
-    IntentLabel.LIGHT_RETRIEVAL: "This asks for a specific fact, number, price, fee, date, location, or procedure that requires looking up information.",
-    IntentLabel.FULL_RETRIEVAL: "This is a complex question requiring analysis of multiple topics or understanding relationships between different pieces of information.",
+# Intent label descriptions for zero-shot classification.
+# Keep these mutually exclusive and concrete to reduce confusion between:
+# - social greetings vs factual requests
+# - working-memory questions vs retrieval questions
+INTENT_LABEL_DESCRIPTIONS = {
+    IntentLabel.NO_RETRIEVAL: (
+        "a greeting, acknowledgement, farewell, or social small talk "
+        "(examples: hi, hello, hey, yo, thanks, okay, bye) without asking "
+        "for specific factual information"
+    ),
+    IntentLabel.WORKING_MEMORY: (
+        "a question about the NPC's own current state, feelings, recent "
+        "experiences, or personal memory that can be answered from working memory"
+    ),
+    IntentLabel.LIGHT_RETRIEVAL: (
+        "a request for one specific factual detail (number, fee, date, name, "
+        "location, status, or single-file fact) that needs light retrieval"
+    ),
+    IntentLabel.FULL_RETRIEVAL: (
+        "a complex request requiring multiple facts, synthesis, planning, "
+        "comparison, or multi-step reasoning across sources"
+    ),
 }
 # Promise detection hypothesis for follow-up triggers (ADR-0020)
@@ -156,32 +168,35 @@ class IntentClassifier:
         start_time = time.time()
-        # Get candidate labels and hypotheses
-        labels = list(INTENT_HYPOTHESES.keys())
-        hypotheses = list(INTENT_HYPOTHESES.values())
+        # Candidate labels are descriptive intent classes.
+        candidate_labels = list(INTENT_LABEL_DESCRIPTIONS.values())
-        # Run zero-shot classification
-        # The pipeline will evaluate each hypothesis against the query
+        # Run zero-shot classification.
+        # Using a consistent hypothesis template tends to be more stable than
+        # passing full natural-language hypotheses as labels.
         result = self._classifier(
             query,
-            candidate_labels=hypotheses,
-            hypothesis_template="{}",  # Use hypotheses directly
+            candidate_labels=candidate_labels,
+            hypothesis_template="This user message is {}.",
             multi_label=False,
         )
         latency_ms = int((time.time() - start_time) * 1000)
-        # Map the winning hypothesis back to intent label
-        winning_hypothesis = result["labels"][0]
+        # Map the winning description back to intent label
+        winning_description = result["labels"][0]
         confidence = result["scores"][0]
-        # Find the intent that corresponds to the winning hypothesis
+        # Find the intent that corresponds to the winning description
         intent = IntentLabel.FULL_RETRIEVAL  # Default
-        for label, hypothesis in INTENT_HYPOTHESES.items():
-            if hypothesis == winning_hypothesis:
+        for label, description in INTENT_LABEL_DESCRIPTIONS.items():
+            if description == winning_description:
                 intent = label
                 break
+        # Log full score distribution for tuning/debugging.
+        label_score_pairs = list(zip(result["labels"], result["scores"]))
+        logger.debug("Intent score distribution: %s", label_score_pairs)
         logger.info(f"Intent classification: {intent.value} (confidence={confidence:.2f}, latency={latency_ms}ms)")
         return IntentResult(

{loreguard_cli-0.14.1 → loreguard_cli-0.14.5}/src/nli.py RENAMED Viewed

@@ -123,6 +123,10 @@ class NLIService:
                     self._model_path,
                     trust_remote_code=True,
                 )
+                # HHEMv2 custom class may lack all_tied_weights_keys (needed by
+                # newer transformers for .to() / .eval()). Patch if missing.
+                if not hasattr(self._model, "_tied_weights_keys"):
+                    self._model._tied_weights_keys = []
                 self._model.to(self._device)
                 self._model.eval()

{loreguard_cli-0.14.1 → loreguard_cli-0.14.5}/src/tui/screens/main.py RENAMED Viewed

@@ -546,11 +546,17 @@ class MainScreen(Screen):
                 intent_classifier = None
             # Load dialogue act classifier (filler selection) - run in thread pool
-            self._update_status("Loading dialogue act model...", log=False)
-            self._log("Loading dialogue act classifier...")
             dialogue_act_classifier = None
+            enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "true").lower() == "true"
+            if not enable_dialogue_act:
+                self._log("Dialogue act classifier disabled via LOREGUARD_DIALOGUE_ACT_ENABLED")
+            else:
+                self._update_status("Loading dialogue act model...", log=False)
+                self._log("Loading dialogue act classifier...")
             try:
-                if is_dialogue_act_model_available():
+                if not enable_dialogue_act:
+                    pass  # Skip loading
+                elif is_dialogue_act_model_available():
                     dialogue_act_classifier = DialogueActClassifier()
                     loop = asyncio.get_event_loop()
                     with concurrent.futures.ThreadPoolExecutor() as pool:

{loreguard_cli-0.14.1 → loreguard_cli-0.14.5}/src/tui/screens/running.py RENAMED Viewed

@@ -2,6 +2,7 @@
 import asyncio
 import concurrent.futures
+import os
 from typing import TYPE_CHECKING, Optional
 from textual.app import ComposeResult
@@ -247,10 +248,17 @@ class RunningScreen(Screen):
                 # Load Dialogue Act Classifier
                 dialogue_act_classifier = None
-                self._update_status("dialogue_act", "Dialogue Act", "Loading...", "info")
-                self._log("Loading Dialogue Act classifier...", "info")
+                enable_dialogue_act = os.getenv("LOREGUARD_DIALOGUE_ACT_ENABLED", "true").lower() == "true"
+                if not enable_dialogue_act:
+                    self._update_status("dialogue_act", "Dialogue Act", "Disabled", "info")
+                    self._log("Dialogue act classifier disabled via LOREGUARD_DIALOGUE_ACT_ENABLED", "info")
+                else:
+                    self._update_status("dialogue_act", "Dialogue Act", "Loading...", "info")
+                    self._log("Loading Dialogue Act classifier...", "info")
                 try:
-                    if is_dialogue_act_model_available():
+                    if not enable_dialogue_act:
+                        pass  # Skip loading
+                    elif is_dialogue_act_model_available():
                         dialogue_act_classifier = DialogueActClassifier()
                         loop = asyncio.get_event_loop()
                         with concurrent.futures.ThreadPoolExecutor() as pool:

loreguard_cli-0.14.5/tests/test_intent_classifier.py ADDED Viewed

@@ -0,0 +1,48 @@
+from src.intent_classifier import (
+    INTENT_LABEL_DESCRIPTIONS,
+    IntentClassifier,
+    IntentLabel,
+)
+class _FakeClassifier:
+    def __init__(self, labels, scores):
+        self.labels = labels
+        self.scores = scores
+        self.calls = []
+    def __call__(self, query, **kwargs):
+        self.calls.append((query, kwargs))
+        return {"labels": self.labels, "scores": self.scores}
+def test_classify_uses_descriptive_labels_and_maps_winner():
+    winner = INTENT_LABEL_DESCRIPTIONS[IntentLabel.NO_RETRIEVAL]
+    second = INTENT_LABEL_DESCRIPTIONS[IntentLabel.LIGHT_RETRIEVAL]
+    fake = _FakeClassifier(labels=[winner, second], scores=[0.88, 0.12])
+    classifier = IntentClassifier(model_path="dummy-model")
+    classifier._classifier = fake
+    result = classifier.classify("hi")
+    assert result.intent == IntentLabel.NO_RETRIEVAL
+    assert result.confidence == 0.88
+    assert result.latency_ms >= 0
+    assert len(fake.calls) == 1
+    _, kwargs = fake.calls[0]
+    assert kwargs["candidate_labels"] == list(INTENT_LABEL_DESCRIPTIONS.values())
+    assert kwargs["hypothesis_template"] == "This user message is {}."
+    assert kwargs["multi_label"] is False
+def test_classify_with_fallback_returns_full_retrieval_on_error():
+    classifier = IntentClassifier(model_path="dummy-model")
+    classifier._classifier = None
+    result = classifier.classify_with_fallback("hi")
+    assert result.intent == IntentLabel.FULL_RETRIEVAL
+    assert result.confidence == 0.0
+    assert result.latency_ms == 0

{loreguard_cli-0.14.1 → loreguard_cli-0.14.5}/uv.lock RENAMED Viewed

@@ -600,7 +600,7 @@ wheels = [
 [[package]]
 name = "loreguard-cli"
-version = "0.14.0"
+version = "0.14.5"
 source = { editable = "." }
 dependencies = [
     { name = "aiofiles" },