PyPI - loreguard-cli - Versions diffs - 0.11.2__tar.gz → 0.12.1__tar.gz - Mend

loreguard-cli 0.11.2.tar.gz → 0.12.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)

{loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/.gitignore RENAMED Viewed

@@ -43,3 +43,4 @@ htmlcov/
 # Misc
 *.log
 .DS_Store
+.python-version

{loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: loreguard-cli
-Version: 0.11.2
+Version: 0.12.1
 Summary: Local inference client for Loreguard NPCs
 Project-URL: Homepage, https://loreguard.com
 Project-URL: Documentation, https://github.com/beyond-logic-labs/loreguard-cli#readme
@@ -27,6 +27,7 @@ Requires-Dist: pydantic>=2.5.0
 Requires-Dist: python-dotenv>=1.0.0
 Requires-Dist: rich>=13.0.0
 Requires-Dist: textual>=0.47.0
+Requires-Dist: tf-keras>=2.16.0
 Requires-Dist: torch>=2.0.0
 Requires-Dist: transformers>=4.36.0
 Requires-Dist: uvicorn>=0.27.0

{loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "loreguard-cli"
-version = "0.11.2"
+version = "0.12.1"
 description = "Local inference client for Loreguard NPCs"
 readme = "README.md"
 license = "MIT"
@@ -34,6 +34,7 @@ dependencies = [
     "uvicorn>=0.27.0",
     "python-dotenv>=1.0.0",
     "DialogTag",
+    "tf-keras>=2.16.0",  # Required for DeBERTa intent classifier (Keras 3 compatibility)
 ]
 [project.urls]

loreguard_cli-0.12.1/src/chunk_detector.py ADDED Viewed

@@ -0,0 +1,270 @@
+"""Chunk Detection service for natural conversation breaks (ADR-0023).
+This module provides zero-shot classification to detect natural break points
+in NPC responses. It splits a response into multiple chunks that can be
+delivered sequentially for more human-like conversation flow.
+Uses DeBERTa-v3-large-zeroshot to classify sentence boundaries:
+- "continues same thought" → merge with previous chunk
+- "starts new thought" → create new chunk
+This is the client-side implementation that runs locally on the user's machine,
+leveraging the same DeBERTa model used for intent classification.
+"""
+import logging
+import re
+import threading
+import time
+from dataclasses import dataclass
+from typing import List, Optional
+logger = logging.getLogger(__name__)
+@dataclass
+class TextChunk:
+    """A single chunk of text in a response."""
+    text: str
+    index: int  # Position in sequence (0-based)
+@dataclass
+class ChunkResult:
+    """Result of chunk detection."""
+    chunks: List[TextChunk]
+    latency_ms: int  # Detection latency in milliseconds
+# Hypotheses for zero-shot classification of sentence boundaries
+CHUNK_HYPOTHESES = {
+    "continues": "This text continues the same thought or topic as the previous sentence.",
+    "starts_new": "This text starts a new thought, topic, or conversational turn.",
+}
+# Threshold for "starts new thought" classification
+# If confidence > threshold, we create a new chunk
+NEW_THOUGHT_THRESHOLD = 0.55
+class ChunkDetector:
+    """Service for detecting natural conversation breaks using DeBERTa.
+    Uses zero-shot classification to determine where to split a response
+    into natural chunks for more human-like delivery.
+    """
+    def __init__(self, classifier=None, model_path: Optional[str] = None):
+        """Initialize the chunk detector.
+        Args:
+            classifier: Optional pre-loaded zero-shot classifier to reuse.
+                       If None, will use IntentClassifier's model.
+            model_path: Path to local model directory. If None, uses HuggingFace hub.
+        """
+        self._classifier = classifier
+        self._model_path = model_path or "MoritzLaurer/DeBERTa-v3-large-zeroshot-v2.0"
+        self._device = None
+        self._load_lock = threading.Lock()
+    @property
+    def model_name(self) -> str:
+        """Return the configured model identifier."""
+        return self._model_path
+    def set_classifier(self, classifier):
+        """Set a pre-loaded classifier to reuse.
+        This allows sharing the DeBERTa model with IntentClassifier
+        to avoid loading it twice.
+        """
+        self._classifier = classifier
+    def _resolve_device(self) -> str:
+        """Resolve the best available device."""
+        try:
+            import torch
+            if torch.cuda.is_available():
+                return "cuda"
+            elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+                return "mps"  # Apple Silicon
+            return "cpu"
+        except ImportError:
+            return "cpu"
+    def load_model(self) -> bool:
+        """Load the classification model.
+        Thread-safe: uses lock to prevent concurrent model loading.
+        Returns:
+            True if model loaded successfully, False otherwise.
+        """
+        if self._classifier is not None:
+            return True
+        with self._load_lock:
+            if self._classifier is not None:
+                return True
+            try:
+                from transformers import pipeline
+                self._device = self._resolve_device()
+                logger.info(f"Loading chunk detector: {self._model_path} (device={self._device})")
+                device_idx = 0 if self._device == "cuda" else -1 if self._device == "cpu" else 0
+                self._classifier = pipeline(
+                    "zero-shot-classification",
+                    model=self._model_path,
+                    device=device_idx if self._device != "mps" else "mps",
+                )
+                logger.info("Chunk detector loaded successfully")
+                return True
+            except Exception as e:
+                logger.error(f"Failed to load chunk detector: {e}")
+                return False
+    def _split_into_sentences(self, text: str) -> List[str]:
+        """Split text into sentences at natural break points.
+        Uses a simple regex-based approach that handles common cases:
+        - Period, exclamation, question mark followed by space/end
+        - Ellipsis (...)
+        - Preserves quotes and parentheses
+        """
+        # Split on sentence-ending punctuation followed by space or end
+        # Handles: . ! ? ... followed by space or end
+        pattern = r'(?<=[.!?])\s+|(?<=\.\.\.)\s*'
+        sentences = re.split(pattern, text.strip())
+        # Filter out empty strings and strip whitespace
+        sentences = [s.strip() for s in sentences if s.strip()]
+        return sentences
+    def detect(self, text: str) -> ChunkResult:
+        """Detect natural break points in text and split into chunks.
+        Args:
+            text: The NPC response text to analyze
+        Returns:
+            ChunkResult with list of TextChunk objects
+        """
+        start_time = time.time()
+        # Handle empty or very short text
+        if not text or len(text.strip()) < 10:
+            return ChunkResult(
+                chunks=[TextChunk(text=text, index=0)] if text else [],
+                latency_ms=0,
+            )
+        # Split into sentences
+        sentences = self._split_into_sentences(text)
+        # If only one sentence, return as single chunk
+        if len(sentences) <= 1:
+            latency_ms = int((time.time() - start_time) * 1000)
+            return ChunkResult(
+                chunks=[TextChunk(text=text, index=0)],
+                latency_ms=latency_ms,
+            )
+        # Ensure model is loaded
+        if self._classifier is None:
+            if not self.load_model():
+                # Fallback: return full text as single chunk
+                return ChunkResult(
+                    chunks=[TextChunk(text=text, index=0)],
+                    latency_ms=0,
+                )
+        # Classify each sentence boundary
+        chunks: List[str] = [sentences[0]]
+        hypotheses = list(CHUNK_HYPOTHESES.values())
+        for i in range(1, len(sentences)):
+            prev_sentence = sentences[i - 1]
+            curr_sentence = sentences[i]
+            # Create context for classification
+            # We ask: does curr_sentence continue prev_sentence's thought?
+            context = f"{prev_sentence} {curr_sentence}"
+            try:
+                result = self._classifier(
+                    context,
+                    candidate_labels=hypotheses,
+                    hypothesis_template="{}",
+                    multi_label=False,
+                )
+                # Check if "starts new thought" won
+                starts_new_idx = hypotheses.index(CHUNK_HYPOTHESES["starts_new"])
+                starts_new_score = 0.0
+                for j, label in enumerate(result["labels"]):
+                    if label == CHUNK_HYPOTHESES["starts_new"]:
+                        starts_new_score = result["scores"][j]
+                        break
+                if starts_new_score > NEW_THOUGHT_THRESHOLD:
+                    # Start new chunk
+                    chunks.append(curr_sentence)
+                    logger.debug(f"New chunk at sentence {i}: score={starts_new_score:.2f}")
+                else:
+                    # Merge with previous chunk
+                    chunks[-1] = f"{chunks[-1]} {curr_sentence}"
+                    logger.debug(f"Merged sentence {i}: score={starts_new_score:.2f}")
+            except Exception as e:
+                logger.warning(f"Classification failed for sentence {i}, merging: {e}")
+                chunks[-1] = f"{chunks[-1]} {curr_sentence}"
+        latency_ms = int((time.time() - start_time) * 1000)
+        # Convert to TextChunk objects
+        text_chunks = [
+            TextChunk(text=chunk.strip(), index=i)
+            for i, chunk in enumerate(chunks)
+            if chunk.strip()
+        ]
+        logger.info(f"Chunk detection: {len(sentences)} sentences -> {len(text_chunks)} chunks (latency={latency_ms}ms)")
+        return ChunkResult(
+            chunks=text_chunks,
+            latency_ms=latency_ms,
+        )
+    def detect_with_fallback(self, text: str) -> ChunkResult:
+        """Detect chunks with fallback to single chunk on error.
+        Args:
+            text: The NPC response text to analyze
+        Returns:
+            ChunkResult (defaults to single chunk on error)
+        """
+        try:
+            return self.detect(text)
+        except Exception as e:
+            logger.warning(f"Chunk detection failed, returning single chunk: {e}")
+            return ChunkResult(
+                chunks=[TextChunk(text=text, index=0)] if text else [],
+                latency_ms=0,
+            )
+    @property
+    def is_loaded(self) -> bool:
+        """Check if the model is loaded."""
+        return self._classifier is not None
+    @property
+    def device(self) -> Optional[str]:
+        """Get the device being used."""
+        return self._device

{loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/cli.py RENAMED Viewed

@@ -240,12 +240,46 @@ class LoreguardCLI:
         try:
             llm_proxy = LLMProxy(f"http://127.0.0.1:{self.port}")
+            # Initialize intent classifier (ADR-0010)
+            intent_classifier = None
+            try:
+                from .intent_classifier import IntentClassifier
+                log.info("Loading intent classifier...")
+                intent_classifier = IntentClassifier()
+                if intent_classifier.load_model():
+                    log.info(f"Intent classifier ready (device: {intent_classifier.device})")
+                else:
+                    log.warning("Intent classifier failed to load")
+                    intent_classifier = None
+            except Exception as e:
+                log.warning(f"Intent classifier error: {e}")
+            # Initialize chunk detector (ADR-0023) - shares model with intent classifier
+            chunk_detector = None
+            try:
+                from .chunk_detector import ChunkDetector
+                log.info("Loading chunk detector...")
+                chunk_detector = ChunkDetector()
+                if intent_classifier is not None and intent_classifier.is_loaded:
+                    chunk_detector.set_classifier(intent_classifier._classifier)
+                    log.info("Chunk detector ready (shared model)")
+                else:
+                    if chunk_detector.load_model():
+                        log.info(f"Chunk detector ready (device: {chunk_detector.device})")
+                    else:
+                        log.warning("Chunk detector failed to load")
+                        chunk_detector = None
+            except Exception as e:
+                log.warning(f"Chunk detector error: {e}")
             self._tunnel = BackendTunnel(
                 backend_url=self.backend_url,
                 llm_proxy=llm_proxy,
                 worker_id=self.worker_id,
                 worker_token=self.token,
                 model_id=self.model_path.stem if self.model_path else "unknown",
+                intent_classifier=intent_classifier,
+                chunk_detector=chunk_detector,
             )
             self._tunnel.on_request_complete = self._on_request_complete
@@ -253,9 +287,11 @@ class LoreguardCLI:
             # Start SDK server for local game clients
             from .http_server import start_sdk_server
             try:
+                sdk_port = int(os.environ.get("LOREGUARD_SDK_PORT", "0"))
                 self._sdk_port = start_sdk_server(
                     tunnel=self._tunnel,
                     main_loop=asyncio.get_running_loop(),
+                    port=sdk_port,
                 )
                 log.info(f"SDK server listening on 127.0.0.1:{self._sdk_port}")
             except Exception as e:

{loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/dialogue_act_classifier.py RENAMED Viewed

@@ -274,9 +274,12 @@ def download_dialogue_act_model(progress_callback=None, error_callback=None) ->
                 DEFAULT_DIALOGUE_ACT_MODEL,
                 local_files_only=False,
                 tqdm_class=TqdmCallback,
+                max_workers=1,  # Avoid subprocess fd issues in ThreadPoolExecutor
             )
         else:
-            snapshot_download(DEFAULT_DIALOGUE_ACT_MODEL, local_files_only=False)
+            # max_workers=1 prevents "bad value(s) in fds_to_keep" error
+            # when running from ThreadPoolExecutor
+            snapshot_download(DEFAULT_DIALOGUE_ACT_MODEL, local_files_only=False, max_workers=1)
         logger.info("Dialogue act model downloaded successfully")
         return True

{loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/http_server.py RENAMED Viewed

@@ -218,6 +218,8 @@ class EmbeddedHTTPServer:
                         except (asyncio.TimeoutError, TimeoutError, Exception):
                             break
                     break
+                elif msg_type == "pass_update":
+                    yield f"event: pass_update\ndata: {json.dumps(msg.get('data', {}))}\n\n"
                 elif msg_type == "follow_up":
                     # Follow-up received before done (shouldn't happen, but handle gracefully)
                     yield f"event: follow_up\ndata: {json.dumps(msg.get('data', {}))}\n\n"
@@ -236,6 +238,7 @@ class EmbeddedHTTPServer:
     async def _wait_for_response(self, request_id: str, queue: asyncio.Queue) -> dict:
         """Wait for complete response (non-streaming mode)."""
+        pipeline_trace = []
         try:
             while True:
                 try:
@@ -258,12 +261,18 @@ class EmbeddedHTTPServer:
                 msg_type = msg.get("type")
                 if msg_type == "done":
                     data = msg.get("data", {})
-                    return {
+                    result = {
                         "response": data.get("speech", ""),
                         "verified": data.get("verified", False),
                         "citations": data.get("citations", []),
                     }
-                elif msg_type in ("filler", "pass_update"):
+                    if pipeline_trace:
+                        result["pipeline_trace"] = pipeline_trace
+                    return result
+                elif msg_type == "pass_update":
+                    pipeline_trace.append(msg.get("data", {}))
+                    continue
+                elif msg_type == "filler":
                     continue
                 elif msg_type == "error":
                     return {"error": msg.get("error", "Unknown error")}

{loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/intent_classifier.py RENAMED Viewed

@@ -1,7 +1,7 @@
 """Intent Classification service for adaptive retrieval (ADR-0010).
 This module provides zero-shot intent classification for the NPC dialogue pipeline.
-It uses BART-large-MNLI to classify user messages into retrieval strategy categories:
+It uses DeBERTa-v3-large-zeroshot to classify user messages into retrieval strategy categories:
 - A_NO_RETRIEVAL: Greetings, chitchat, farewells (skip retrieval)
 - B_WORKING_MEMORY: Simple identity/state questions (working memory only)
 - C_LIGHT_RETRIEVAL: Direct factual questions (top 3 sources)
@@ -41,12 +41,15 @@ class IntentResult:
 DEFAULT_INTENT_MODEL = "MoritzLaurer/DeBERTa-v3-large-zeroshot-v2.0"
 # Intent hypothesis templates for zero-shot classification
-# Each intent maps to a hypothesis that BART will evaluate
+# Each intent maps to a hypothesis that DeBERTa will evaluate
+# NOTE: Hypotheses must be specific to avoid misclassification of mixed-intent messages
+# (e.g., "hey, how are you? what's the ISP fee?" should match LIGHT_RETRIEVAL, not WORKING_MEMORY)
+# TODO: Move hypotheses to backend for centralized control (see loreguard-engine issue)
 INTENT_HYPOTHESES = {
-    IntentLabel.NO_RETRIEVAL: "This is a greeting, chitchat, or farewell that does not require any information retrieval.",
-    IntentLabel.WORKING_MEMORY: "This is a simple question about identity, name, or basic state that only requires basic memory.",
-    IntentLabel.LIGHT_RETRIEVAL: "This is a direct factual question that requires looking up specific information.",
-    IntentLabel.FULL_RETRIEVAL: "This is a complex question that requires comprehensive information retrieval and analysis.",
+    IntentLabel.NO_RETRIEVAL: "This is a greeting, farewell, or a vague question about availability without asking for any specific information.",
+    IntentLabel.WORKING_MEMORY: "This asks about the person's current life, recent experiences, what they've been up to, or how things are going for them.",
+    IntentLabel.LIGHT_RETRIEVAL: "This asks for a specific fact, number, price, fee, date, location, or procedure that requires looking up information.",
+    IntentLabel.FULL_RETRIEVAL: "This is a complex question requiring analysis of multiple topics or understanding relationships between different pieces of information.",
 }
 # Promise detection hypothesis for follow-up triggers (ADR-0020)
@@ -63,7 +66,7 @@ class PromiseResult:
 class IntentClassifier:
-    """Service for zero-shot intent classification using BART-large-MNLI.
+    """Service for zero-shot intent classification using DeBERTa-v3-large-zeroshot.
     Uses zero-shot classification to categorize user messages into one of four
     retrieval strategies without any fine-tuning required.
@@ -261,7 +264,7 @@ def is_intent_model_available() -> bool:
     """Check if the intent model is available in HuggingFace cache.
     The transformers library caches models in ~/.cache/huggingface/hub/.
-    This function checks if the BART model has been downloaded.
+    This function checks if the DeBERTa model has been downloaded.
     """
     try:
         from huggingface_hub import try_to_load_from_cache
@@ -314,9 +317,12 @@ def download_intent_model(progress_callback=None, error_callback=None) -> bool:
                 DEFAULT_INTENT_MODEL,
                 local_files_only=False,
                 tqdm_class=TqdmCallback,
+                max_workers=1,  # Avoid subprocess fd issues in ThreadPoolExecutor
             )
         else:
-            snapshot_download(DEFAULT_INTENT_MODEL, local_files_only=False)
+            # max_workers=1 prevents "bad value(s) in fds_to_keep" error
+            # when running from ThreadPoolExecutor
+            snapshot_download(DEFAULT_INTENT_MODEL, local_files_only=False, max_workers=1)
         logger.info("Intent model downloaded successfully")
         return True

{loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/main.py RENAMED Viewed

@@ -145,6 +145,28 @@ async def startup():
     else:
         console.print("[yellow]Dialogue act classifier disabled (set LOREGUARD_DIALOGUE_ACT_ENABLED=true to enable)[/yellow]")
+    # Initialize chunk detector (ADR-0023 - for natural conversation breaks)
+    # Shares model with intent classifier if available
+    chunk_detector = None
+    if enable_intent:
+        console.print("[cyan]Initializing chunk detector...[/cyan]")
+        try:
+            from .chunk_detector import ChunkDetector
+            chunk_detector = ChunkDetector()
+            # Share classifier with intent_classifier if available
+            if intent_classifier is not None and intent_classifier.is_loaded:
+                chunk_detector.set_classifier(intent_classifier._classifier)
+                console.print("[green]Chunk detector ready (shared model)[/green]")
+            else:
+                if chunk_detector.load_model():
+                    console.print(f"[green]Chunk detector ready (device: {chunk_detector.device})[/green]")
+                else:
+                    console.print("[yellow]Warning: Chunk detector failed to load[/yellow]")
+                    chunk_detector = None
+        except Exception as e:
+            console.print(f"[yellow]Warning: Chunk detector error: {e}[/yellow]")
+            chunk_detector = None
     # Connect to remote backend
     backend_url = get_config_value("BACKEND_URL", "wss://api.lorekeeper.ai/workers")
     worker_id = get_config_value("WORKER_ID", "")
@@ -159,6 +181,7 @@ async def startup():
             nli_service=nli_service,
             intent_classifier=intent_classifier,
             dialogue_act_classifier=dialogue_act_classifier,
+            chunk_detector=chunk_detector,
         )
         asyncio.create_task(tunnel.connect())
     elif backend_url:

{loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/nli.py RENAMED Viewed

@@ -404,9 +404,13 @@ def download_nli_model(progress_callback=None, error_callback=None) -> bool:
                 DEFAULT_NLI_MODEL,
                 local_files_only=False,
                 tqdm_class=TqdmCallback,
+                max_workers=1,  # Avoid subprocess fd issues in ThreadPoolExecutor
             )
         else:
-            snapshot_download(DEFAULT_NLI_MODEL, local_files_only=False)
+            # max_workers=1 prevents "bad value(s) in fds_to_keep" error
+            # when running from ThreadPoolExecutor (parallel downloads spawn
+            # subprocesses that conflict with thread-based fd management)
+            snapshot_download(DEFAULT_NLI_MODEL, local_files_only=False, max_workers=1)
         logger.info("NLI model downloaded successfully")
         return True

{loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/screens/main.py RENAMED Viewed

@@ -2,6 +2,7 @@
 import asyncio
 import logging
+import os
 from typing import TYPE_CHECKING
 from textual.app import ComposeResult
@@ -640,11 +641,12 @@ class MainScreen(Screen):
             # Wire up pass update callback to chat widget (for verbose mode)
             def on_pass_update(payload: dict) -> None:
+                log.debug(f"tunnel on_pass_update callback fired, payload keys: {list(payload.keys()) if payload else 'None'}")
                 try:
                     chat = self.query_one(NPCChat)
                     chat.on_pass_update(payload)
-                except Exception:
-                    pass
+                except Exception as e:
+                    log.debug(f"tunnel on_pass_update callback error: {e}")
             app._tunnel.on_pass_update = on_pass_update
@@ -655,10 +657,12 @@ class MainScreen(Screen):
                 # Log SDK server status
                 pass  # Could update a status widget here
+            sdk_port_env = int(os.environ.get("LOREGUARD_SDK_PORT", "0"))
             sdk_port = start_sdk_server(
                 tunnel=app._tunnel,
                 on_status_change=on_sdk_status,
                 main_loop=asyncio.get_event_loop(),
+                port=sdk_port_env,
             )
             self._sdk_port = sdk_port

{loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/widgets/npc_chat.py RENAMED Viewed

@@ -5,9 +5,12 @@ Uses the local proxy for NPC conversations with token streaming:
 """
 import json
+import logging
 from typing import TYPE_CHECKING
 import httpx
+logger = logging.getLogger(__name__)
 from textual.app import ComposeResult
 from textual.containers import Vertical, Horizontal, VerticalScroll
 from textual.widgets import Static, Input
@@ -799,11 +802,11 @@ class NPCChat(Vertical):
             if self._verbose:
                 payload["verbose"] = True
+            local_url = get_local_proxy_url()
             try:
                 await self._do_generate_streaming(payload, status, container)
                 return
             except httpx.ConnectError as e:
-                local_url = get_local_proxy_url()
                 if self._verbose:
                     if local_url:
                         status.update(Text(f"Local proxy failed ({local_url}): {e}", style=FG_DIM))
@@ -812,7 +815,6 @@ class NPCChat(Vertical):
                 else:
                     status.update(Text("Local proxy unavailable, using cloud...", style=FG_DIM))
             except Exception as e:
-                local_url = get_local_proxy_url()
                 if self._verbose:
                     status.update(Text(f"Local proxy error ({local_url}): {type(e).__name__}: {e}", style="#FF5555"))
                 else:
@@ -895,6 +897,9 @@ class NPCChat(Vertical):
                                 container.scroll_end(animate=False)
                                 status.update(Text(f"Streaming... ({tokens_received} tokens)", style=CYAN))
+                        elif event_type == "pass_update":
+                            self.on_pass_update(data)
                         elif event_type == "done":
                             final_data = data
                             speech = data.get("speech", speech)
@@ -987,15 +992,18 @@ class NPCChat(Vertical):
         Called by the tunnel when it receives pass updates via WebSocket.
         """
+        logger.debug(f"on_pass_update called: verbose={self._verbose}, visible={self._visible}, payload_keys={list(payload.keys()) if payload else 'None'}")
         if not self._verbose or not self._visible:
+            logger.debug(f"on_pass_update skipped: verbose={self._verbose}, visible={self._visible}")
             return
         # Add pass to debug panel instead of chat
         try:
             debug_panel = self.query_one(DebugPanel)
             debug_panel.add_pass(payload)
-        except Exception:
-            pass
+            logger.debug(f"on_pass_update: added pass to debug panel")
+        except Exception as e:
+            logger.debug(f"on_pass_update exception: {e}")
     def action_close_chat(self) -> None:
         """Close the chat widget."""

{loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/wizard.py RENAMED Viewed

@@ -1480,6 +1480,34 @@ async def step_start(
                     status.log(f"Dialogue act error: {e}", "error")
                     dialogue_act_classifier = None
+            # Initialize chunk detector (ADR-0023) - shares model with intent classifier
+            chunk_detector = None
+            if intent_enabled:
+                status.set_line("chunk", "Chunk Detect", "Loading...")
+                try:
+                    from .chunk_detector import ChunkDetector
+                    chunk_detector = ChunkDetector()
+                    # Share classifier with intent_classifier if available
+                    if intent_classifier is not None and intent_classifier.is_loaded:
+                        chunk_detector.set_classifier(intent_classifier._classifier)
+                        status.set_line("chunk", "Chunk Detect", f"✓ Ready (shared)")
+                    else:
+                        # Load independently
+                        loop = asyncio.get_event_loop()
+                        with concurrent.futures.ThreadPoolExecutor() as pool:
+                            with suppress_external_output():
+                                model_loaded = await loop.run_in_executor(pool, chunk_detector.load_model)
+                        if model_loaded:
+                            status.set_line("chunk", "Chunk Detect", f"✓ Ready ({chunk_detector.device})")
+                        else:
+                            status.set_line("chunk", "Chunk Detect", "✗ Failed to load")
+                            status.log("Chunk detector failed to load - continuing without", "warn")
+                            chunk_detector = None
+                except Exception as e:
+                    status.set_line("chunk", "Chunk Detect", f"✗ Error: {e}")
+                    status.log(f"Chunk detector error: {e}", "error")
+                    chunk_detector = None
             model_id = _resolve_backend_model_id(model_path.stem)
             tunnel = BackendTunnel(
                 backend_url="wss://api.loreguard.com/workers",
@@ -1490,6 +1518,7 @@ async def step_start(
                 nli_service=nli_service,
                 intent_classifier=intent_classifier,
                 dialogue_act_classifier=dialogue_act_classifier,
+                chunk_detector=chunk_detector,
                 log_callback=status.log,
             )
             asyncio.create_task(tunnel.connect())
@@ -1567,7 +1596,7 @@ async def step_start(
                                 status.stop()
                                 from .npc_chat import run_npc_chat
                                 try:
-                                    await run_npc_chat(api_token=token, tunnel=tunnel)
+                                    await run_npc_chat(api_token=token, tunnel=tunnel, verbose=_verbose)
                                 except KeyboardInterrupt:
                                     pass
                                 status.start()

{loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/uv.lock RENAMED Viewed

@@ -600,7 +600,7 @@ wheels = [
 [[package]]
 name = "loreguard-cli"
-version = "0.11.2"
+version = "0.12.1"
 source = { editable = "." }
 dependencies = [
     { name = "aiofiles" },
@@ -611,6 +611,7 @@ dependencies = [
     { name = "python-dotenv" },
     { name = "rich" },
     { name = "textual" },
+    { name = "tf-keras" },
     { name = "torch" },
     { name = "transformers" },
     { name = "uvicorn" },
@@ -641,6 +642,7 @@ requires-dist = [
     { name = "rich", specifier = ">=13.0.0" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" },
     { name = "textual", specifier = ">=0.47.0" },
+    { name = "tf-keras", specifier = ">=2.16.0" },
     { name = "torch", specifier = ">=2.0.0" },
     { name = "transformers", specifier = ">=4.36.0" },
     { name = "uvicorn", specifier = ">=0.27.0" },
@@ -2055,6 +2057,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/84/38/47fab2a5fad163ca4851f7a20eb2442491cc63bf2756ec4ef161bc1461dd/textual-7.0.1-py3-none-any.whl", hash = "sha256:f9b7d16fa9b640bfff2a2008bf31e3f2d4429dc85e07a9583be033840ed15174", size = 715268, upload-time = "2026-01-07T13:07:22.006Z" },
 ]
+[[package]]
+name = "tf-keras"
+version = "2.20.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "tensorflow" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/42/38/6060f6c7472439bb3890b9094d69d31d9f8d5da123b16c738773e70fff91/tf_keras-2.20.1.tar.gz", hash = "sha256:884be5938fb0b2b53b1583c1ae2b660ef87215377c29b5b6a77fd221b472aeaf", size = 1254487, upload-time = "2025-09-04T21:23:41.81Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/85/6b/d9a8202bfe5c9e3b078cf550bafab962aa9d6b1a1f1180f0065399d4c9b2/tf_keras-2.20.1-py3-none-any.whl", hash = "sha256:3f0e0a34d9a4c8758f24fdc1053e6e335f16ab5534c7d34f1899b8924779760c", size = 1694335, upload-time = "2025-09-04T21:23:40.153Z" },
+]
 [[package]]
 name = "tokenizers"
 version = "0.22.1"