loreguard-cli 0.11.2__tar.gz → 0.12.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/.gitignore +1 -0
  2. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/PKG-INFO +2 -1
  3. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/pyproject.toml +2 -1
  4. loreguard_cli-0.12.1/src/chunk_detector.py +270 -0
  5. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/cli.py +36 -0
  6. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/dialogue_act_classifier.py +4 -1
  7. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/http_server.py +11 -2
  8. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/intent_classifier.py +15 -9
  9. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/main.py +23 -0
  10. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/nli.py +5 -1
  11. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/screens/main.py +6 -2
  12. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/widgets/npc_chat.py +12 -4
  13. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/wizard.py +30 -1
  14. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/uv.lock +15 -1
  15. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/.claude/skills/llama-cpp-troubleshooting/SKILL.md +0 -0
  16. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/.env.example +0 -0
  17. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/.github/workflows/release.yml +0 -0
  18. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/LICENSE +0 -0
  19. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/README.md +0 -0
  20. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/THIRD_PARTY_NOTICES.md +0 -0
  21. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/scripts/build.py +0 -0
  22. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/sdk/csharp/LoreguardSDK.cs +0 -0
  23. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/sdk/gdscript/LoreguardSDK.gd +0 -0
  24. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/sdk/javascript/loreguard-sdk.js +0 -0
  25. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/sdk/python/loreguard_sdk.py +0 -0
  26. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/__init__.py +0 -0
  27. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/__main__.py +0 -0
  28. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/config.py +0 -0
  29. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/hf_discovery.py +0 -0
  30. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/llama_server.py +0 -0
  31. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/llm.py +0 -0
  32. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/models_registry.py +0 -0
  33. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/npc_chat.py +0 -0
  34. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/runtime.py +0 -0
  35. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/steam.py +0 -0
  36. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/term_ui.py +0 -0
  37. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/__init__.py +0 -0
  38. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/app.py +0 -0
  39. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/modals/__init__.py +0 -0
  40. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/modals/auth_menu.py +0 -0
  41. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/modals/npc_chat.py +0 -0
  42. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/modals/token_input.py +0 -0
  43. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/modals/unified_palette.py +0 -0
  44. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/screens/__init__.py +0 -0
  45. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/screens/auth.py +0 -0
  46. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/screens/model_select.py +0 -0
  47. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/screens/nli_setup.py +0 -0
  48. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/screens/running.py +0 -0
  49. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/styles.py +0 -0
  50. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/widgets/__init__.py +0 -0
  51. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/widgets/banner.py +0 -0
  52. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/widgets/footer.py +0 -0
  53. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/widgets/hardware_info.py +0 -0
  54. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/widgets/server_monitor.py +0 -0
  55. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tui/widgets/status_panel.py +0 -0
  56. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/src/tunnel.py +0 -0
  57. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/templates/llama31-no-tools.jinja +0 -0
  58. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/tests/test_nli_hhem.py +0 -0
  59. {loreguard_cli-0.11.2 → loreguard_cli-0.12.1}/tests/test_websocket_timeout.py +0 -0
@@ -43,3 +43,4 @@ htmlcov/
43
43
  # Misc
44
44
  *.log
45
45
  .DS_Store
46
+ .python-version
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: loreguard-cli
3
- Version: 0.11.2
3
+ Version: 0.12.1
4
4
  Summary: Local inference client for Loreguard NPCs
5
5
  Project-URL: Homepage, https://loreguard.com
6
6
  Project-URL: Documentation, https://github.com/beyond-logic-labs/loreguard-cli#readme
@@ -27,6 +27,7 @@ Requires-Dist: pydantic>=2.5.0
27
27
  Requires-Dist: python-dotenv>=1.0.0
28
28
  Requires-Dist: rich>=13.0.0
29
29
  Requires-Dist: textual>=0.47.0
30
+ Requires-Dist: tf-keras>=2.16.0
30
31
  Requires-Dist: torch>=2.0.0
31
32
  Requires-Dist: transformers>=4.36.0
32
33
  Requires-Dist: uvicorn>=0.27.0
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "loreguard-cli"
7
- version = "0.11.2"
7
+ version = "0.12.1"
8
8
  description = "Local inference client for Loreguard NPCs"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -34,6 +34,7 @@ dependencies = [
34
34
  "uvicorn>=0.27.0",
35
35
  "python-dotenv>=1.0.0",
36
36
  "DialogTag",
37
+ "tf-keras>=2.16.0", # Required for DeBERTa intent classifier (Keras 3 compatibility)
37
38
  ]
38
39
 
39
40
  [project.urls]
@@ -0,0 +1,270 @@
1
+ """Chunk Detection service for natural conversation breaks (ADR-0023).
2
+
3
+ This module provides zero-shot classification to detect natural break points
4
+ in NPC responses. It splits a response into multiple chunks that can be
5
+ delivered sequentially for more human-like conversation flow.
6
+
7
+ Uses DeBERTa-v3-large-zeroshot to classify sentence boundaries:
8
+ - "continues same thought" → merge with previous chunk
9
+ - "starts new thought" → create new chunk
10
+
11
+ This is the client-side implementation that runs locally on the user's machine,
12
+ leveraging the same DeBERTa model used for intent classification.
13
+ """
14
+
15
+ import logging
16
+ import re
17
+ import threading
18
+ import time
19
+ from dataclasses import dataclass
20
+ from typing import List, Optional
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
@dataclass
class TextChunk:
    """One deliverable piece of a split NPC response."""

    # The chunk's text content.
    text: str
    # Zero-based position of this chunk within the sequence of chunks.
    index: int
30
+
31
+
32
@dataclass
class ChunkResult:
    """Outcome of one chunk-detection pass over a response."""

    # Detected chunks, in delivery order.
    chunks: List["TextChunk"]
    # How long detection took, in milliseconds.
    latency_ms: int
37
+
38
+
39
# Zero-shot hypotheses scored at every sentence boundary. Insertion order is
# preserved, so "continues" is always listed before "starts_new".
CHUNK_HYPOTHESES = dict(
    continues="This text continues the same thought or topic as the previous sentence.",
    starts_new="This text starts a new thought, topic, or conversational turn.",
)

# Cut-off for the "starts_new" hypothesis: a score strictly above this value
# opens a new chunk.
NEW_THOUGHT_THRESHOLD = 0.55
48
+
49
+
50
class ChunkDetector:
    """Split an NPC response into natural chunks with a zero-shot classifier.

    The response is first cut into sentences; every boundary between two
    adjacent sentences is then scored against ``CHUNK_HYPOTHESES``. A boundary
    whose "starts_new" score exceeds ``NEW_THOUGHT_THRESHOLD`` opens a new
    chunk, otherwise the sentence is merged into the current chunk.
    """

    # Sentence boundary, compiled once for all calls:
    #   1. whitespace that follows ".", "!" or "?"; or
    #   2. the empty position right after an ellipsis that is glued to the
    #      next word ("Hmm...okay").
    # The second form must not fire inside a longer run of punctuation
    # ("Wait...." would otherwise yield a stray "." sentence), before a
    # closing quote/bracket, or at the very end of the text.
    _SENTENCE_BOUNDARY = re.compile(
        r"(?<=[.!?])\s+"
        r"|(?<=\.\.\.)(?![\s.!?,;:)\]\"']|$)"
    )

    def __init__(self, classifier=None, model_path: Optional[str] = None):
        """Initialize the chunk detector.

        Args:
            classifier: Optional pre-loaded zero-shot classifier to reuse.
                If None, a pipeline is created on demand by ``load_model``.
            model_path: Model identifier or local directory handed to the
                transformers pipeline. Defaults to the DeBERTa zero-shot model.
        """
        self._classifier = classifier
        self._model_path = model_path or "MoritzLaurer/DeBERTa-v3-large-zeroshot-v2.0"
        self._device = None
        # Serializes model loading so concurrent callers build one pipeline.
        self._load_lock = threading.Lock()

    @property
    def model_name(self) -> str:
        """Return the configured model identifier."""
        return self._model_path

    def set_classifier(self, classifier):
        """Install a pre-loaded classifier so no second model has to be loaded."""
        self._classifier = classifier

    def _resolve_device(self) -> str:
        """Return "cuda", "mps" or "cpu", whichever torch reports as available.

        Falls back to "cpu" when torch cannot be imported.
        """
        try:
            import torch
        except ImportError:
            return "cpu"

        if torch.cuda.is_available():
            return "cuda"
        mps = getattr(torch.backends, "mps", None)
        if mps is not None and mps.is_available():
            return "mps"  # Apple Silicon
        return "cpu"

    def load_model(self) -> bool:
        """Load the zero-shot classification pipeline if none is set yet.

        The classifier is re-checked after acquiring the lock, so callers that
        race on the first load do not build the pipeline twice.

        Returns:
            True if a classifier is available, False if loading failed.
        """
        if self._classifier is not None:
            return True

        with self._load_lock:
            if self._classifier is not None:
                return True

            try:
                from transformers import pipeline

                self._device = self._resolve_device()
                logger.info(
                    "Loading chunk detector: %s (device=%s)",
                    self._model_path,
                    self._device,
                )

                # The pipeline takes GPU index 0 for CUDA, -1 for CPU, and the
                # device name itself for anything else (i.e. "mps").
                device_arg = {"cuda": 0, "cpu": -1}.get(self._device, self._device)
                self._classifier = pipeline(
                    "zero-shot-classification",
                    model=self._model_path,
                    device=device_arg,
                )

                logger.info("Chunk detector loaded successfully")
                return True

            except Exception as e:
                # Best-effort: callers treat a failed load as "feature off".
                logger.error("Failed to load chunk detector: %s", e)
                return False

    def _split_into_sentences(self, text: str) -> List[str]:
        """Split text into sentences with a simple regex heuristic.

        Splits after ".", "!" or "?" when followed by whitespace, and after an
        ellipsis that runs straight into the next word. Empty pieces are
        dropped and every sentence is stripped.
        """
        pieces = self._SENTENCE_BOUNDARY.split(text.strip())
        return [piece.strip() for piece in pieces if piece.strip()]

    @staticmethod
    def _elapsed_ms(started: float) -> int:
        """Return whole milliseconds elapsed since a ``perf_counter`` reading."""
        return int((time.perf_counter() - started) * 1000)

    def detect(self, text: str) -> "ChunkResult":
        """Detect natural break points in text and split it into chunks.

        Args:
            text: The NPC response text to analyze.

        Returns:
            ChunkResult with the chunks in order. Text that is empty, shorter
            than 10 characters, a single sentence, or that cannot be classified
            because no model could be loaded comes back as at most one chunk.
        """
        # Monotonic clock: wall-clock adjustments must not skew the latency.
        started = time.perf_counter()

        # Empty or very short text is never split.
        if not text or len(text.strip()) < 10:
            return ChunkResult(
                chunks=[TextChunk(text=text, index=0)] if text else [],
                latency_ms=0,
            )

        sentences = self._split_into_sentences(text)
        if len(sentences) <= 1:
            return ChunkResult(
                chunks=[TextChunk(text=text, index=0)],
                latency_ms=self._elapsed_ms(started),
            )

        # Without a model, fall back to the full text as a single chunk.
        if self._classifier is None and not self.load_model():
            return ChunkResult(
                chunks=[TextChunk(text=text, index=0)],
                latency_ms=0,
            )

        hypotheses = list(CHUNK_HYPOTHESES.values())
        new_thought = CHUNK_HYPOTHESES["starts_new"]
        chunks: List[str] = [sentences[0]]

        for i, (prev_sentence, curr_sentence) in enumerate(
            zip(sentences, sentences[1:]), start=1
        ):
            try:
                # Both sentences are scored together: does the pair read as a
                # continuation or as the start of a new thought?
                result = self._classifier(
                    f"{prev_sentence} {curr_sentence}",
                    candidate_labels=hypotheses,
                    hypothesis_template="{}",
                    multi_label=False,
                )
                scores = dict(zip(result["labels"], result["scores"]))
                starts_new_score = scores.get(new_thought, 0.0)
            except Exception as e:
                # A failed classification must not lose text: merge instead.
                logger.warning("Classification failed for sentence %d, merging: %s", i, e)
                chunks[-1] = f"{chunks[-1]} {curr_sentence}"
                continue

            if starts_new_score > NEW_THOUGHT_THRESHOLD:
                chunks.append(curr_sentence)
                logger.debug("New chunk at sentence %d: score=%.2f", i, starts_new_score)
            else:
                chunks[-1] = f"{chunks[-1]} {curr_sentence}"
                logger.debug("Merged sentence %d: score=%.2f", i, starts_new_score)

        latency_ms = self._elapsed_ms(started)

        # Number the chunks after dropping blanks so indices stay contiguous.
        stripped = [chunk.strip() for chunk in chunks]
        text_chunks = [
            TextChunk(text=chunk, index=i)
            for i, chunk in enumerate(chunk for chunk in stripped if chunk)
        ]

        logger.info(
            "Chunk detection: %d sentences -> %d chunks (latency=%dms)",
            len(sentences),
            len(text_chunks),
            latency_ms,
        )

        return ChunkResult(chunks=text_chunks, latency_ms=latency_ms)

    def detect_with_fallback(self, text: str) -> "ChunkResult":
        """Run ``detect`` and fall back to a single chunk if it raises.

        Args:
            text: The NPC response text to analyze.

        Returns:
            The ``detect`` result, or on error a ChunkResult holding the whole
            text as one chunk (no chunks for empty text) with latency 0.
        """
        try:
            return self.detect(text)
        except Exception as e:
            logger.warning("Chunk detection failed, returning single chunk: %s", e)
            return ChunkResult(
                chunks=[TextChunk(text=text, index=0)] if text else [],
                latency_ms=0,
            )

    @property
    def is_loaded(self) -> bool:
        """Whether a classifier is currently set."""
        return self._classifier is not None

    @property
    def device(self) -> Optional[str]:
        """Device chosen by ``load_model``; None if it never resolved one."""
        return self._device
@@ -240,12 +240,46 @@ class LoreguardCLI:
240
240
  try:
241
241
  llm_proxy = LLMProxy(f"http://127.0.0.1:{self.port}")
242
242
 
243
+ # Initialize intent classifier (ADR-0010)
244
+ intent_classifier = None
245
+ try:
246
+ from .intent_classifier import IntentClassifier
247
+ log.info("Loading intent classifier...")
248
+ intent_classifier = IntentClassifier()
249
+ if intent_classifier.load_model():
250
+ log.info(f"Intent classifier ready (device: {intent_classifier.device})")
251
+ else:
252
+ log.warning("Intent classifier failed to load")
253
+ intent_classifier = None
254
+ except Exception as e:
255
+ log.warning(f"Intent classifier error: {e}")
256
+
257
+ # Initialize chunk detector (ADR-0023) - shares model with intent classifier
258
+ chunk_detector = None
259
+ try:
260
+ from .chunk_detector import ChunkDetector
261
+ log.info("Loading chunk detector...")
262
+ chunk_detector = ChunkDetector()
263
+ if intent_classifier is not None and intent_classifier.is_loaded:
264
+ chunk_detector.set_classifier(intent_classifier._classifier)
265
+ log.info("Chunk detector ready (shared model)")
266
+ else:
267
+ if chunk_detector.load_model():
268
+ log.info(f"Chunk detector ready (device: {chunk_detector.device})")
269
+ else:
270
+ log.warning("Chunk detector failed to load")
271
+ chunk_detector = None
272
+ except Exception as e:
273
+ log.warning(f"Chunk detector error: {e}")
274
+
243
275
  self._tunnel = BackendTunnel(
244
276
  backend_url=self.backend_url,
245
277
  llm_proxy=llm_proxy,
246
278
  worker_id=self.worker_id,
247
279
  worker_token=self.token,
248
280
  model_id=self.model_path.stem if self.model_path else "unknown",
281
+ intent_classifier=intent_classifier,
282
+ chunk_detector=chunk_detector,
249
283
  )
250
284
 
251
285
  self._tunnel.on_request_complete = self._on_request_complete
@@ -253,9 +287,11 @@ class LoreguardCLI:
253
287
  # Start SDK server for local game clients
254
288
  from .http_server import start_sdk_server
255
289
  try:
290
+ sdk_port = int(os.environ.get("LOREGUARD_SDK_PORT", "0"))
256
291
  self._sdk_port = start_sdk_server(
257
292
  tunnel=self._tunnel,
258
293
  main_loop=asyncio.get_running_loop(),
294
+ port=sdk_port,
259
295
  )
260
296
  log.info(f"SDK server listening on 127.0.0.1:{self._sdk_port}")
261
297
  except Exception as e:
@@ -274,9 +274,12 @@ def download_dialogue_act_model(progress_callback=None, error_callback=None) ->
274
274
  DEFAULT_DIALOGUE_ACT_MODEL,
275
275
  local_files_only=False,
276
276
  tqdm_class=TqdmCallback,
277
+ max_workers=1, # Avoid subprocess fd issues in ThreadPoolExecutor
277
278
  )
278
279
  else:
279
- snapshot_download(DEFAULT_DIALOGUE_ACT_MODEL, local_files_only=False)
280
+ # max_workers=1 prevents "bad value(s) in fds_to_keep" error
281
+ # when running from ThreadPoolExecutor
282
+ snapshot_download(DEFAULT_DIALOGUE_ACT_MODEL, local_files_only=False, max_workers=1)
280
283
 
281
284
  logger.info("Dialogue act model downloaded successfully")
282
285
  return True
@@ -218,6 +218,8 @@ class EmbeddedHTTPServer:
218
218
  except (asyncio.TimeoutError, TimeoutError, Exception):
219
219
  break
220
220
  break
221
+ elif msg_type == "pass_update":
222
+ yield f"event: pass_update\ndata: {json.dumps(msg.get('data', {}))}\n\n"
221
223
  elif msg_type == "follow_up":
222
224
  # Follow-up received before done (shouldn't happen, but handle gracefully)
223
225
  yield f"event: follow_up\ndata: {json.dumps(msg.get('data', {}))}\n\n"
@@ -236,6 +238,7 @@ class EmbeddedHTTPServer:
236
238
 
237
239
  async def _wait_for_response(self, request_id: str, queue: asyncio.Queue) -> dict:
238
240
  """Wait for complete response (non-streaming mode)."""
241
+ pipeline_trace = []
239
242
  try:
240
243
  while True:
241
244
  try:
@@ -258,12 +261,18 @@ class EmbeddedHTTPServer:
258
261
  msg_type = msg.get("type")
259
262
  if msg_type == "done":
260
263
  data = msg.get("data", {})
261
- return {
264
+ result = {
262
265
  "response": data.get("speech", ""),
263
266
  "verified": data.get("verified", False),
264
267
  "citations": data.get("citations", []),
265
268
  }
266
- elif msg_type in ("filler", "pass_update"):
269
+ if pipeline_trace:
270
+ result["pipeline_trace"] = pipeline_trace
271
+ return result
272
+ elif msg_type == "pass_update":
273
+ pipeline_trace.append(msg.get("data", {}))
274
+ continue
275
+ elif msg_type == "filler":
267
276
  continue
268
277
  elif msg_type == "error":
269
278
  return {"error": msg.get("error", "Unknown error")}
@@ -1,7 +1,7 @@
1
1
  """Intent Classification service for adaptive retrieval (ADR-0010).
2
2
 
3
3
  This module provides zero-shot intent classification for the NPC dialogue pipeline.
4
- It uses BART-large-MNLI to classify user messages into retrieval strategy categories:
4
+ It uses DeBERTa-v3-large-zeroshot to classify user messages into retrieval strategy categories:
5
5
  - A_NO_RETRIEVAL: Greetings, chitchat, farewells (skip retrieval)
6
6
  - B_WORKING_MEMORY: Simple identity/state questions (working memory only)
7
7
  - C_LIGHT_RETRIEVAL: Direct factual questions (top 3 sources)
@@ -41,12 +41,15 @@ class IntentResult:
41
41
  DEFAULT_INTENT_MODEL = "MoritzLaurer/DeBERTa-v3-large-zeroshot-v2.0"
42
42
 
43
43
  # Intent hypothesis templates for zero-shot classification
44
- # Each intent maps to a hypothesis that BART will evaluate
44
+ # Each intent maps to a hypothesis that DeBERTa will evaluate
45
+ # NOTE: Hypotheses must be specific to avoid misclassification of mixed-intent messages
46
+ # (e.g., "hey, how are you? what's the ISP fee?" should match LIGHT_RETRIEVAL, not WORKING_MEMORY)
47
+ # TODO: Move hypotheses to backend for centralized control (see loreguard-engine issue)
45
48
  INTENT_HYPOTHESES = {
46
- IntentLabel.NO_RETRIEVAL: "This is a greeting, chitchat, or farewell that does not require any information retrieval.",
47
- IntentLabel.WORKING_MEMORY: "This is a simple question about identity, name, or basic state that only requires basic memory.",
48
- IntentLabel.LIGHT_RETRIEVAL: "This is a direct factual question that requires looking up specific information.",
49
- IntentLabel.FULL_RETRIEVAL: "This is a complex question that requires comprehensive information retrieval and analysis.",
49
+ IntentLabel.NO_RETRIEVAL: "This is a greeting, farewell, or a vague question about availability without asking for any specific information.",
50
+ IntentLabel.WORKING_MEMORY: "This asks about the person's current life, recent experiences, what they've been up to, or how things are going for them.",
51
+ IntentLabel.LIGHT_RETRIEVAL: "This asks for a specific fact, number, price, fee, date, location, or procedure that requires looking up information.",
52
+ IntentLabel.FULL_RETRIEVAL: "This is a complex question requiring analysis of multiple topics or understanding relationships between different pieces of information.",
50
53
  }
51
54
 
52
55
  # Promise detection hypothesis for follow-up triggers (ADR-0020)
@@ -63,7 +66,7 @@ class PromiseResult:
63
66
 
64
67
 
65
68
  class IntentClassifier:
66
- """Service for zero-shot intent classification using BART-large-MNLI.
69
+ """Service for zero-shot intent classification using DeBERTa-v3-large-zeroshot.
67
70
 
68
71
  Uses zero-shot classification to categorize user messages into one of four
69
72
  retrieval strategies without any fine-tuning required.
@@ -261,7 +264,7 @@ def is_intent_model_available() -> bool:
261
264
  """Check if the intent model is available in HuggingFace cache.
262
265
 
263
266
  The transformers library caches models in ~/.cache/huggingface/hub/.
264
- This function checks if the BART model has been downloaded.
267
+ This function checks if the DeBERTa model has been downloaded.
265
268
  """
266
269
  try:
267
270
  from huggingface_hub import try_to_load_from_cache
@@ -314,9 +317,12 @@ def download_intent_model(progress_callback=None, error_callback=None) -> bool:
314
317
  DEFAULT_INTENT_MODEL,
315
318
  local_files_only=False,
316
319
  tqdm_class=TqdmCallback,
320
+ max_workers=1, # Avoid subprocess fd issues in ThreadPoolExecutor
317
321
  )
318
322
  else:
319
- snapshot_download(DEFAULT_INTENT_MODEL, local_files_only=False)
323
+ # max_workers=1 prevents "bad value(s) in fds_to_keep" error
324
+ # when running from ThreadPoolExecutor
325
+ snapshot_download(DEFAULT_INTENT_MODEL, local_files_only=False, max_workers=1)
320
326
 
321
327
  logger.info("Intent model downloaded successfully")
322
328
  return True
@@ -145,6 +145,28 @@ async def startup():
145
145
  else:
146
146
  console.print("[yellow]Dialogue act classifier disabled (set LOREGUARD_DIALOGUE_ACT_ENABLED=true to enable)[/yellow]")
147
147
 
148
+ # Initialize chunk detector (ADR-0023 - for natural conversation breaks)
149
+ # Shares model with intent classifier if available
150
+ chunk_detector = None
151
+ if enable_intent:
152
+ console.print("[cyan]Initializing chunk detector...[/cyan]")
153
+ try:
154
+ from .chunk_detector import ChunkDetector
155
+ chunk_detector = ChunkDetector()
156
+ # Share classifier with intent_classifier if available
157
+ if intent_classifier is not None and intent_classifier.is_loaded:
158
+ chunk_detector.set_classifier(intent_classifier._classifier)
159
+ console.print("[green]Chunk detector ready (shared model)[/green]")
160
+ else:
161
+ if chunk_detector.load_model():
162
+ console.print(f"[green]Chunk detector ready (device: {chunk_detector.device})[/green]")
163
+ else:
164
+ console.print("[yellow]Warning: Chunk detector failed to load[/yellow]")
165
+ chunk_detector = None
166
+ except Exception as e:
167
+ console.print(f"[yellow]Warning: Chunk detector error: {e}[/yellow]")
168
+ chunk_detector = None
169
+
148
170
  # Connect to remote backend
149
171
  backend_url = get_config_value("BACKEND_URL", "wss://api.lorekeeper.ai/workers")
150
172
  worker_id = get_config_value("WORKER_ID", "")
@@ -159,6 +181,7 @@ async def startup():
159
181
  nli_service=nli_service,
160
182
  intent_classifier=intent_classifier,
161
183
  dialogue_act_classifier=dialogue_act_classifier,
184
+ chunk_detector=chunk_detector,
162
185
  )
163
186
  asyncio.create_task(tunnel.connect())
164
187
  elif backend_url:
@@ -404,9 +404,13 @@ def download_nli_model(progress_callback=None, error_callback=None) -> bool:
404
404
  DEFAULT_NLI_MODEL,
405
405
  local_files_only=False,
406
406
  tqdm_class=TqdmCallback,
407
+ max_workers=1, # Avoid subprocess fd issues in ThreadPoolExecutor
407
408
  )
408
409
  else:
409
- snapshot_download(DEFAULT_NLI_MODEL, local_files_only=False)
410
+ # max_workers=1 prevents "bad value(s) in fds_to_keep" error
411
+ # when running from ThreadPoolExecutor (parallel downloads spawn
412
+ # subprocesses that conflict with thread-based fd management)
413
+ snapshot_download(DEFAULT_NLI_MODEL, local_files_only=False, max_workers=1)
410
414
 
411
415
  logger.info("NLI model downloaded successfully")
412
416
  return True
@@ -2,6 +2,7 @@
2
2
 
3
3
  import asyncio
4
4
  import logging
5
+ import os
5
6
  from typing import TYPE_CHECKING
6
7
 
7
8
  from textual.app import ComposeResult
@@ -640,11 +641,12 @@ class MainScreen(Screen):
640
641
 
641
642
  # Wire up pass update callback to chat widget (for verbose mode)
642
643
  def on_pass_update(payload: dict) -> None:
644
+ log.debug(f"tunnel on_pass_update callback fired, payload keys: {list(payload.keys()) if payload else 'None'}")
643
645
  try:
644
646
  chat = self.query_one(NPCChat)
645
647
  chat.on_pass_update(payload)
646
- except Exception:
647
- pass
648
+ except Exception as e:
649
+ log.debug(f"tunnel on_pass_update callback error: {e}")
648
650
 
649
651
  app._tunnel.on_pass_update = on_pass_update
650
652
 
@@ -655,10 +657,12 @@ class MainScreen(Screen):
655
657
  # Log SDK server status
656
658
  pass # Could update a status widget here
657
659
 
660
+ sdk_port_env = int(os.environ.get("LOREGUARD_SDK_PORT", "0"))
658
661
  sdk_port = start_sdk_server(
659
662
  tunnel=app._tunnel,
660
663
  on_status_change=on_sdk_status,
661
664
  main_loop=asyncio.get_event_loop(),
665
+ port=sdk_port_env,
662
666
  )
663
667
  self._sdk_port = sdk_port
664
668
 
@@ -5,9 +5,12 @@ Uses the local proxy for NPC conversations with token streaming:
5
5
  """
6
6
 
7
7
  import json
8
+ import logging
8
9
  from typing import TYPE_CHECKING
9
10
 
10
11
  import httpx
12
+
13
+ logger = logging.getLogger(__name__)
11
14
  from textual.app import ComposeResult
12
15
  from textual.containers import Vertical, Horizontal, VerticalScroll
13
16
  from textual.widgets import Static, Input
@@ -799,11 +802,11 @@ class NPCChat(Vertical):
799
802
  if self._verbose:
800
803
  payload["verbose"] = True
801
804
 
805
+ local_url = get_local_proxy_url()
802
806
  try:
803
807
  await self._do_generate_streaming(payload, status, container)
804
808
  return
805
809
  except httpx.ConnectError as e:
806
- local_url = get_local_proxy_url()
807
810
  if self._verbose:
808
811
  if local_url:
809
812
  status.update(Text(f"Local proxy failed ({local_url}): {e}", style=FG_DIM))
@@ -812,7 +815,6 @@ class NPCChat(Vertical):
812
815
  else:
813
816
  status.update(Text("Local proxy unavailable, using cloud...", style=FG_DIM))
814
817
  except Exception as e:
815
- local_url = get_local_proxy_url()
816
818
  if self._verbose:
817
819
  status.update(Text(f"Local proxy error ({local_url}): {type(e).__name__}: {e}", style="#FF5555"))
818
820
  else:
@@ -895,6 +897,9 @@ class NPCChat(Vertical):
895
897
  container.scroll_end(animate=False)
896
898
  status.update(Text(f"Streaming... ({tokens_received} tokens)", style=CYAN))
897
899
 
900
+ elif event_type == "pass_update":
901
+ self.on_pass_update(data)
902
+
898
903
  elif event_type == "done":
899
904
  final_data = data
900
905
  speech = data.get("speech", speech)
@@ -987,15 +992,18 @@ class NPCChat(Vertical):
987
992
 
988
993
  Called by the tunnel when it receives pass updates via WebSocket.
989
994
  """
995
+ logger.debug(f"on_pass_update called: verbose={self._verbose}, visible={self._visible}, payload_keys={list(payload.keys()) if payload else 'None'}")
990
996
  if not self._verbose or not self._visible:
997
+ logger.debug(f"on_pass_update skipped: verbose={self._verbose}, visible={self._visible}")
991
998
  return
992
999
 
993
1000
  # Add pass to debug panel instead of chat
994
1001
  try:
995
1002
  debug_panel = self.query_one(DebugPanel)
996
1003
  debug_panel.add_pass(payload)
997
- except Exception:
998
- pass
1004
+ logger.debug(f"on_pass_update: added pass to debug panel")
1005
+ except Exception as e:
1006
+ logger.debug(f"on_pass_update exception: {e}")
999
1007
 
1000
1008
  def action_close_chat(self) -> None:
1001
1009
  """Close the chat widget."""
@@ -1480,6 +1480,34 @@ async def step_start(
1480
1480
  status.log(f"Dialogue act error: {e}", "error")
1481
1481
  dialogue_act_classifier = None
1482
1482
 
1483
+ # Initialize chunk detector (ADR-0023) - shares model with intent classifier
1484
+ chunk_detector = None
1485
+ if intent_enabled:
1486
+ status.set_line("chunk", "Chunk Detect", "Loading...")
1487
+ try:
1488
+ from .chunk_detector import ChunkDetector
1489
+ chunk_detector = ChunkDetector()
1490
+ # Share classifier with intent_classifier if available
1491
+ if intent_classifier is not None and intent_classifier.is_loaded:
1492
+ chunk_detector.set_classifier(intent_classifier._classifier)
1493
+ status.set_line("chunk", "Chunk Detect", f"✓ Ready (shared)")
1494
+ else:
1495
+ # Load independently
1496
+ loop = asyncio.get_event_loop()
1497
+ with concurrent.futures.ThreadPoolExecutor() as pool:
1498
+ with suppress_external_output():
1499
+ model_loaded = await loop.run_in_executor(pool, chunk_detector.load_model)
1500
+ if model_loaded:
1501
+ status.set_line("chunk", "Chunk Detect", f"✓ Ready ({chunk_detector.device})")
1502
+ else:
1503
+ status.set_line("chunk", "Chunk Detect", "✗ Failed to load")
1504
+ status.log("Chunk detector failed to load - continuing without", "warn")
1505
+ chunk_detector = None
1506
+ except Exception as e:
1507
+ status.set_line("chunk", "Chunk Detect", f"✗ Error: {e}")
1508
+ status.log(f"Chunk detector error: {e}", "error")
1509
+ chunk_detector = None
1510
+
1483
1511
  model_id = _resolve_backend_model_id(model_path.stem)
1484
1512
  tunnel = BackendTunnel(
1485
1513
  backend_url="wss://api.loreguard.com/workers",
@@ -1490,6 +1518,7 @@ async def step_start(
1490
1518
  nli_service=nli_service,
1491
1519
  intent_classifier=intent_classifier,
1492
1520
  dialogue_act_classifier=dialogue_act_classifier,
1521
+ chunk_detector=chunk_detector,
1493
1522
  log_callback=status.log,
1494
1523
  )
1495
1524
  asyncio.create_task(tunnel.connect())
@@ -1567,7 +1596,7 @@ async def step_start(
1567
1596
  status.stop()
1568
1597
  from .npc_chat import run_npc_chat
1569
1598
  try:
1570
- await run_npc_chat(api_token=token, tunnel=tunnel)
1599
+ await run_npc_chat(api_token=token, tunnel=tunnel, verbose=_verbose)
1571
1600
  except KeyboardInterrupt:
1572
1601
  pass
1573
1602
  status.start()
@@ -600,7 +600,7 @@ wheels = [
600
600
 
601
601
  [[package]]
602
602
  name = "loreguard-cli"
603
- version = "0.11.2"
603
+ version = "0.12.1"
604
604
  source = { editable = "." }
605
605
  dependencies = [
606
606
  { name = "aiofiles" },
@@ -611,6 +611,7 @@ dependencies = [
611
611
  { name = "python-dotenv" },
612
612
  { name = "rich" },
613
613
  { name = "textual" },
614
+ { name = "tf-keras" },
614
615
  { name = "torch" },
615
616
  { name = "transformers" },
616
617
  { name = "uvicorn" },
@@ -641,6 +642,7 @@ requires-dist = [
641
642
  { name = "rich", specifier = ">=13.0.0" },
642
643
  { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" },
643
644
  { name = "textual", specifier = ">=0.47.0" },
645
+ { name = "tf-keras", specifier = ">=2.16.0" },
644
646
  { name = "torch", specifier = ">=2.0.0" },
645
647
  { name = "transformers", specifier = ">=4.36.0" },
646
648
  { name = "uvicorn", specifier = ">=0.27.0" },
@@ -2055,6 +2057,18 @@ wheels = [
2055
2057
  { url = "https://files.pythonhosted.org/packages/84/38/47fab2a5fad163ca4851f7a20eb2442491cc63bf2756ec4ef161bc1461dd/textual-7.0.1-py3-none-any.whl", hash = "sha256:f9b7d16fa9b640bfff2a2008bf31e3f2d4429dc85e07a9583be033840ed15174", size = 715268, upload-time = "2026-01-07T13:07:22.006Z" },
2056
2058
  ]
2057
2059
 
2060
+ [[package]]
2061
+ name = "tf-keras"
2062
+ version = "2.20.1"
2063
+ source = { registry = "https://pypi.org/simple" }
2064
+ dependencies = [
2065
+ { name = "tensorflow" },
2066
+ ]
2067
+ sdist = { url = "https://files.pythonhosted.org/packages/42/38/6060f6c7472439bb3890b9094d69d31d9f8d5da123b16c738773e70fff91/tf_keras-2.20.1.tar.gz", hash = "sha256:884be5938fb0b2b53b1583c1ae2b660ef87215377c29b5b6a77fd221b472aeaf", size = 1254487, upload-time = "2025-09-04T21:23:41.81Z" }
2068
+ wheels = [
2069
+ { url = "https://files.pythonhosted.org/packages/85/6b/d9a8202bfe5c9e3b078cf550bafab962aa9d6b1a1f1180f0065399d4c9b2/tf_keras-2.20.1-py3-none-any.whl", hash = "sha256:3f0e0a34d9a4c8758f24fdc1053e6e335f16ab5534c7d34f1899b8924779760c", size = 1694335, upload-time = "2025-09-04T21:23:40.153Z" },
2070
+ ]
2071
+
2058
2072
  [[package]]
2059
2073
  name = "tokenizers"
2060
2074
  version = "0.22.1"
File without changes
File without changes