bot-knows 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. bot_knows/__init__.py +70 -0
  2. bot_knows/config.py +115 -0
  3. bot_knows/domain/__init__.py +5 -0
  4. bot_knows/domain/chat.py +62 -0
  5. bot_knows/domain/message.py +64 -0
  6. bot_knows/domain/relation.py +56 -0
  7. bot_knows/domain/topic.py +132 -0
  8. bot_knows/domain/topic_evidence.py +55 -0
  9. bot_knows/importers/__init__.py +12 -0
  10. bot_knows/importers/base.py +116 -0
  11. bot_knows/importers/chatgpt.py +154 -0
  12. bot_knows/importers/claude.py +172 -0
  13. bot_knows/importers/generic_json.py +272 -0
  14. bot_knows/importers/registry.py +125 -0
  15. bot_knows/infra/__init__.py +5 -0
  16. bot_knows/infra/llm/__init__.py +6 -0
  17. bot_knows/infra/llm/anthropic_provider.py +172 -0
  18. bot_knows/infra/llm/openai_provider.py +195 -0
  19. bot_knows/infra/mongo/__init__.py +5 -0
  20. bot_knows/infra/mongo/client.py +145 -0
  21. bot_knows/infra/mongo/repositories.py +348 -0
  22. bot_knows/infra/neo4j/__init__.py +5 -0
  23. bot_knows/infra/neo4j/client.py +152 -0
  24. bot_knows/infra/neo4j/graph_repository.py +329 -0
  25. bot_knows/infra/redis/__init__.py +6 -0
  26. bot_knows/infra/redis/cache.py +198 -0
  27. bot_knows/infra/redis/client.py +193 -0
  28. bot_knows/interfaces/__init__.py +18 -0
  29. bot_knows/interfaces/embedding.py +55 -0
  30. bot_knows/interfaces/graph.py +194 -0
  31. bot_knows/interfaces/llm.py +70 -0
  32. bot_knows/interfaces/recall.py +92 -0
  33. bot_knows/interfaces/storage.py +225 -0
  34. bot_knows/logging.py +101 -0
  35. bot_knows/models/__init__.py +22 -0
  36. bot_knows/models/chat.py +55 -0
  37. bot_knows/models/ingest.py +70 -0
  38. bot_knows/models/message.py +49 -0
  39. bot_knows/models/recall.py +58 -0
  40. bot_knows/models/topic.py +100 -0
  41. bot_knows/orchestrator.py +398 -0
  42. bot_knows/py.typed +0 -0
  43. bot_knows/services/__init__.py +24 -0
  44. bot_knows/services/chat_processing.py +182 -0
  45. bot_knows/services/dedup_service.py +161 -0
  46. bot_knows/services/graph_service.py +217 -0
  47. bot_knows/services/message_builder.py +135 -0
  48. bot_knows/services/recall_service.py +296 -0
  49. bot_knows/services/tasks.py +128 -0
  50. bot_knows/services/topic_extraction.py +199 -0
  51. bot_knows/utils/__init__.py +22 -0
  52. bot_knows/utils/hashing.py +126 -0
  53. bot_knows-0.1.0.dist-info/METADATA +294 -0
  54. bot_knows-0.1.0.dist-info/RECORD +56 -0
  55. bot_knows-0.1.0.dist-info/WHEEL +4 -0
  56. bot_knows-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,398 @@
1
+ """BotKnows orchestrator for high-level knowledge base operations.
2
+
3
+ This module provides the main entry point for the bot_knows package,
4
+ orchestrating all services for chat ingestion and knowledge retrieval.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from bot_knows.config import BotKnowsConfig
12
+ from bot_knows.importers.base import ChatImportAdapter
13
+ from bot_knows.interfaces.embedding import EmbeddingServiceInterface
14
+ from bot_knows.interfaces.graph import GraphServiceInterface
15
+ from bot_knows.interfaces.llm import LLMInterface
16
+ from bot_knows.interfaces.storage import StorageInterface
17
+ from bot_knows.logging import get_logger
18
+ from bot_knows.models.ingest import ChatIngest
19
+ from bot_knows.models.message import MessageDTO
20
+ from bot_knows.models.recall import TopicRecallStateDTO
21
+ from bot_knows.models.topic import TopicEvidenceDTO
22
+ from bot_knows.services.chat_processing import ChatProcessingService
23
+ from bot_knows.services.dedup_service import DedupAction, DedupService
24
+ from bot_knows.services.graph_service import GraphService
25
+ from bot_knows.services.message_builder import MessageBuilder
26
+ from bot_knows.services.recall_service import RecallService
27
+ from bot_knows.services.topic_extraction import TopicExtractionService
28
+
29
+ __all__ = ["BotKnows", "InsertResult"]
30
+
31
+ logger = get_logger(__name__)
32
+
33
+
34
@dataclass
class InsertResult:
    """Statistics from chat insertion.

    Returned by :meth:`BotKnows.insert_chats`. All counters start at zero and
    are incremented as the ingestion pipeline processes each chat; per-chat
    failures are collected in ``errors`` rather than aborting the run.
    """

    # Total chats seen in the input (new + skipped), incremented before dedup.
    chats_processed: int = 0
    # Chats that did not already exist in storage and were fully processed.
    chats_new: int = 0
    # Chats skipped because a chat with the same deterministic ID already existed.
    chats_skipped: int = 0
    # Messages persisted for new chats.
    messages_created: int = 0
    # Topics created as brand-new (includes soft-matched ones, which are also new).
    topics_created: int = 0
    # Topic candidates merged into an existing topic (high-similarity duplicates).
    topics_merged: int = 0
    # New topics linked to a similar existing topic as potential duplicates.
    topics_soft_matched: int = 0
    # Human-readable per-chat error descriptions ("Chat '<title>': <exception>").
    errors: list[str] = field(default_factory=list)
46
+
47
+
48
class BotKnows:
    """Main orchestrator and retriever for bot_knows knowledge base.

    Accepts implementation classes. Config is loaded from .env automatically.
    For custom implementations, set config_class = None and pass custom_config dict.

    Example:
        async with BotKnows(
            storage_class=MongoStorageRepository,
            graphdb_class=Neo4jGraphRepository,
            llm_class=OpenAIProvider,
        ) as bk:
            result = await bk.insert_chats("export.json", ChatGPTAdapter)
    """

    def __init__(
        self,
        storage_class: type[StorageInterface],
        graphdb_class: type[GraphServiceInterface],
        llm_class: type[LLMInterface],
        embedding_class: type[EmbeddingServiceInterface] | None = None,
        *,
        storage_custom_config: dict[str, Any] | None = None,
        graphdb_custom_config: dict[str, Any] | None = None,
        llm_custom_config: dict[str, Any] | None = None,
        embedding_custom_config: dict[str, Any] | None = None,
    ) -> None:
        """Initialize BotKnows with implementation classes.

        Only stores the classes and config dicts; no I/O happens here.
        Actual instantiation and connection occur in :meth:`_connect`
        (entered via ``async with``).

        Args:
            storage_class: Storage implementation class
            graphdb_class: Graph DB implementation class
            llm_class: LLM implementation class
            embedding_class: Embedding implementation class (defaults to llm_class)
            storage_custom_config: Custom config dict if storage_class.config_class is None
            graphdb_custom_config: Custom config dict if graphdb_class.config_class is None
            llm_custom_config: Custom config dict if llm_class.config_class is None
            embedding_custom_config: Custom config dict if embedding_class.config_class is None
        """
        self._config = BotKnowsConfig()  # Loads from .env

        self._storage_class = storage_class
        self._graphdb_class = graphdb_class
        self._llm_class = llm_class
        # If no dedicated embedding class is given, the LLM class doubles as
        # the embedding provider (and the single instance is shared on connect).
        self._embedding_class = embedding_class or llm_class

        self._storage_custom_config = storage_custom_config
        self._graphdb_custom_config = graphdb_custom_config
        self._llm_custom_config = llm_custom_config
        self._embedding_custom_config = embedding_custom_config

        # Instances (created on connect)
        self._storage: StorageInterface | None = None
        self._graph: GraphServiceInterface | None = None
        self._llm: LLMInterface | None = None
        self._embedding: EmbeddingServiceInterface | None = None

        # Services (wired on connect)
        self._chat_processor: ChatProcessingService | None = None
        self._message_builder = MessageBuilder()  # stateless, safe to create eagerly
        self._topic_extractor: TopicExtractionService | None = None
        self._dedup_service: DedupService | None = None
        self._graph_service: GraphService | None = None
        self._recall_service: RecallService | None = None

        self._connected = False

    async def _instantiate_class(
        self,
        cls: type,
        custom_config: dict[str, Any] | None,
    ) -> Any:
        """Instantiate an implementation class.

        If cls.config_class is set, instantiate config (loads from .env).
        If cls.config_class is None, use custom_config dict.

        Raises:
            ValueError: If ``cls.config_class`` is None and no custom_config
                was supplied for it.
        """
        config_class = getattr(cls, "config_class", None)

        if config_class is None:
            # Custom implementation - use dict
            if custom_config is None:
                raise ValueError(
                    f"{cls.__name__} has config_class=None but no custom_config provided"
                )
            return await cls.from_dict(custom_config)
        else:
            # Standard implementation - instantiate settings (loads from .env)
            config = config_class()
            return await cls.from_config(config)

    async def _connect(self) -> None:
        """Initialize connections and services.

        Idempotent: returns immediately if already connected. Instantiates the
        storage/graph/LLM/embedding implementations, then wires the service
        layer on top of them.
        """
        if self._connected:
            return

        # Instantiate implementations
        self._storage = await self._instantiate_class(
            self._storage_class, self._storage_custom_config
        )
        self._graph = await self._instantiate_class(
            self._graphdb_class, self._graphdb_custom_config
        )
        self._llm = await self._instantiate_class(self._llm_class, self._llm_custom_config)

        # Reuse the LLM instance as the embedding provider when the classes are
        # the same object, so only one client/connection is created.
        if self._embedding_class is self._llm_class:
            self._embedding = self._llm  # type: ignore[assignment]
        else:
            self._embedding = await self._instantiate_class(
                self._embedding_class, self._embedding_custom_config
            )

        # Wire services
        self._chat_processor = ChatProcessingService(self._storage, self._llm)
        self._topic_extractor = TopicExtractionService(self._llm, self._embedding)
        self._dedup_service = DedupService(
            self._embedding,
            self._storage,
            high_threshold=self._config.dedup_high_threshold,
            low_threshold=self._config.dedup_low_threshold,
        )
        self._graph_service = GraphService(self._graph)
        self._recall_service = RecallService(
            self._storage,
            self._graph,
            stability_k=self._config.recall_stability_k,
            semantic_boost=self._config.recall_semantic_boost,
        )

        self._connected = True
        logger.info("bot_knows_connected")

    async def _disconnect(self) -> None:
        """Close all connections.

        Each implementation is closed only if it exposes a ``close`` method.
        The embedding instance is skipped when it is the same object as the
        LLM, to avoid double-closing a shared client.
        """
        if self._storage and hasattr(self._storage, "close"):
            await self._storage.close()
        if self._graph and hasattr(self._graph, "close"):
            await self._graph.close()
        if self._llm and hasattr(self._llm, "close"):
            await self._llm.close()
        if (
            self._embedding
            and self._embedding is not self._llm
            and hasattr(self._embedding, "close")
        ):
            await self._embedding.close()

        self._connected = False
        logger.info("bot_knows_disconnected")

    async def __aenter__(self) -> "BotKnows":
        """Async context manager entry - connects automatically."""
        await self._connect()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: Any,
    ) -> None:
        """Async context manager exit - disconnects automatically."""
        await self._disconnect()

    def _ensure_connected(self) -> None:
        # Guard for all public methods: they require connect() to have run.
        if not self._connected:
            raise RuntimeError("BotKnows not connected. Use 'async with BotKnows(...) as bk:'")

    # === MAIN WORKFLOW ===

    async def insert_chats(
        self,
        chats: dict[str, Any] | str | Path,
        adapter_class: type[ChatImportAdapter],
    ) -> InsertResult:
        """Ingest chats through the complete processing pipeline.

        Per-chat failures are caught, logged, and recorded in
        ``InsertResult.errors``; remaining chats are still processed.

        Args:
            chats: Raw export data (dict), JSON string, or path to JSON file
            adapter_class: Import adapter class to use for parsing

        Returns:
            InsertResult with statistics

        Raises:
            RuntimeError: If called outside the ``async with`` context.
        """
        self._ensure_connected()

        # Parse input
        adapter = adapter_class()
        chat_ingests: list[ChatIngest]

        if isinstance(chats, Path):
            chat_ingests = adapter.parse_file(chats)
        elif isinstance(chats, str):
            # NOTE(review): a str is treated as a file path if such a file
            # exists, otherwise as raw JSON. A JSON payload that happens to
            # match an existing path would be misread — confirm acceptable.
            path = Path(chats)
            if path.exists():
                chat_ingests = adapter.parse_file(path)
            else:
                chat_ingests = adapter.parse_string(chats)
        else:
            chat_ingests = adapter.parse(chats)

        result = InsertResult()

        for chat_ingest in chat_ingests:
            try:
                await self._process_single_chat(chat_ingest, result)
            except Exception as e:
                # Best-effort ingestion: record the failure and continue.
                logger.error("chat_processing_failed", error=str(e))
                result.errors.append(f"Chat '{chat_ingest.title}': {e}")

        logger.info(
            "insert_chats_completed",
            chats_processed=result.chats_processed,
            chats_new=result.chats_new,
            topics_created=result.topics_created,
        )

        return result

    async def _process_single_chat(
        self,
        chat_ingest: ChatIngest,
        result: InsertResult,
    ) -> None:
        """Process a single chat through the pipeline.

        Pipeline: dedupe/classify chat -> persist messages -> mirror into the
        graph -> extract topics per message. Mutates ``result`` counters.
        """
        assert self._chat_processor is not None
        assert self._storage is not None
        assert self._graph_service is not None

        result.chats_processed += 1

        # Step 1: Process chat (identity, classification, persistence)
        chat, is_new = await self._chat_processor.process(chat_ingest)

        # Existing chats are skipped entirely (idempotent re-ingestion).
        if not is_new:
            result.chats_skipped += 1
            return

        result.chats_new += 1

        # Step 2: Build and save messages
        messages = self._message_builder.build(chat_ingest.messages, chat.id)
        for message in messages:
            await self._storage.save_message(message)
            result.messages_created += 1

        # Step 3: Create graph nodes for chat and messages
        await self._graph_service.add_chat_with_messages(chat, messages)

        # Step 4: Process each message for topics
        for message in messages:
            await self._process_message_topics(message, result)

    async def _process_message_topics(
        self,
        message: MessageDTO,
        result: InsertResult,
    ) -> None:
        """Extract and process topics from a message.

        For each extracted topic candidate, the dedup service decides one of
        three actions based on embedding similarity:
          - MERGE: attach new evidence to the existing topic.
          - SOFT_MATCH: create a new topic but link it to the similar existing
            one as a potential duplicate.
          - NEW: create a fresh topic with its evidence.
        In every case the topic's recall state is reinforced afterwards.
        """
        assert self._topic_extractor is not None
        assert self._dedup_service is not None
        assert self._storage is not None
        assert self._graph_service is not None
        assert self._recall_service is not None

        candidates = await self._topic_extractor.extract(message)

        for candidate in candidates:
            dedup_result = await self._dedup_service.check_duplicate(candidate.embedding)

            if dedup_result.action == DedupAction.MERGE:
                assert dedup_result.existing_topic is not None
                topic, evidence = await self._topic_extractor.create_evidence_for_existing(
                    candidate, dedup_result.existing_topic
                )
                await self._storage.update_topic(topic)
                await self._storage.append_evidence(evidence)
                await self._graph_service.add_evidence_to_existing_topic(topic, evidence)
                result.topics_merged += 1

            elif dedup_result.action == DedupAction.SOFT_MATCH:
                assert dedup_result.existing_topic is not None
                topic, evidence = await self._topic_extractor.create_topic_and_evidence(candidate)
                await self._storage.save_topic(topic)
                await self._storage.append_evidence(evidence)
                await self._graph_service.add_topic_with_evidence(topic, evidence)
                # Record the similarity edge so duplicates can be reviewed later.
                await self._graph_service.create_potential_duplicate_link(
                    topic.topic_id,
                    dedup_result.existing_topic.topic_id,
                    dedup_result.similarity,
                )
                result.topics_soft_matched += 1
                result.topics_created += 1

            else:  # NEW
                topic, evidence = await self._topic_extractor.create_topic_and_evidence(candidate)
                await self._storage.save_topic(topic)
                await self._storage.append_evidence(evidence)
                await self._graph_service.add_topic_with_evidence(topic, evidence)
                result.topics_created += 1

            # Reinforce recall state
            # "passive" context: the topic was seen during ingestion, not
            # actively recalled by a user.
            await self._recall_service.reinforce(
                topic.topic_id,
                confidence=candidate.confidence,
                context="passive",
            )

    # === RETRIEVAL METHODS ===

    async def get_messages_for_chat(self, chat_id: str) -> list[MessageDTO]:
        """Get all messages for a chat."""
        self._ensure_connected()
        assert self._storage is not None
        return await self._storage.get_messages_for_chat(chat_id)

    async def get_related_topics(self, topic_id: str, limit: int = 10) -> list[tuple[str, float]]:
        """Get topics related to a given topic, as (topic_id, score) tuples."""
        self._ensure_connected()
        assert self._graph is not None
        return await self._graph.get_related_topics(topic_id, limit)

    async def get_topic_evidence(self, topic_id: str) -> list[TopicEvidenceDTO]:
        """Get all evidence for a topic."""
        self._ensure_connected()
        assert self._storage is not None
        return await self._storage.get_evidence_for_topic(topic_id)

    async def get_chat_topics(self, chat_id: str) -> list[str]:
        """Get all topic IDs associated with a chat's messages."""
        self._ensure_connected()
        assert self._graph is not None
        return await self._graph.get_chat_topics(chat_id)

    async def get_recall_state(self, topic_id: str) -> TopicRecallStateDTO | None:
        """Get recall state for a topic, or None if the topic has none."""
        self._ensure_connected()
        assert self._storage is not None
        return await self._storage.get_recall_state(topic_id)

    async def get_due_topics(self, threshold: float = 0.3) -> list[TopicRecallStateDTO]:
        """Get topics due for recall review (recall strength below threshold)."""
        self._ensure_connected()
        assert self._storage is not None
        return await self._storage.get_due_topics(threshold)

    async def get_all_recall_states(self) -> list[TopicRecallStateDTO]:
        """Get all recall states."""
        self._ensure_connected()
        assert self._storage is not None
        return await self._storage.get_all_recall_states()
bot_knows/py.typed ADDED
File without changes
@@ -0,0 +1,24 @@
1
+ """Service layer for bot_knows.
2
+
3
+ This module exports the main service entry points.
4
+ """
5
+
6
+ from bot_knows.services.chat_processing import ChatProcessingService
7
+ from bot_knows.services.dedup_service import DedupAction, DedupResult, DedupService
8
+ from bot_knows.services.graph_service import GraphService
9
+ from bot_knows.services.message_builder import MessageBuilder
10
+ from bot_knows.services.recall_service import CONTEXT_WEIGHTS, RecallService
11
+ from bot_knows.services.topic_extraction import TopicCandidate, TopicExtractionService
12
+
13
+ __all__ = [
14
+ "CONTEXT_WEIGHTS",
15
+ "ChatProcessingService",
16
+ "DedupAction",
17
+ "DedupResult",
18
+ "DedupService",
19
+ "GraphService",
20
+ "MessageBuilder",
21
+ "RecallService",
22
+ "TopicCandidate",
23
+ "TopicExtractionService",
24
+ ]
@@ -0,0 +1,182 @@
1
+ """Chat processing service for bot_knows.
2
+
3
+ This module provides the main service for chat creation and classification.
4
+ """
5
+
6
+ from bot_knows.interfaces.llm import LLMInterface
7
+ from bot_knows.interfaces.storage import StorageInterface
8
+ from bot_knows.logging import get_logger
9
+ from bot_knows.models.chat import ChatCategory, ChatDTO
10
+ from bot_knows.models.ingest import ChatIngest, IngestMessage
11
+ from bot_knows.utils.hashing import generate_chat_id
12
+
13
+ __all__ = [
14
+ "ChatProcessingService",
15
+ ]
16
+
17
+ logger = get_logger(__name__)
18
+
19
+
20
+ class ChatProcessingService:
21
+ """Service for chat creation and classification.
22
+
23
+ Processes ChatIngest objects into ChatDTO objects, handling:
24
+ - Chat identity resolution (deterministic ID generation)
25
+ - Title resolution (from import or first message)
26
+ - One-time classification (only for new chats)
27
+
28
+ Example:
29
+ service = ChatProcessingService(storage, llm)
30
+ chat, is_new = await service.process(chat_ingest)
31
+ """
32
+
33
+ def __init__(
34
+ self,
35
+ storage: StorageInterface,
36
+ llm: LLMInterface,
37
+ ) -> None:
38
+ """Initialize service with dependencies.
39
+
40
+ Args:
41
+ storage: Storage interface for persistence
42
+ llm: LLM interface for classification
43
+ """
44
+ self._storage = storage
45
+ self._llm = llm
46
+
47
+ async def process(self, chat_ingest: ChatIngest) -> tuple[ChatDTO, bool]:
48
+ """Process chat ingest into ChatDTO.
49
+
50
+ If chat already exists (by ID), returns existing chat.
51
+ Classification only runs for new chats.
52
+
53
+ Args:
54
+ chat_ingest: Ingested chat data
55
+
56
+ Returns:
57
+ Tuple of (ChatDTO, is_new) where is_new indicates
58
+ if this was a newly created chat
59
+ """
60
+ # Resolve title first (needed for ID)
61
+ title = self._resolve_title(chat_ingest)
62
+
63
+ # Generate deterministic chat ID
64
+ chat_id = generate_chat_id(
65
+ title=title,
66
+ source=chat_ingest.source,
67
+ timestamp=chat_ingest.imported_chat_timestamp,
68
+ )
69
+
70
+ # Check if already exists (idempotency)
71
+ existing = await self._storage.get_chat(chat_id)
72
+ if existing:
73
+ logger.debug("chat_already_exists", chat_id=chat_id)
74
+ return existing, False
75
+
76
+ # Classify new chat
77
+ category, tags = await self._classify(chat_ingest)
78
+
79
+ # Create ChatDTO
80
+ chat = ChatDTO(
81
+ id=chat_id,
82
+ title=title,
83
+ source=chat_ingest.source,
84
+ category=category,
85
+ tags=tags,
86
+ created_on=chat_ingest.imported_chat_timestamp,
87
+ )
88
+
89
+ # Persist
90
+ await self._storage.save_chat(chat)
91
+
92
+ logger.info(
93
+ "chat_created",
94
+ chat_id=chat_id,
95
+ title=title[:50],
96
+ category=category.value,
97
+ message_count=len(chat_ingest.messages),
98
+ )
99
+
100
+ return chat, True
101
+
102
+ def _resolve_title(self, chat_ingest: ChatIngest) -> str:
103
+ """Resolve chat title from ingest or first message.
104
+
105
+ Priority:
106
+ 1. Title from import
107
+ 2. First sentence of first message
108
+ 3. "Untitled Chat" fallback
109
+ """
110
+ if chat_ingest.title:
111
+ return chat_ingest.title
112
+
113
+ # Use first sentence of first message
114
+ for msg in chat_ingest.messages:
115
+ if msg.content:
116
+ # Extract first sentence (up to first period or 100 chars)
117
+ content = msg.content.strip()
118
+ period_idx = content.find(".")
119
+ first_sentence = content[:100]
120
+ if period_idx > 0:
121
+ first_sentence = content[:period_idx]
122
+ if first_sentence:
123
+ return first_sentence.strip()
124
+
125
+ return "Untitled Chat"
126
+
127
+ async def _classify(
128
+ self,
129
+ chat_ingest: ChatIngest,
130
+ ) -> tuple[ChatCategory, list[str]]:
131
+ """Classify chat using LLM.
132
+
133
+ Uses first and last user-assistant pairs for classification.
134
+ """
135
+ messages = chat_ingest.messages
136
+
137
+ # Find first user-assistant pair
138
+ first_pair = self._find_pair(messages, from_start=True)
139
+
140
+ # Find last user-assistant pair
141
+ last_pair = self._find_pair(messages, from_start=False)
142
+
143
+ if not first_pair:
144
+ return ChatCategory.GENERAL, []
145
+
146
+ # Use first pair for both if no distinct last pair
147
+ if not last_pair or last_pair == first_pair:
148
+ last_pair = first_pair
149
+
150
+ try:
151
+ return await self._llm.classify_chat(first_pair, last_pair)
152
+ except Exception as e:
153
+ logger.warning("classification_failed", error=str(e))
154
+ return ChatCategory.GENERAL, []
155
+
156
+ def _find_pair(
157
+ self,
158
+ messages: list[IngestMessage],
159
+ from_start: bool,
160
+ ) -> tuple[str, str] | None:
161
+ """Find user-assistant pair from start or end of messages.
162
+
163
+ Args:
164
+ messages: List of ingest messages
165
+ from_start: If True, search from start; else from end
166
+
167
+ Returns:
168
+ (user_content, assistant_content) tuple or None
169
+ """
170
+ if not messages:
171
+ return None
172
+
173
+ msg_iter = messages if from_start else reversed(messages)
174
+ user_content: str | None = None
175
+
176
+ for msg in msg_iter:
177
+ if msg.role == "user" and user_content is None:
178
+ user_content = msg.content
179
+ elif msg.role == "assistant" and user_content is not None:
180
+ return (user_content, msg.content)
181
+
182
+ return None