PyPI - bot-knows - Versions diffs - 0.1.0__py3-none-any.whl - Mend

bot-knows 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

bot_knows/__init__.py +70 -0
bot_knows/config.py +115 -0
bot_knows/domain/__init__.py +5 -0
bot_knows/domain/chat.py +62 -0
bot_knows/domain/message.py +64 -0
bot_knows/domain/relation.py +56 -0
bot_knows/domain/topic.py +132 -0
bot_knows/domain/topic_evidence.py +55 -0
bot_knows/importers/__init__.py +12 -0
bot_knows/importers/base.py +116 -0
bot_knows/importers/chatgpt.py +154 -0
bot_knows/importers/claude.py +172 -0
bot_knows/importers/generic_json.py +272 -0
bot_knows/importers/registry.py +125 -0
bot_knows/infra/__init__.py +5 -0
bot_knows/infra/llm/__init__.py +6 -0
bot_knows/infra/llm/anthropic_provider.py +172 -0
bot_knows/infra/llm/openai_provider.py +195 -0
bot_knows/infra/mongo/__init__.py +5 -0
bot_knows/infra/mongo/client.py +145 -0
bot_knows/infra/mongo/repositories.py +348 -0
bot_knows/infra/neo4j/__init__.py +5 -0
bot_knows/infra/neo4j/client.py +152 -0
bot_knows/infra/neo4j/graph_repository.py +329 -0
bot_knows/infra/redis/__init__.py +6 -0
bot_knows/infra/redis/cache.py +198 -0
bot_knows/infra/redis/client.py +193 -0
bot_knows/interfaces/__init__.py +18 -0
bot_knows/interfaces/embedding.py +55 -0
bot_knows/interfaces/graph.py +194 -0
bot_knows/interfaces/llm.py +70 -0
bot_knows/interfaces/recall.py +92 -0
bot_knows/interfaces/storage.py +225 -0
bot_knows/logging.py +101 -0
bot_knows/models/__init__.py +22 -0
bot_knows/models/chat.py +55 -0
bot_knows/models/ingest.py +70 -0
bot_knows/models/message.py +49 -0
bot_knows/models/recall.py +58 -0
bot_knows/models/topic.py +100 -0
bot_knows/orchestrator.py +398 -0
bot_knows/py.typed +0 -0
bot_knows/services/__init__.py +24 -0
bot_knows/services/chat_processing.py +182 -0
bot_knows/services/dedup_service.py +161 -0
bot_knows/services/graph_service.py +217 -0
bot_knows/services/message_builder.py +135 -0
bot_knows/services/recall_service.py +296 -0
bot_knows/services/tasks.py +128 -0
bot_knows/services/topic_extraction.py +199 -0
bot_knows/utils/__init__.py +22 -0
bot_knows/utils/hashing.py +126 -0
bot_knows-0.1.0.dist-info/METADATA +294 -0
bot_knows-0.1.0.dist-info/RECORD +56 -0
bot_knows-0.1.0.dist-info/WHEEL +4 -0
bot_knows-0.1.0.dist-info/licenses/LICENSE +21 -0

bot_knows/services/dedup_service.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""Semantic deduplication service for bot_knows.
+This module provides the service for deduplicating topics based on
+semantic similarity using embedding vectors.
+"""
+from dataclasses import dataclass
+from enum import StrEnum
+from bot_knows.interfaces.embedding import EmbeddingServiceInterface
+from bot_knows.interfaces.storage import StorageInterface
+from bot_knows.logging import get_logger
+from bot_knows.models.topic import TopicDTO
+__all__ = [
+    "DedupAction",
+    "DedupResult",
+    "DedupService",
+]
+logger = get_logger(__name__)
+class DedupAction(StrEnum):
+    """Actions resulting from deduplication check.
+    Members are string-valued (``StrEnum``), so each one compares equal
+    to its lowercase literal, e.g. ``DedupAction.MERGE == "merge"``.
+    The thresholds named in the per-member notes below are the
+    ``high_threshold`` / ``low_threshold`` values held by ``DedupService``.
+    """
+    # Best match is at or above the high threshold.
+    MERGE = "merge"
+    """Similarity >= high_threshold: same topic, merge evidence"""
+    # Best match is at or above the low threshold but below the high one.
+    SOFT_MATCH = "soft_match"
+    """Similarity between low and high threshold: new topic + POTENTIALLY_DUPLICATE_OF edge"""
+    # No stored topic reached the low threshold.
+    NEW = "new"
+    """Similarity < low_threshold: completely new topic"""
+@dataclass
+class DedupResult:
+    """Result of deduplication check.
+    Attributes:
+        action: Classification of the candidate (MERGE, SOFT_MATCH or NEW).
+        existing_topic: The stored topic that was matched. Left as ``None``
+            when ``DedupService.check_duplicate`` reports ``NEW``.
+        similarity: Similarity score of the matched topic. Stays at the
+            default ``0.0`` when no match is attached.
+    """
+    action: DedupAction
+    # Only populated for MERGE / SOFT_MATCH results built by check_duplicate.
+    existing_topic: TopicDTO | None = None
+    similarity: float = 0.0
+class DedupService:
+    """Semantic deduplication service.
+    Compares candidate topics against existing topics using
+    embedding similarity to determine:
+    - MERGE (>= 0.92): Same topic, link evidence to existing
+    - SOFT_MATCH (0.80-0.92): Create new topic with POTENTIALLY_DUPLICATE_OF edge
+    - NEW (< 0.80): Create completely new topic
+    Example:
+        service = DedupService(embedding_service, storage)
+        result = await service.check_duplicate(embedding)
+        if result.action == DedupAction.MERGE:
+            # Link evidence to result.existing_topic
+    """
+    def __init__(
+        self,
+        embedding_service: EmbeddingServiceInterface,
+        storage: StorageInterface,
+        high_threshold: float = 0.92,
+        low_threshold: float = 0.80,
+    ) -> None:
+        """Initialize service with dependencies.
+        Args:
+            embedding_service: Embedding service for similarity calculation
+            storage: Storage interface for topic lookup
+            high_threshold: Similarity threshold for MERGE (default: 0.92)
+            low_threshold: Similarity threshold for SOFT_MATCH (default: 0.80)
+        """
+        self._embedding = embedding_service
+        self._storage = storage
+        self._high_threshold = high_threshold
+        self._low_threshold = low_threshold
+    async def check_duplicate(
+        self,
+        candidate_embedding: list[float],
+    ) -> DedupResult:
+        """Check if candidate embedding matches existing topics.
+        Args:
+            candidate_embedding: Embedding vector for candidate topic
+        Returns:
+            DedupResult with action and matched topic (if any)
+        """
+        # Find similar topics above low threshold
+        similar_topics = await self._storage.find_similar_topics(
+            embedding=candidate_embedding,
+            threshold=self._low_threshold,
+        )
+        if not similar_topics:
+            return DedupResult(action=DedupAction.NEW)
+        # Get best match (highest similarity)
+        best_topic, best_similarity = similar_topics[0]
+        if best_similarity >= self._high_threshold:
+            logger.debug(
+                "dedup_merge",
+                topic_id=best_topic.topic_id,
+                similarity=best_similarity,
+            )
+            return DedupResult(
+                action=DedupAction.MERGE,
+                existing_topic=best_topic,
+                similarity=best_similarity,
+            )
+        # Between thresholds: soft match
+        logger.debug(
+            "dedup_soft_match",
+            topic_id=best_topic.topic_id,
+            similarity=best_similarity,
+        )
+        return DedupResult(
+            action=DedupAction.SOFT_MATCH,
+            existing_topic=best_topic,
+            similarity=best_similarity,
+        )
+    async def find_best_match(
+        self,
+        candidate_embedding: list[float],
+        min_similarity: float = 0.5,
+    ) -> tuple[TopicDTO, float] | None:
+        """Find the best matching topic above minimum similarity.
+        Args:
+            candidate_embedding: Embedding vector to match
+            min_similarity: Minimum similarity threshold
+        Returns:
+            (TopicDTO, similarity) tuple or None if no match
+        """
+        similar_topics = await self._storage.find_similar_topics(
+            embedding=candidate_embedding,
+            threshold=min_similarity,
+        )
+        if similar_topics:
+            return similar_topics[0]
+        return None
+    @property
+    def high_threshold(self) -> float:
+        """Get high similarity threshold (MERGE)."""
+        return self._high_threshold
+    @property
+    def low_threshold(self) -> float:
+        """Get low similarity threshold (SOFT_MATCH)."""
+        return self._low_threshold

bot_knows/services/graph_service.py ADDED Viewed

@@ -0,0 +1,217 @@
+"""Graph service for bot_knows.
+This module provides the service for managing the knowledge graph.
+"""
+from bot_knows.interfaces.graph import GraphServiceInterface
+from bot_knows.logging import get_logger
+from bot_knows.models.chat import ChatDTO
+from bot_knows.models.message import MessageDTO
+from bot_knows.models.topic import TopicDTO, TopicEvidenceDTO
+__all__ = [
+    "GraphService",
+]
+logger = get_logger(__name__)
+class GraphService:
+    """Service for managing the knowledge graph.
+    Wraps the graph interface to provide higher-level operations
+    for building and querying the knowledge graph.
+    Graph structure:
+    - (Message)-[:IS_PART_OF]->(Chat)
+    - (Message)-[:FOLLOWS_AFTER]->(Message)
+    - (Topic)-[:IS_SUPPORTED_BY {evidence}]->(Message)
+    - (Topic)-[:POTENTIALLY_DUPLICATE_OF {similarity}]->(Topic)
+    - (Topic)-[:RELATES_TO {type, weight}]->(Topic)
+    Example:
+        service = GraphService(graph_interface)
+        await service.add_chat_with_messages(chat, messages)
+    """
+    def __init__(self, graph: GraphServiceInterface) -> None:
+        """Initialize service with graph interface.
+        Args:
+            graph: Graph interface implementation
+        """
+        self._graph = graph
+    async def add_chat_with_messages(
+        self,
+        chat: ChatDTO,
+        messages: list[MessageDTO],
+    ) -> None:
+        """Add a chat and its messages to the graph.
+        Creates:
+        - Chat node
+        - Message nodes
+        - IS_PART_OF edges (Message -> Chat)
+        - FOLLOWS_AFTER edges (Message -> Message)
+        Args:
+            chat: Chat to add
+            messages: Messages to add (should be ordered)
+        """
+        # Create chat node
+        await self._graph.create_chat_node(chat)
+        # Create message nodes and edges
+        previous_message_id: str | None = None
+        for message in messages:
+            # Create message node
+            await self._graph.create_message_node(message)
+            # Create IS_PART_OF edge
+            await self._graph.create_is_part_of_edge(
+                message_id=message.message_id,
+                chat_id=chat.id,
+            )
+            # Create FOLLOWS_AFTER edge if not first message
+            if previous_message_id:
+                await self._graph.create_follows_after_edge(
+                    message_id=message.message_id,
+                    previous_message_id=previous_message_id,
+                )
+            previous_message_id = message.message_id
+        logger.debug(
+            "chat_added_to_graph",
+            chat_id=chat.id,
+            message_count=len(messages),
+        )
+    async def add_topic_with_evidence(
+        self,
+        topic: TopicDTO,
+        evidence: TopicEvidenceDTO,
+    ) -> None:
+        """Add a topic and link it to supporting message.
+        Creates:
+        - Topic node
+        - IS_SUPPORTED_BY edge with evidence properties
+        Args:
+            topic: Topic to add
+            evidence: Evidence linking topic to message
+        """
+        # Create topic node
+        await self._graph.create_topic_node(topic)
+        # Create IS_SUPPORTED_BY edge with evidence
+        await self._graph.create_is_supported_by_edge(
+            topic_id=topic.topic_id,
+            message_id=evidence.source_message_id,
+            evidence=evidence,
+        )
+        logger.debug(
+            "topic_added_to_graph",
+            topic_id=topic.topic_id,
+            message_id=evidence.source_message_id,
+        )
+    async def add_evidence_to_existing_topic(
+        self,
+        topic: TopicDTO,
+        evidence: TopicEvidenceDTO,
+    ) -> None:
+        """Add evidence to an existing topic.
+        Creates:
+        - IS_SUPPORTED_BY edge with evidence properties
+        - Updates topic node properties
+        Args:
+            topic: Updated topic (with new centroid)
+            evidence: New evidence
+        """
+        # Update topic node
+        await self._graph.update_topic_node(topic)
+        # Create IS_SUPPORTED_BY edge with evidence
+        await self._graph.create_is_supported_by_edge(
+            topic_id=topic.topic_id,
+            message_id=evidence.source_message_id,
+            evidence=evidence,
+        )
+        logger.debug(
+            "evidence_added_to_topic",
+            topic_id=topic.topic_id,
+            evidence_id=evidence.evidence_id,
+        )
+    async def create_potential_duplicate_link(
+        self,
+        new_topic_id: str,
+        existing_topic_id: str,
+        similarity: float,
+    ) -> None:
+        """Create POTENTIALLY_DUPLICATE_OF edge between topics.
+        Args:
+            new_topic_id: New topic ID
+            existing_topic_id: Existing similar topic ID
+            similarity: Similarity score
+        """
+        await self._graph.create_potentially_duplicate_of_edge(
+            topic_id=new_topic_id,
+            existing_topic_id=existing_topic_id,
+            similarity=similarity,
+        )
+        logger.debug(
+            "potential_duplicate_link_created",
+            new_topic=new_topic_id,
+            existing_topic=existing_topic_id,
+            similarity=similarity,
+        )
+    async def create_topic_relation(
+        self,
+        topic_id: str,
+        related_topic_id: str,
+        relation_type: str,
+        weight: float,
+    ) -> None:
+        """Create RELATES_TO edge between topics.
+        Args:
+            topic_id: Source topic ID
+            related_topic_id: Related topic ID
+            relation_type: Type of relationship
+            weight: Relationship strength (0.0-1.0)
+        """
+        await self._graph.create_relates_to_edge(
+            topic_id=topic_id,
+            related_topic_id=related_topic_id,
+            relation_type=relation_type,
+            weight=weight,
+        )
+    async def get_related_topics(
+        self,
+        topic_id: str,
+        limit: int = 10,
+    ) -> list[tuple[str, float]]:
+        """Get topics related to a given topic.
+        Args:
+            topic_id: Topic to find relations for
+            limit: Maximum results
+        Returns:
+            List of (topic_id, weight) tuples
+        """
+        return await self._graph.get_related_topics(topic_id, limit)

bot_knows/services/message_builder.py ADDED Viewed

@@ -0,0 +1,135 @@
+"""Message builder service for bot_knows.
+This module provides the service for building MessageDTOs from IngestMessages.
+"""
+from bot_knows.logging import get_logger
+from bot_knows.models.ingest import IngestMessage
+from bot_knows.models.message import MessageDTO
+from bot_knows.utils.hashing import generate_message_id
+__all__ = [
+    "MessageBuilder",
+]
+logger = get_logger(__name__)
+class MessageBuilder:
+    """Service for building MessageDTOs from IngestMessages.
+    Transforms a list of IngestMessages into MessageDTOs by:
+    - Pairing user and assistant messages
+    - Generating deterministic message IDs
+    - Handling edge cases (missing pairs, system messages)
+    Example:
+        builder = MessageBuilder()
+        messages = builder.build(ingest_messages, chat_id)
+    """
+    def build(
+        self,
+        ingest_messages: list[IngestMessage],
+        chat_id: str,
+    ) -> list[MessageDTO]:
+        """Build MessageDTOs from IngestMessages.
+        Pairs consecutive user-assistant messages into single MessageDTO objects.
+        System messages are stored with empty user_content.
+        Args:
+            ingest_messages: List of ingested messages
+            chat_id: Parent chat ID
+        Returns:
+            List of MessageDTO objects
+        """
+        if not ingest_messages:
+            return []
+        # Sort by timestamp to ensure correct ordering
+        sorted_messages = sorted(ingest_messages, key=lambda m: m.timestamp)
+        messages: list[MessageDTO] = []
+        pending_user: IngestMessage | None = None
+        for msg in sorted_messages:
+            if msg.role == "system":
+                # System messages become standalone with empty user_content
+                message_dto = self._create_message(
+                    chat_id=chat_id,
+                    user_content="",
+                    assistant_content=msg.content,
+                    timestamp=msg.timestamp,
+                )
+                messages.append(message_dto)
+            elif msg.role == "user":
+                # If we have a pending user message, create it as standalone
+                if pending_user:
+                    message_dto = self._create_message(
+                        chat_id=chat_id,
+                        user_content=pending_user.content,
+                        assistant_content="",
+                        timestamp=pending_user.timestamp,
+                    )
+                    messages.append(message_dto)
+                pending_user = msg
+            elif msg.role == "assistant":
+                # Pair with pending user message if available
+                user_content = pending_user.content if pending_user else ""
+                timestamp = pending_user.timestamp if pending_user else msg.timestamp
+                message_dto = self._create_message(
+                    chat_id=chat_id,
+                    user_content=user_content,
+                    assistant_content=msg.content,
+                    timestamp=timestamp,
+                )
+                messages.append(message_dto)
+                pending_user = None
+        # Handle trailing user message
+        if pending_user:
+            message_dto = self._create_message(
+                chat_id=chat_id,
+                user_content=pending_user.content,
+                assistant_content="",
+                timestamp=pending_user.timestamp,
+            )
+            messages.append(message_dto)
+        logger.debug(
+            "messages_built",
+            chat_id=chat_id,
+            input_count=len(ingest_messages),
+            output_count=len(messages),
+        )
+        return messages
+    def _create_message(
+        self,
+        chat_id: str,
+        user_content: str,
+        assistant_content: str,
+        timestamp: int,
+    ) -> MessageDTO:
+        """Create a MessageDTO with deterministic ID."""
+        message_id = generate_message_id(
+            chat_id=chat_id,
+            user_content=user_content,
+            assistant_content=assistant_content,
+            timestamp=timestamp,
+        )
+        return MessageDTO(
+            message_id=message_id,
+            chat_id=chat_id,
+            user_content=user_content,
+            assistant_content=assistant_content,
+            created_on=timestamp,
+        )