bot_knows-0.1.0-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
Files changed (56)
  1. bot_knows/__init__.py +70 -0
  2. bot_knows/config.py +115 -0
  3. bot_knows/domain/__init__.py +5 -0
  4. bot_knows/domain/chat.py +62 -0
  5. bot_knows/domain/message.py +64 -0
  6. bot_knows/domain/relation.py +56 -0
  7. bot_knows/domain/topic.py +132 -0
  8. bot_knows/domain/topic_evidence.py +55 -0
  9. bot_knows/importers/__init__.py +12 -0
  10. bot_knows/importers/base.py +116 -0
  11. bot_knows/importers/chatgpt.py +154 -0
  12. bot_knows/importers/claude.py +172 -0
  13. bot_knows/importers/generic_json.py +272 -0
  14. bot_knows/importers/registry.py +125 -0
  15. bot_knows/infra/__init__.py +5 -0
  16. bot_knows/infra/llm/__init__.py +6 -0
  17. bot_knows/infra/llm/anthropic_provider.py +172 -0
  18. bot_knows/infra/llm/openai_provider.py +195 -0
  19. bot_knows/infra/mongo/__init__.py +5 -0
  20. bot_knows/infra/mongo/client.py +145 -0
  21. bot_knows/infra/mongo/repositories.py +348 -0
  22. bot_knows/infra/neo4j/__init__.py +5 -0
  23. bot_knows/infra/neo4j/client.py +152 -0
  24. bot_knows/infra/neo4j/graph_repository.py +329 -0
  25. bot_knows/infra/redis/__init__.py +6 -0
  26. bot_knows/infra/redis/cache.py +198 -0
  27. bot_knows/infra/redis/client.py +193 -0
  28. bot_knows/interfaces/__init__.py +18 -0
  29. bot_knows/interfaces/embedding.py +55 -0
  30. bot_knows/interfaces/graph.py +194 -0
  31. bot_knows/interfaces/llm.py +70 -0
  32. bot_knows/interfaces/recall.py +92 -0
  33. bot_knows/interfaces/storage.py +225 -0
  34. bot_knows/logging.py +101 -0
  35. bot_knows/models/__init__.py +22 -0
  36. bot_knows/models/chat.py +55 -0
  37. bot_knows/models/ingest.py +70 -0
  38. bot_knows/models/message.py +49 -0
  39. bot_knows/models/recall.py +58 -0
  40. bot_knows/models/topic.py +100 -0
  41. bot_knows/orchestrator.py +398 -0
  42. bot_knows/py.typed +0 -0
  43. bot_knows/services/__init__.py +24 -0
  44. bot_knows/services/chat_processing.py +182 -0
  45. bot_knows/services/dedup_service.py +161 -0
  46. bot_knows/services/graph_service.py +217 -0
  47. bot_knows/services/message_builder.py +135 -0
  48. bot_knows/services/recall_service.py +296 -0
  49. bot_knows/services/tasks.py +128 -0
  50. bot_knows/services/topic_extraction.py +199 -0
  51. bot_knows/utils/__init__.py +22 -0
  52. bot_knows/utils/hashing.py +126 -0
  53. bot_knows-0.1.0.dist-info/METADATA +294 -0
  54. bot_knows-0.1.0.dist-info/RECORD +56 -0
  55. bot_knows-0.1.0.dist-info/WHEEL +4 -0
  56. bot_knows-0.1.0.dist-info/licenses/LICENSE +21 -0
bot_knows/infra/neo4j/graph_repository.py
@@ -0,0 +1,329 @@
+"""Neo4j graph repository for bot_knows.
+
+This module provides the graph repository implementation for Neo4j.
+"""
+
+from typing import Any, Self
+
+from bot_knows.config import Neo4jSettings
+from bot_knows.infra.neo4j.client import Neo4jClient
+from bot_knows.interfaces.graph import GraphServiceInterface
+from bot_knows.logging import get_logger
+from bot_knows.models.chat import ChatDTO
+from bot_knows.models.message import MessageDTO
+from bot_knows.models.topic import TopicDTO, TopicEvidenceDTO
+
+__all__ = [
+    "Neo4jGraphRepository",
+]
+
+logger = get_logger(__name__)
+
+
+class Neo4jGraphRepository(GraphServiceInterface):
+    """Neo4j implementation of GraphServiceInterface.
+
+    Provides graph operations for the knowledge base including
+    node creation, edge creation, and graph queries.
+    """
+
+    config_class = Neo4jSettings
+
+    def __init__(self, client: Neo4jClient) -> None:
+        """Initialize repository with Neo4j client.
+
+        Args:
+            client: Connected Neo4jClient instance
+        """
+        self._client = client
+        self._owns_client = False
+
+    @classmethod
+    async def from_config(cls, config: Neo4jSettings) -> Self:
+        """Factory method for BotKnows instantiation.
+
+        Creates a Neo4jClient, connects, creates indexes/constraints, and returns repository.
+
+        Args:
+            config: Neo4j settings
+
+        Returns:
+            Connected Neo4jGraphRepository instance
+        """
+        client = Neo4jClient(config)
+        await client.connect()
+        await client.create_indexes()
+        await client.create_constraints()
+        instance = cls(client)
+        instance._owns_client = True
+        return instance
+
+    @classmethod
+    async def from_dict(cls, config: dict[str, Any]) -> Self:
+        """Factory method for custom config dict.
+
+        Args:
+            config: Dictionary with Neo4j settings
+
+        Returns:
+            Connected Neo4jGraphRepository instance
+        """
+        settings = Neo4jSettings(**config)
+        return await cls.from_config(settings)
+
+    async def close(self) -> None:
+        """Close owned resources."""
+        if self._owns_client and self._client:
+            await self._client.disconnect()
+
+    # Node operations
+    async def create_chat_node(self, chat: ChatDTO) -> str:
+        """Create or update a Chat node."""
+        query = """
+        MERGE (c:Chat {id: $id})
+        SET c.title = $title,
+            c.source = $source,
+            c.category = $category,
+            c.tags = $tags,
+            c.created_on = $created_on
+        RETURN c.id as id
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "id": chat.id,
+                "title": chat.title,
+                "source": chat.source,
+                "category": chat.category.value,
+                "tags": chat.tags,
+                "created_on": chat.created_on,
+            },
+        )
+        return chat.id
+
+    async def create_message_node(self, message: MessageDTO) -> str:
+        """Create or update a Message node."""
+        query = """
+        MERGE (m:Message {message_id: $message_id})
+        SET m.chat_id = $chat_id,
+            m.created_on = $created_on,
+            m.user_content = $user_content,
+            m.assistent_content = $assistent_content
+        RETURN m.message_id as id
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "message_id": message.message_id,
+                "chat_id": message.chat_id,
+                "created_on": message.created_on,
+                "user_content": message.user_content,
+                "assistent_content": message.assistant_content,
+            },
+        )
+        return message.message_id
+
+    async def create_topic_node(self, topic: TopicDTO) -> str:
+        """Create or update a Topic node."""
+        query = """
+        MERGE (t:Topic {topic_id: $topic_id})
+        SET t.canonical_name = $canonical_name,
+            t.importance = $importance,
+            t.recall_strength = $recall_strength
+        RETURN t.topic_id as id
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "topic_id": topic.topic_id,
+                "canonical_name": topic.canonical_name,
+                "importance": topic.importance,
+                "recall_strength": topic.recall_strength,
+            },
+        )
+        return topic.topic_id
+
+    async def update_topic_node(self, topic: TopicDTO) -> None:
+        """Update an existing Topic node."""
+        await self.create_topic_node(topic)
+
+    # Edge operations
+    async def create_is_part_of_edge(self, message_id: str, chat_id: str) -> None:
+        """Create IS_PART_OF edge: (Message)-[:IS_PART_OF]->(Chat)."""
+        query = """
+        MATCH (m:Message {message_id: $message_id})
+        MATCH (c:Chat {id: $chat_id})
+        MERGE (m)-[:IS_PART_OF]->(c)
+        """
+        await self._client.execute_write(
+            query,
+            {"message_id": message_id, "chat_id": chat_id},
+        )
+
+    async def create_follows_after_edge(
+        self,
+        message_id: str,
+        previous_message_id: str,
+    ) -> None:
+        """Create FOLLOWS_AFTER edge: (Message)-[:FOLLOWS_AFTER]->(Message)."""
+        query = """
+        MATCH (m1:Message {message_id: $message_id})
+        MATCH (m2:Message {message_id: $previous_message_id})
+        MERGE (m1)-[:FOLLOWS_AFTER]->(m2)
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "message_id": message_id,
+                "previous_message_id": previous_message_id,
+            },
+        )
+
+    async def create_is_supported_by_edge(
+        self,
+        topic_id: str,
+        message_id: str,
+        evidence: TopicEvidenceDTO,
+    ) -> None:
+        """Create IS_SUPPORTED_BY edge with evidence properties.
+
+        (Topic)-[:IS_SUPPORTED_BY {evidence data}]->(Message)
+        """
+        query = """
+        MATCH (t:Topic {topic_id: $topic_id})
+        MATCH (m:Message {message_id: $message_id})
+        MERGE (t)-[r:IS_SUPPORTED_BY {evidence_id: $evidence_id}]->(m)
+        SET r.extracted_name = $extracted_name,
+            r.confidence = $confidence,
+            r.timestamp = $timestamp
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "topic_id": topic_id,
+                "message_id": message_id,
+                "evidence_id": evidence.evidence_id,
+                "extracted_name": evidence.extracted_name,
+                "confidence": evidence.confidence,
+                "timestamp": evidence.timestamp,
+            },
+        )
+
+    async def create_potentially_duplicate_of_edge(
+        self,
+        topic_id: str,
+        existing_topic_id: str,
+        similarity: float,
+    ) -> None:
+        """Create POTENTIALLY_DUPLICATE_OF edge between topics."""
+        query = """
+        MATCH (t1:Topic {topic_id: $topic_id})
+        MATCH (t2:Topic {topic_id: $existing_topic_id})
+        MERGE (t1)-[r:POTENTIALLY_DUPLICATE_OF]->(t2)
+        SET r.similarity = $similarity
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "topic_id": topic_id,
+                "existing_topic_id": existing_topic_id,
+                "similarity": similarity,
+            },
+        )
+
+    async def create_relates_to_edge(
+        self,
+        topic_id: str,
+        related_topic_id: str,
+        relation_type: str,
+        weight: float,
+    ) -> None:
+        """Create RELATES_TO edge between topics."""
+        query = """
+        MATCH (t1:Topic {topic_id: $topic_id})
+        MATCH (t2:Topic {topic_id: $related_topic_id})
+        MERGE (t1)-[r:RELATES_TO]->(t2)
+        SET r.type = $relation_type,
+            r.weight = $weight
+        """
+        await self._client.execute_write(
+            query,
+            {
+                "topic_id": topic_id,
+                "related_topic_id": related_topic_id,
+                "relation_type": relation_type,
+                "weight": weight,
+            },
+        )
+
+    # Query operations
+    async def get_messages_for_chat(self, chat_id: str) -> list[MessageDTO]:
+        """Get all messages in a chat, ordered by FOLLOWS_AFTER."""
+        # Get messages ordered by created_on since FOLLOWS_AFTER may not exist
+        query = """
+        MATCH (m:Message)-[:IS_PART_OF]->(c:Chat {id: $chat_id})
+        RETURN m.message_id as message_id,
+               m.chat_id as chat_id,
+               m.created_on as created_on
+        ORDER BY m.created_on
+        """
+        records = await self._client.execute_query(query, {"chat_id": chat_id})
+        return [
+            MessageDTO(
+                message_id=r["message_id"],
+                chat_id=r["chat_id"],
+                created_on=r["created_on"],
+            )
+            for r in records
+        ]
+
+    async def get_related_topics(
+        self,
+        topic_id: str,
+        limit: int = 10,
+    ) -> list[tuple[str, float]]:
+        """Get topics related to a given topic."""
+        query = """
+        MATCH (t1:Topic {topic_id: $topic_id})-[r:RELATES_TO]->(t2:Topic)
+        RETURN t2.topic_id as topic_id, r.weight as weight
+        ORDER BY r.weight DESC
+        LIMIT $limit
+        """
+        records = await self._client.execute_query(
+            query,
+            {"topic_id": topic_id, "limit": limit},
+        )
+        return [(r["topic_id"], r["weight"]) for r in records]
+
+    async def get_topic_evidence(self, topic_id: str) -> list[dict[str, Any]]:
+        """Get all evidence for a topic from IS_SUPPORTED_BY edges."""
+        query = """
+        MATCH (t:Topic {topic_id: $topic_id})-[r:IS_SUPPORTED_BY]->(m:Message)
+        RETURN r.evidence_id as evidence_id,
+               r.extracted_name as extracted_name,
+               r.confidence as confidence,
+               r.timestamp as timestamp,
+               m.message_id as source_message_id
+        ORDER BY r.timestamp
+        """
+        records = await self._client.execute_query(query, {"topic_id": topic_id})
+        return [
+            {
+                "evidence_id": r["evidence_id"],
+                "topic_id": topic_id,
+                "extracted_name": r["extracted_name"],
+                "confidence": r["confidence"],
+                "timestamp": r["timestamp"],
+                "source_message_id": r["source_message_id"],
+            }
+            for r in records
+        ]
+
+    async def get_chat_topics(self, chat_id: str) -> list[str]:
+        """Get all topic IDs associated with a chat's messages."""
+        query = """
+        MATCH (t:Topic)-[:IS_SUPPORTED_BY]->(m:Message)-[:IS_PART_OF]->(c:Chat {id: $chat_id})
+        RETURN DISTINCT t.topic_id as topic_id
+        """
+        records = await self._client.execute_query(query, {"chat_id": chat_id})
+        return [r["topic_id"] for r in records]
bot_knows/infra/redis/__init__.py
@@ -0,0 +1,6 @@
+"""Redis infrastructure for bot_knows (optional)."""
+
+from bot_knows.infra.redis.cache import EmbeddingCache
+from bot_knows.infra.redis.client import RedisClient
+
+__all__ = ["EmbeddingCache", "RedisClient"]
bot_knows/infra/redis/cache.py
@@ -0,0 +1,198 @@
+"""Redis cache implementations for bot_knows.
+
+This module provides caching utilities for embeddings
+and frequently accessed data.
+"""
+
+import hashlib
+import json
+from typing import Any
+
+from bot_knows.infra.redis.client import RedisClient
+from bot_knows.logging import get_logger
+
+__all__ = [
+    "EmbeddingCache",
+    "TopicCache",
+]
+
+logger = get_logger(__name__)
+
+
+class EmbeddingCache:
+    """Redis cache for text embeddings.
+
+    Caches embeddings to avoid redundant API calls.
+    Falls back gracefully if Redis is unavailable.
+    """
+
+    def __init__(
+        self,
+        redis_client: RedisClient,
+        ttl: int = 86400,  # 24 hours
+        prefix: str = "emb:",
+    ) -> None:
+        """Initialize embedding cache.
+
+        Args:
+            redis_client: Redis client instance
+            ttl: Cache TTL in seconds (default: 24 hours)
+            prefix: Key prefix for embedding cache
+        """
+        self._redis = redis_client
+        self._ttl = ttl
+        self._prefix = prefix
+
+    def _make_key(self, text: str) -> str:
+        """Generate cache key for text."""
+        text_hash = hashlib.sha256(text.encode()).hexdigest()
+        return f"{self._prefix}{text_hash}"
+
+    async def get(self, text: str) -> list[float] | None:
+        """Get cached embedding for text.
+
+        Args:
+            text: Input text
+
+        Returns:
+            Embedding vector if cached, None otherwise
+        """
+        if not self._redis.is_connected:
+            return None
+
+        key = self._make_key(text)
+        cached = await self._redis.get(key)
+
+        if cached:
+            try:
+                return json.loads(cached)
+            except json.JSONDecodeError:
+                return None
+
+        return None
+
+    async def set(self, text: str, embedding: list[float]) -> bool:
+        """Cache embedding for text.
+
+        Args:
+            text: Input text
+            embedding: Embedding vector to cache
+
+        Returns:
+            True if cached successfully
+        """
+        if not self._redis.is_connected:
+            return False
+
+        key = self._make_key(text)
+        return await self._redis.set(key, json.dumps(embedding), ex=self._ttl)
+
+    async def get_or_compute(
+        self,
+        text: str,
+        compute_fn: Any,
+    ) -> list[float]:
+        """Get cached embedding or compute and cache.
+
+        Args:
+            text: Input text
+            compute_fn: Async function to compute embedding if not cached
+
+        Returns:
+            Embedding vector
+        """
+        # Try cache first
+        cached = await self.get(text)
+        if cached is not None:
+            return cached
+
+        # Compute embedding
+        embedding = await compute_fn(text)
+
+        # Cache result
+        await self.set(text, embedding)
+
+        return embedding
+
+
+class TopicCache:
+    """Redis cache for hot topics.
+
+    Caches frequently accessed topic data to reduce
+    database lookups.
+    """
+
+    def __init__(
+        self,
+        redis_client: RedisClient,
+        ttl: int = 3600,  # 1 hour
+        prefix: str = "topic:",
+    ) -> None:
+        """Initialize topic cache.
+
+        Args:
+            redis_client: Redis client instance
+            ttl: Cache TTL in seconds (default: 1 hour)
+            prefix: Key prefix for topic cache
+        """
+        self._redis = redis_client
+        self._ttl = ttl
+        self._prefix = prefix
+
+    def _make_key(self, topic_id: str) -> str:
+        """Generate cache key for topic."""
+        return f"{self._prefix}{topic_id}"
+
+    async def get(self, topic_id: str) -> dict[str, Any] | None:
+        """Get cached topic data.
+
+        Args:
+            topic_id: Topic ID
+
+        Returns:
+            Topic data dict if cached, None otherwise
+        """
+        if not self._redis.is_connected:
+            return None
+
+        key = self._make_key(topic_id)
+        cached = await self._redis.get(key)
+
+        if cached:
+            try:
+                return json.loads(cached)
+            except json.JSONDecodeError:
+                return None
+
+        return None
+
+    async def set(self, topic_id: str, data: dict[str, Any]) -> bool:
+        """Cache topic data.
+
+        Args:
+            topic_id: Topic ID
+            data: Topic data to cache
+
+        Returns:
+            True if cached successfully
+        """
+        if not self._redis.is_connected:
+            return False
+
+        key = self._make_key(topic_id)
+        return await self._redis.set(key, json.dumps(data), ex=self._ttl)
+
+    async def invalidate(self, topic_id: str) -> bool:
+        """Invalidate cached topic.
+
+        Args:
+            topic_id: Topic ID to invalidate
+
+        Returns:
+            True if invalidated successfully
+        """
+        if not self._redis.is_connected:
+            return False
+
+        key = self._make_key(topic_id)
+        return await self._redis.delete(key)
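
As a rough illustration of the caching contract above, the sketch below exercises EmbeddingCache.get_or_compute with an in-memory stand-in for RedisClient. The stand-in is hypothetical and exposes only the surface the caches actually call (is_connected, get, set(..., ex=...), delete); the real client lives in bot_knows/infra/redis/client.py, whose constructor and connection API are not shown in this diff.

import asyncio

from bot_knows.infra.redis.cache import EmbeddingCache


class FakeRedisClient:
    """In-memory stand-in exposing only what the caches call."""

    def __init__(self) -> None:
        self.is_connected = True
        self._store: dict[str, str] = {}

    async def get(self, key: str) -> str | None:
        return self._store.get(key)

    async def set(self, key: str, value: str, ex: int | None = None) -> bool:
        self._store[key] = value  # TTL is ignored by this stand-in
        return True

    async def delete(self, key: str) -> bool:
        return self._store.pop(key, None) is not None


async def embed(text: str) -> list[float]:
    # Placeholder embedding function; in bot_knows this would be an LLM provider call.
    return [float(len(text)), 0.0]


async def main() -> None:
    cache = EmbeddingCache(FakeRedisClient())  # type: ignore[arg-type]
    first = await cache.get_or_compute("hello world", embed)   # computed, then cached
    second = await cache.get_or_compute("hello world", embed)  # served from the cache
    assert first == second


asyncio.run(main())

Because every cache method returns early when is_connected is false, a disconnected client simply turns the cache into a pass-through, which is the graceful-degradation behavior the class docstring describes.
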