PyPI - bot-knows - Versions diffs - 0.1.0__py3-none-any.whl - Mend

bot-knows 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

bot_knows/__init__.py +70 -0
bot_knows/config.py +115 -0
bot_knows/domain/__init__.py +5 -0
bot_knows/domain/chat.py +62 -0
bot_knows/domain/message.py +64 -0
bot_knows/domain/relation.py +56 -0
bot_knows/domain/topic.py +132 -0
bot_knows/domain/topic_evidence.py +55 -0
bot_knows/importers/__init__.py +12 -0
bot_knows/importers/base.py +116 -0
bot_knows/importers/chatgpt.py +154 -0
bot_knows/importers/claude.py +172 -0
bot_knows/importers/generic_json.py +272 -0
bot_knows/importers/registry.py +125 -0
bot_knows/infra/__init__.py +5 -0
bot_knows/infra/llm/__init__.py +6 -0
bot_knows/infra/llm/anthropic_provider.py +172 -0
bot_knows/infra/llm/openai_provider.py +195 -0
bot_knows/infra/mongo/__init__.py +5 -0
bot_knows/infra/mongo/client.py +145 -0
bot_knows/infra/mongo/repositories.py +348 -0
bot_knows/infra/neo4j/__init__.py +5 -0
bot_knows/infra/neo4j/client.py +152 -0
bot_knows/infra/neo4j/graph_repository.py +329 -0
bot_knows/infra/redis/__init__.py +6 -0
bot_knows/infra/redis/cache.py +198 -0
bot_knows/infra/redis/client.py +193 -0
bot_knows/interfaces/__init__.py +18 -0
bot_knows/interfaces/embedding.py +55 -0
bot_knows/interfaces/graph.py +194 -0
bot_knows/interfaces/llm.py +70 -0
bot_knows/interfaces/recall.py +92 -0
bot_knows/interfaces/storage.py +225 -0
bot_knows/logging.py +101 -0
bot_knows/models/__init__.py +22 -0
bot_knows/models/chat.py +55 -0
bot_knows/models/ingest.py +70 -0
bot_knows/models/message.py +49 -0
bot_knows/models/recall.py +58 -0
bot_knows/models/topic.py +100 -0
bot_knows/orchestrator.py +398 -0
bot_knows/py.typed +0 -0
bot_knows/services/__init__.py +24 -0
bot_knows/services/chat_processing.py +182 -0
bot_knows/services/dedup_service.py +161 -0
bot_knows/services/graph_service.py +217 -0
bot_knows/services/message_builder.py +135 -0
bot_knows/services/recall_service.py +296 -0
bot_knows/services/tasks.py +128 -0
bot_knows/services/topic_extraction.py +199 -0
bot_knows/utils/__init__.py +22 -0
bot_knows/utils/hashing.py +126 -0
bot_knows-0.1.0.dist-info/METADATA +294 -0
bot_knows-0.1.0.dist-info/RECORD +56 -0
bot_knows-0.1.0.dist-info/WHEEL +4 -0
bot_knows-0.1.0.dist-info/licenses/LICENSE +21 -0

bot_knows/__init__.py ADDED Viewed

@@ -0,0 +1,70 @@
+"""bot_knows - Framework-agnostic Python library for graph-backed personal knowledge bases.
+This package provides tools for:
+- Ingesting chats from multiple sources (ChatGPT, Claude, custom JSON)
+- Classifying and organizing chat data
+- Extracting semantic topics with deduplication
+- Building a graph-backed knowledge base
+- Evidence-weighted recall with spaced repetition
+Example usage:
+    from bot_knows import (
+        BotKnows,
+        MongoStorageRepository,
+        Neo4jGraphRepository,
+        OpenAIProvider,
+        ChatGPTAdapter,
+    )
+    # Simple usage - config loaded from .env automatically
+    async with BotKnows(
+        storage_class=MongoStorageRepository,
+        graphdb_class=Neo4jGraphRepository,
+        llm_class=OpenAIProvider,
+    ) as bk:
+        result = await bk.insert_chats("conversations.json", ChatGPTAdapter)
+        topics = await bk.get_chat_topics(chat_id)
+"""
+__version__ = "0.1.0"
+# Public API re-exports. The imports below are sorted by module path, so they
+# do not follow the role grouping (orchestrator / implementations / adapters /
+# interfaces) that __all__ uses further down.
+from bot_knows.importers.base import ChatImportAdapter
+from bot_knows.importers.chatgpt import ChatGPTAdapter
+from bot_knows.importers.claude import ClaudeAdapter
+from bot_knows.importers.generic_json import GenericJSONAdapter
+from bot_knows.infra.llm.anthropic_provider import AnthropicProvider
+from bot_knows.infra.llm.openai_provider import OpenAIProvider
+from bot_knows.infra.mongo.repositories import MongoStorageRepository
+from bot_knows.infra.neo4j.graph_repository import Neo4jGraphRepository
+from bot_knows.interfaces.embedding import EmbeddingServiceInterface
+from bot_knows.interfaces.graph import GraphServiceInterface
+from bot_knows.interfaces.llm import LLMInterface
+from bot_knows.interfaces.storage import StorageInterface
+from bot_knows.orchestrator import BotKnows, InsertResult
+# Entries are grouped by role rather than alphabetically, hence the RUF022
+# (unsorted __all__) suppression on the next line.
+__all__ = [  # noqa: RUF022
+    # Orchestrator
+    "BotKnows",
+    "InsertResult",
+    # Implementations
+    "MongoStorageRepository",
+    "Neo4jGraphRepository",
+    "OpenAIProvider",
+    "AnthropicProvider",
+    # Import adapters
+    "ChatGPTAdapter",
+    "ClaudeAdapter",
+    "GenericJSONAdapter",
+    # Interfaces
+    "ChatImportAdapter",
+    "EmbeddingServiceInterface",
+    "GraphServiceInterface",
+    "LLMInterface",
+    "StorageInterface",
+]

bot_knows/config.py ADDED Viewed

@@ -0,0 +1,115 @@
+"""Configuration management for bot_knows.
+This module provides typed configuration classes using pydantic-settings.
+Configuration is loaded from environment variables with optional .env file support.
+"""
+from pydantic import Field, SecretStr
+from pydantic_settings import BaseSettings, SettingsConfigDict
+__all__ = [
+    "BotKnowsConfig",
+    "LLMSettings",
+    "MongoSettings",
+    "Neo4jSettings",
+    "RedisSettings",
+]
+class MongoSettings(BaseSettings):
+    """MongoDB connection settings."""
+    model_config = SettingsConfigDict(
+        env_prefix="BOT_KNOWS_MONGO_",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+    uri: SecretStr = SecretStr("mongodb://localhost:27017")
+    database: str = "bot_knows"
+    collection_prefix: str = ""
+class Neo4jSettings(BaseSettings):
+    """Neo4j connection settings."""
+    model_config = SettingsConfigDict(
+        env_prefix="BOT_KNOWS_NEO4J_",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+    uri: str = "bolt://localhost:7687"
+    username: str = "neo4j"
+    password: SecretStr = SecretStr("")
+class RedisSettings(BaseSettings):
+    """Redis connection settings (optional).
+    If url is not configured or connection fails, caching will be disabled.
+    """
+    model_config = SettingsConfigDict(
+        env_prefix="BOT_KNOWS_REDIS_",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+    url: str | None = None
+    enabled: bool = True  # Can be explicitly disabled
+class LLMSettings(BaseSettings):
+    """LLM provider settings."""
+    model_config = SettingsConfigDict(
+        env_prefix="BOT_KNOWS_LLM_",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+    provider: str = "openai"  # "openai" or "anthropic"
+    api_key: SecretStr | None = None
+    model: str = "gpt-4o"
+    embedding_model: str = "text-embedding-3-small"
+    embedding_dimensions: int = 1536
+class BotKnowsConfig(BaseSettings):
+    """Main configuration aggregating all settings.
+    Example usage:
+        config = BotKnowsConfig()
+        mongo_uri = config.mongo.uri.get_secret_value()
+    """
+    model_config = SettingsConfigDict(
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+    # Component settings (nested)
+    mongo: MongoSettings = MongoSettings()
+    neo4j: Neo4jSettings = Neo4jSettings()
+    redis: RedisSettings = RedisSettings()
+    llm: LLMSettings = LLMSettings()
+    # Deduplication thresholds
+    dedup_high_threshold: float = 0.92
+    dedup_low_threshold: float = 0.80
+    # Recall settings
+    recall_stability_k: float = 0.1
+    recall_semantic_boost: float = 0.1
+    decay_batch_interval_hours: int = 24
+    @property
+    def redis_enabled(self) -> bool:
+        """Check if Redis caching is enabled and configured."""
+        return self.redis.enabled and self.redis.url is not None

bot_knows/domain/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Internal domain entities for bot_knows.
+This module contains internal domain models with business logic.
+These are not part of the public API.
+"""

bot_knows/domain/chat.py ADDED Viewed

@@ -0,0 +1,62 @@
+"""Internal Chat entity for bot_knows.
+This module contains the internal Chat domain model with business logic.
+"""
+import time
+from dataclasses import dataclass, field
+from bot_knows.models.chat import ChatCategory, ChatDTO
+__all__ = [
+    "Chat",
+]
+@dataclass
+class Chat:
+    """Internal Chat entity with business logic.
+    This is a mutable internal representation used during processing.
+    Convert to ChatDTO for persistence and external use.
+    """
+    id: str
+    title: str
+    source: str
+    category: ChatCategory = ChatCategory.GENERAL
+    tags: list[str] = field(default_factory=list)
+    created_on: int = field(default_factory=lambda: int(time.time()))
+    def add_tag(self, tag: str) -> None:
+        """Add a tag if not already present."""
+        if tag and tag not in self.tags:
+            self.tags.append(tag)
+    def add_tags(self, tags: list[str]) -> None:
+        """Add multiple tags."""
+        for tag in tags:
+            self.add_tag(tag)
+    def to_dto(self) -> ChatDTO:
+        """Convert to immutable DTO for persistence."""
+        return ChatDTO(
+            id=self.id,
+            title=self.title,
+            source=self.source,
+            category=self.category,
+            tags=list(self.tags),
+            created_on=self.created_on,
+        )
+    @classmethod
+    def from_dto(cls, dto: ChatDTO) -> "Chat":
+        """Create from DTO."""
+        return cls(
+            id=dto.id,
+            title=dto.title,
+            source=dto.source,
+            category=dto.category,
+            tags=list(dto.tags),
+            created_on=dto.created_on,
+        )

bot_knows/domain/message.py ADDED Viewed

@@ -0,0 +1,64 @@
+"""Internal Message entity for bot_knows.
+This module contains the internal Message domain model.
+"""
+import time
+from dataclasses import dataclass, field
+from bot_knows.models.message import MessageDTO
+__all__ = [
+    "Message",
+]
+@dataclass
+class Message:
+    """Internal Message entity.
+    This is a mutable internal representation used during processing.
+    Convert to MessageDTO for persistence and external use.
+    """
+    message_id: str
+    chat_id: str
+    user_content: str = ""
+    assistant_content: str = ""
+    created_on: int = field(default_factory=lambda: int(time.time()))
+    @property
+    def combined_content(self) -> str:
+        """Get combined user and assistant content."""
+        parts = []
+        if self.user_content:
+            parts.append(f"User: {self.user_content}")
+        if self.assistant_content:
+            parts.append(f"Assistant: {self.assistant_content}")
+        return "\n\n".join(parts)
+    @property
+    def is_empty(self) -> bool:
+        """Check if both contents are empty."""
+        return not self.user_content and not self.assistant_content
+    def to_dto(self) -> MessageDTO:
+        """Convert to immutable DTO for persistence."""
+        return MessageDTO(
+            message_id=self.message_id,
+            chat_id=self.chat_id,
+            user_content=self.user_content,
+            assistant_content=self.assistant_content,
+            created_on=self.created_on,
+        )
+    @classmethod
+    def from_dto(cls, dto: MessageDTO) -> "Message":
+        """Create from DTO."""
+        return cls(
+            message_id=dto.message_id,
+            chat_id=dto.chat_id,
+            user_content=dto.user_content,
+            assistant_content=dto.assistant_content,
+            created_on=dto.created_on,
+        )

bot_knows/domain/relation.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""Graph relationship types for bot_knows.
+This module defines the edge types used in the Neo4j knowledge graph.
+"""
+from enum import StrEnum
+__all__ = [
+    "RelationType",
+]
+class RelationType(StrEnum):
+    """Graph edge types for the knowledge graph.
+    These define the relationships between nodes in Neo4j.
+    """
+    # Message relationships
+    IS_PART_OF = "IS_PART_OF"
+    """(Message)-[:IS_PART_OF]->(Chat)"""
+    FOLLOWS_AFTER = "FOLLOWS_AFTER"
+    """(Message)-[:FOLLOWS_AFTER]->(Message) - defines ordering"""
+    # Topic relationships
+    IS_SUPPORTED_BY = "IS_SUPPORTED_BY"
+    """(Topic)-[:IS_SUPPORTED_BY {evidence}]->(Message)"""
+    POTENTIALLY_DUPLICATE_OF = "POTENTIALLY_DUPLICATE_OF"
+    """(Topic)-[:POTENTIALLY_DUPLICATE_OF {similarity}]->(Topic)"""
+    RELATES_TO = "RELATES_TO"
+    """(Topic)-[:RELATES_TO {type, weight}]->(Topic)"""
+class SemanticRelationType(StrEnum):
+    """Semantic relationship types between topics.
+    Used as the 'type' property on RELATES_TO edges.
+    """
+    PART_OF = "part_of"
+    """Topic A is part of Topic B"""
+    CAUSES = "causes"
+    """Topic A causes Topic B"""
+    RELATED_TO = "related_to"
+    """General semantic relationship"""
+    PREREQUISITE_OF = "prerequisite_of"
+    """Topic A is a prerequisite for Topic B"""
+    SIMILAR_TO = "similar_to"
+    """Topics are semantically similar"""

bot_knows/domain/topic.py ADDED Viewed

@@ -0,0 +1,132 @@
+"""Internal Topic entity for bot_knows.
+This module contains the internal Topic domain model with
+recall business logic including decay and reinforcement.
+"""
+import math
+import time
+from dataclasses import dataclass, field
+from bot_knows.models.topic import TopicDTO
+__all__ = [
+    "Topic",
+]
+@dataclass
+class Topic:
+    """Internal Topic entity with recall business logic.
+    This is a mutable internal representation used during processing.
+    Includes methods for decay and reinforcement calculations.
+    """
+    topic_id: str
+    canonical_name: str
+    centroid_embedding: list[float] = field(default_factory=list)
+    evidence_count: int = 0
+    importance: float = 0.0
+    recall_strength: float = 0.0
+    stability: float = 1.0
+    last_seen: int = field(default_factory=lambda: int(time.time()))
+    last_updated: int = field(default_factory=lambda: int(time.time()))
+    def update_centroid(self, new_embedding: list[float]) -> None:
+        """Incrementally update centroid embedding.
+        Uses formula: new_centroid = (old_centroid * n + new_embedding) / (n + 1)
+        Args:
+            new_embedding: New embedding to incorporate
+        """
+        n = self.evidence_count
+        if n == 0:
+            self.centroid_embedding = list(new_embedding)
+        else:
+            self.centroid_embedding = [
+                (old * n + new) / (n + 1)
+                for old, new in zip(self.centroid_embedding, new_embedding, strict=False)
+            ]
+        self.evidence_count += 1
+    def reinforce(
+        self,
+        confidence: float,
+        novelty_factor: float = 1.0,
+        context_weight: float = 1.0,
+        stability_k: float = 0.1,
+    ) -> None:
+        """Reinforce topic recall strength.
+        Context weights:
+            - passive: 0.2 (reading without interaction)
+            - active: 0.6 (actively querying)
+            - recall: 1.0 (responding to recall prompt)
+        Formula:
+            delta = confidence * novelty_factor * context_weight
+            strength = min(1.0, strength + delta)
+            stability += k * confidence
+        Args:
+            confidence: Evidence confidence (0.0 - 1.0)
+            novelty_factor: How novel this reinforcement is
+            context_weight: Weight based on interaction type
+            stability_k: Stability increment factor
+        """
+        delta = confidence * novelty_factor * context_weight
+        self.recall_strength = min(1.0, self.recall_strength + delta)
+        self.stability += stability_k * confidence
+        self.last_seen = int(time.time())
+        self.last_updated = int(time.time())
+    def apply_decay(self, current_time: int | None = None) -> None:
+        """Apply time-based decay to recall strength.
+        Formula: strength *= exp(-Δt / (stability * 86400))
+        Higher stability means slower decay.
+        Args:
+            current_time: Current time in epoch seconds (default: now)
+        """
+        now = current_time or int(time.time())
+        delta_t = now - self.last_updated
+        if delta_t > 0:
+            # Stability is multiplied by seconds per day for the decay rate
+            decay_factor = math.exp(-delta_t / (self.stability * 86400))
+            self.recall_strength *= decay_factor
+            self.last_updated = now
+    def increment_importance(self, delta: float = 0.1) -> None:
+        """Increment importance score.
+        Args:
+            delta: Amount to increment (capped at 1.0)
+        """
+        self.importance = min(1.0, self.importance + delta)
+    def to_dto(self) -> TopicDTO:
+        """Convert to immutable DTO for persistence."""
+        return TopicDTO(
+            topic_id=self.topic_id,
+            canonical_name=self.canonical_name,
+            centroid_embedding=list(self.centroid_embedding),
+            evidence_count=self.evidence_count,
+            importance=self.importance,
+            recall_strength=self.recall_strength,
+        )
+    @classmethod
+    def from_dto(cls, dto: TopicDTO) -> "Topic":
+        """Create from DTO."""
+        return cls(
+            topic_id=dto.topic_id,
+            canonical_name=dto.canonical_name,
+            centroid_embedding=list(dto.centroid_embedding),
+            evidence_count=dto.evidence_count,
+            importance=dto.importance,
+            recall_strength=dto.recall_strength,
+        )

bot_knows/domain/topic_evidence.py ADDED Viewed

@@ -0,0 +1,55 @@
+"""Internal TopicEvidence entity for bot_knows.
+This module contains the internal TopicEvidence domain model.
+Evidence records are append-only and never modified.
+"""
+import time
+from dataclasses import dataclass, field
+from bot_knows.models.topic import TopicEvidenceDTO
+__all__ = [
+    "TopicEvidence",
+]
+@dataclass(frozen=True)
+class TopicEvidence:
+    """Internal TopicEvidence entity.
+    Evidence records are append-only - they are never modified or deleted.
+    This provides a complete audit trail of topic extractions.
+    Note: This dataclass is frozen (immutable) to enforce append-only semantics.
+    """
+    evidence_id: str
+    topic_id: str
+    extracted_name: str
+    source_message_id: str
+    confidence: float
+    timestamp: int = field(default_factory=lambda: int(time.time()))
+    def to_dto(self) -> TopicEvidenceDTO:
+        """Convert to immutable DTO for persistence."""
+        return TopicEvidenceDTO(
+            evidence_id=self.evidence_id,
+            topic_id=self.topic_id,
+            extracted_name=self.extracted_name,
+            source_message_id=self.source_message_id,
+            confidence=self.confidence,
+            timestamp=self.timestamp,
+        )
+    @classmethod
+    def from_dto(cls, dto: TopicEvidenceDTO) -> "TopicEvidence":
+        """Create from DTO."""
+        return cls(
+            evidence_id=dto.evidence_id,
+            topic_id=dto.topic_id,
+            extracted_name=dto.extracted_name,
+            source_message_id=dto.source_message_id,
+            confidence=dto.confidence,
+            timestamp=dto.timestamp,
+        )

bot_knows/importers/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""Import adapters for bot_knows.
+This module exports the import adapter base class and registry.
+"""
+from bot_knows.importers.base import ChatImportAdapter
+from bot_knows.importers.registry import ImportAdapterRegistry
+__all__ = [
+    "ChatImportAdapter",
+    "ImportAdapterRegistry",
+]

bot_knows/importers/base.py ADDED Viewed

@@ -0,0 +1,116 @@
+"""Base import adapter for bot_knows.
+This module defines the abstract base class for chat import adapters.
+"""
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Any, BinaryIO
+from bot_knows.models.ingest import ChatIngest
+__all__ = [
+    "ChatImportAdapter",
+]
+class ChatImportAdapter(ABC):
+    """Abstract base class for chat import adapters.
+    Import adapters are responsible for parsing provider-specific
+    export formats into the canonical ChatIngest model.
+    Important: Adapters must NOT persist data or mutate any state.
+    They only normalize data.
+    Example:
+        class MyAdapter(ChatImportAdapter):
+            @property
+            def source_name(self) -> str:
+                return "my_source"
+            def parse(self, raw_export: dict) -> list[ChatIngest]:
+                # Parse and return ChatIngest objects
+                ...
+    """
+    @property
+    @abstractmethod
+    def source_name(self) -> str:
+        """Return unique identifier for this import source.
+        This name is used to identify the source in ChatIngest.source
+        and for adapter registry lookup.
+        Returns:
+            Source identifier string (e.g., "chatgpt", "claude")
+        """
+        ...
+    @abstractmethod
+    def parse(self, raw_export: dict[str, Any]) -> list[ChatIngest]:
+        """Parse raw export data into ChatIngest objects.
+        This method must be pure - it should not persist data,
+        generate IDs, classify, or mutate any state.
+        Args:
+            raw_export: Raw JSON data from the export file
+        Returns:
+            List of ChatIngest objects (one export may contain multiple chats)
+        Raises:
+            ValueError: If the export format is invalid
+        """
+        ...
+    def parse_file(self, path: Path | str) -> list[ChatIngest]:
+        """Parse from file path.
+        Convenience method that loads JSON from file and calls parse().
+        Args:
+            path: Path to the export JSON file
+        Returns:
+            List of ChatIngest objects
+        """
+        import json
+        path = Path(path)
+        with path.open("r", encoding="utf-8") as f:
+            data = json.load(f)
+        return self.parse(data)
+    def parse_stream(self, stream: BinaryIO) -> list[ChatIngest]:
+        """Parse from file stream.
+        Convenience method that loads JSON from stream and calls parse().
+        Args:
+            stream: Binary file stream containing JSON data
+        Returns:
+            List of ChatIngest objects
+        """
+        import json
+        data = json.load(stream)
+        return self.parse(data)
+    def parse_string(self, json_string: str) -> list[ChatIngest]:
+        """Parse from JSON string.
+        Convenience method that parses JSON string and calls parse().
+        Args:
+            json_string: JSON string
+        Returns:
+            List of ChatIngest objects
+        """
+        import json
+        data = json.loads(json_string)
+        return self.parse(data)