PyPI - dataknobs-bots - Versions diffs - 0.2.4__py3-none-any.whl - Mend

dataknobs-bots 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

dataknobs_bots/__init__.py +42 -0
dataknobs_bots/api/__init__.py +42 -0
dataknobs_bots/api/dependencies.py +140 -0
dataknobs_bots/api/exceptions.py +289 -0
dataknobs_bots/bot/__init__.py +15 -0
dataknobs_bots/bot/base.py +1091 -0
dataknobs_bots/bot/context.py +102 -0
dataknobs_bots/bot/manager.py +430 -0
dataknobs_bots/bot/registry.py +629 -0
dataknobs_bots/config/__init__.py +39 -0
dataknobs_bots/config/resolution.py +353 -0
dataknobs_bots/knowledge/__init__.py +82 -0
dataknobs_bots/knowledge/query/__init__.py +25 -0
dataknobs_bots/knowledge/query/expander.py +262 -0
dataknobs_bots/knowledge/query/transformer.py +288 -0
dataknobs_bots/knowledge/rag.py +738 -0
dataknobs_bots/knowledge/retrieval/__init__.py +23 -0
dataknobs_bots/knowledge/retrieval/formatter.py +249 -0
dataknobs_bots/knowledge/retrieval/merger.py +279 -0
dataknobs_bots/memory/__init__.py +56 -0
dataknobs_bots/memory/base.py +38 -0
dataknobs_bots/memory/buffer.py +58 -0
dataknobs_bots/memory/vector.py +188 -0
dataknobs_bots/middleware/__init__.py +11 -0
dataknobs_bots/middleware/base.py +92 -0
dataknobs_bots/middleware/cost.py +421 -0
dataknobs_bots/middleware/logging.py +184 -0
dataknobs_bots/reasoning/__init__.py +65 -0
dataknobs_bots/reasoning/base.py +50 -0
dataknobs_bots/reasoning/react.py +299 -0
dataknobs_bots/reasoning/simple.py +51 -0
dataknobs_bots/registry/__init__.py +41 -0
dataknobs_bots/registry/backend.py +181 -0
dataknobs_bots/registry/memory.py +244 -0
dataknobs_bots/registry/models.py +102 -0
dataknobs_bots/registry/portability.py +210 -0
dataknobs_bots/tools/__init__.py +5 -0
dataknobs_bots/tools/knowledge_search.py +113 -0
dataknobs_bots/utils/__init__.py +1 -0
dataknobs_bots-0.2.4.dist-info/METADATA +591 -0
dataknobs_bots-0.2.4.dist-info/RECORD +42 -0
dataknobs_bots-0.2.4.dist-info/WHEEL +4 -0

dataknobs_bots/knowledge/query/expander.py ADDED Viewed

@@ -0,0 +1,262 @@
+"""Contextual query expansion using conversation history.
+This module provides query expansion without requiring LLM calls,
+using recent conversation context to enrich ambiguous queries.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Callable
+@dataclass
+class Message:
+    """A conversation message.
+    Attributes:
+        role: Message role ("user", "assistant", "system")
+        content: Message content
+    """
+    role: str
+    content: str
+class ContextualExpander:
+    """Expands queries using conversation context.
+    This expander enriches ambiguous or context-dependent queries
+    by incorporating information from recent conversation turns.
+    Unlike QueryTransformer, it doesn't require LLM calls.
+    Example:
+        ```python
+        expander = ContextualExpander(max_context_turns=3)
+        # User asks: "Show me an example"
+        # Recent context: discussing chain-of-thought prompting
+        expanded = expander.expand(
+            "Show me an example",
+            conversation_history
+        )
+        # Returns: "chain-of-thought prompting examples Show me an example"
+        ```
+    """
+    def __init__(
+        self,
+        max_context_turns: int = 3,
+        include_assistant: bool = False,
+        keyword_weight: int = 2,
+    ):
+        """Initialize the contextual expander.
+        Args:
+            max_context_turns: Maximum conversation turns to consider
+            include_assistant: Whether to include assistant messages
+            keyword_weight: How many times to repeat extracted keywords
+        """
+        self.max_context_turns = max_context_turns
+        self.include_assistant = include_assistant
+        self.keyword_weight = keyword_weight
+        # Common words to filter out
+        self._stop_words = {
+            "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
+            "have", "has", "had", "do", "does", "did", "will", "would", "could",
+            "should", "may", "might", "must", "can", "this", "that", "these",
+            "those", "i", "you", "he", "she", "it", "we", "they", "what", "which",
+            "who", "when", "where", "why", "how", "all", "each", "every", "both",
+            "few", "more", "most", "other", "some", "such", "no", "not", "only",
+            "own", "same", "so", "than", "too", "very", "just", "also", "now",
+            "here", "there", "about", "into", "through", "during", "before",
+            "after", "above", "below", "to", "from", "up", "down", "in", "out",
+            "on", "off", "over", "under", "again", "further", "then", "once",
+            "and", "but", "or", "nor", "for", "yet", "because", "as", "until",
+            "while", "of", "at", "by", "with", "without", "between", "me", "my",
+            "your", "his", "her", "its", "our", "their", "please", "help", "want",
+            "need", "like", "show", "tell", "give", "make", "let", "get", "see",
+        }
+    def expand(
+        self,
+        user_input: str,
+        conversation_history: list[Message] | list[dict[str, Any]],
+    ) -> str:
+        """Expand query with conversation context.
+        Args:
+            user_input: The user's current message
+            conversation_history: Recent conversation messages
+        Returns:
+            Expanded query string
+        """
+        # Normalize conversation history to Message objects
+        messages = self._normalize_messages(conversation_history)
+        # Get recent context
+        recent = self._get_recent_context(messages)
+        # Extract keywords from context
+        keywords = self._extract_keywords(recent)
+        # Build expanded query
+        if keywords:
+            keyword_str = " ".join(keywords)
+            return f"{keyword_str} {user_input}"
+        return user_input
+    def _normalize_messages(
+        self,
+        history: list[Message] | list[dict[str, Any]],
+    ) -> list[Message]:
+        """Normalize history to Message objects.
+        Args:
+            history: Conversation history in various formats
+        Returns:
+            List of Message objects
+        """
+        messages = []
+        for item in history:
+            if isinstance(item, Message):
+                messages.append(item)
+            elif isinstance(item, dict):
+                messages.append(Message(
+                    role=item.get("role", "user"),
+                    content=item.get("content", ""),
+                ))
+        return messages
+    def _get_recent_context(self, messages: list[Message]) -> list[str]:
+        """Get recent relevant context from conversation.
+        Args:
+            messages: Conversation messages
+        Returns:
+            List of context strings
+        """
+        context = []
+        count = 0
+        # Walk backwards through messages
+        for msg in reversed(messages):
+            if count >= self.max_context_turns:
+                break
+            if msg.role == "user" or (msg.role == "assistant" and self.include_assistant):
+                context.insert(0, msg.content)
+                count += 1
+        return context
+    def _extract_keywords(self, context: list[str]) -> list[str]:
+        """Extract meaningful keywords from context.
+        Args:
+            context: List of context strings
+        Returns:
+            List of extracted keywords
+        """
+        # Combine all context
+        combined = " ".join(context)
+        # Tokenize and filter
+        words = combined.lower().split()
+        keywords = []
+        for word in words:
+            # Clean punctuation
+            cleaned = word.strip(".,!?\"'()[]{}:;")
+            # Skip short words, stop words, and numbers
+            if (
+                len(cleaned) < 3
+                or cleaned in self._stop_words
+                or cleaned.isdigit()
+            ):
+                continue
+            # Add keyword if not already present
+            if cleaned not in keywords:
+                keywords.append(cleaned)
+        # Return top keywords (most recent first gives natural weighting)
+        return keywords[:5]
+    def expand_with_topics(
+        self,
+        user_input: str,
+        conversation_history: list[Message] | list[dict[str, Any]],
+        topic_extractor: Callable[[str], list[str]] | None = None,
+    ) -> str:
+        """Expand query with extracted topics.
+        Enhanced expansion that uses a custom topic extractor.
+        Args:
+            user_input: The user's current message
+            conversation_history: Recent conversation messages
+            topic_extractor: Optional function to extract topics from text
+        Returns:
+            Expanded query string
+        """
+        messages = self._normalize_messages(conversation_history)
+        recent = self._get_recent_context(messages)
+        if topic_extractor:
+            # Use custom topic extraction
+            topics = []
+            for text in recent:
+                topics.extend(topic_extractor(text))
+            topic_str = " ".join(topics[:5])
+        else:
+            # Fall back to keyword extraction
+            keywords = self._extract_keywords(recent)
+            topic_str = " ".join(keywords)
+        if topic_str:
+            return f"{topic_str} {user_input}"
+        return user_input
+def is_ambiguous_query(query: str) -> bool:
+    """Check if a query is likely ambiguous and needs expansion.
+    Args:
+        query: The query to check
+    Returns:
+        True if query appears ambiguous
+    Example:
+        ```python
+        is_ambiguous_query("Show me an example")  # True
+        is_ambiguous_query("How do I configure OAuth?")  # False
+        ```
+    """
+    # Short queries are often ambiguous
+    words = query.split()
+    if len(words) < 4:
+        return True
+    # Queries with demonstratives are often context-dependent
+    ambiguous_patterns = [
+        "this", "that", "these", "those", "it", "them",
+        "example", "more", "another", "same", "similar",
+    ]
+    query_lower = query.lower()
+    for pattern in ambiguous_patterns:
+        if pattern in query_lower:
+            return True
+    return False

dataknobs_bots/knowledge/query/transformer.py ADDED Viewed

@@ -0,0 +1,288 @@
+"""Query transformation using LLM for improved retrieval.
+This module provides LLM-based query transformation to generate
+optimized search queries from user input.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any
+@dataclass
+class TransformerConfig:
+    """Configuration for query transformation.
+    Attributes:
+        enabled: Whether transformation is enabled
+        llm_provider: LLM provider name (e.g., "ollama", "openai")
+        llm_model: Model to use for transformation
+        num_queries: Number of alternative queries to generate
+        domain_context: Domain-specific context for better queries
+    """
+    enabled: bool = False
+    llm_provider: str = "ollama"
+    llm_model: str = "llama3.2"
+    num_queries: int = 3
+    domain_context: str = ""
+class QueryTransformer:
+    """LLM-based query transformation for improved RAG retrieval.
+    Transforms user input into optimized search queries by using an LLM
+    to extract key concepts and generate alternative phrasings.
+    This is particularly useful when:
+    - User input contains literal text to analyze (not queries)
+    - User asks vague questions that need expansion
+    - Domain-specific terminology needs translation
+    Example:
+        ```python
+        config = TransformerConfig(
+            enabled=True,
+            llm_provider="ollama",
+            llm_model="llama3.2",
+            domain_context="prompt engineering"
+        )
+        transformer = QueryTransformer(config)
+        await transformer.initialize()
+        # Transform user input to search queries
+        queries = await transformer.transform(
+            "Analyze this: Write a poem about cats"
+        )
+        # Returns: ["prompt analysis techniques", "evaluating prompt quality", ...]
+        ```
+    """
+    def __init__(self, config: TransformerConfig | None = None):
+        """Initialize the query transformer.
+        Args:
+            config: Transformer configuration, uses defaults if not provided
+        """
+        self.config = config or TransformerConfig()
+        self._llm = None
+        self._initialized = False
+    async def initialize(self) -> None:
+        """Initialize the LLM provider.
+        Must be called before using transform() if enabled.
+        """
+        if not self.config.enabled:
+            return
+        from dataknobs_llm.llm import LLMProviderFactory
+        factory = LLMProviderFactory(is_async=True)
+        self._llm = factory.create({
+            "provider": self.config.llm_provider,
+            "model": self.config.llm_model,
+        })
+        await self._llm.initialize()
+        self._initialized = True
+    async def close(self) -> None:
+        """Close the LLM provider and release resources."""
+        if self._llm and hasattr(self._llm, "close"):
+            await self._llm.close()
+        self._initialized = False
+    async def transform(
+        self,
+        user_input: str,
+        num_queries: int | None = None,
+    ) -> list[str]:
+        """Transform user input into optimized search queries.
+        Args:
+            user_input: The user's message or question
+            num_queries: Number of queries to generate (overrides config)
+        Returns:
+            List of optimized search queries
+        Raises:
+            RuntimeError: If transformer is enabled but not initialized
+        """
+        # If disabled, return the original input as a single query
+        if not self.config.enabled:
+            return [user_input]
+        if not self._initialized:
+            raise RuntimeError(
+                "QueryTransformer not initialized. Call initialize() first."
+            )
+        num = num_queries or self.config.num_queries
+        # Build the transformation prompt
+        prompt = self._build_prompt(user_input, num)
+        # Generate queries using LLM
+        response = await self._llm.generate(prompt)
+        # Parse the response into individual queries
+        queries = self._parse_response(response, user_input)
+        return queries[:num]
+    def _build_prompt(self, user_input: str, num_queries: int) -> str:
+        """Build the transformation prompt.
+        Args:
+            user_input: User's message
+            num_queries: Number of queries to generate
+        Returns:
+            Prompt string for LLM
+        """
+        domain_context = ""
+        if self.config.domain_context:
+            domain_context = f" in the context of {self.config.domain_context}"
+        return f"""Generate {num_queries} search queries to find relevant knowledge base content for the following user message{domain_context}.
+User message: "{user_input}"
+Focus on:
+- Key concepts and techniques being discussed
+- The underlying intent, not the literal text
+- Related topics that would provide useful context
+Return ONLY the search queries, one per line, without numbering or explanation.
+Keep each query concise (2-6 words).
+"""
+    def _parse_response(self, response: str, fallback: str) -> list[str]:
+        """Parse LLM response into list of queries.
+        Args:
+            response: Raw LLM response
+            fallback: Fallback query if parsing fails
+        Returns:
+            List of parsed queries
+        """
+        # Split by newlines and clean up
+        lines = response.strip().split("\n")
+        queries = []
+        for line in lines:
+            # Remove common prefixes (numbering, bullets, etc.)
+            cleaned = line.strip()
+            cleaned = cleaned.lstrip("0123456789.-) ")
+            cleaned = cleaned.strip('"\'')
+            if cleaned and len(cleaned) > 2:
+                queries.append(cleaned)
+        # Ensure we have at least one query
+        if not queries:
+            queries = [fallback]
+        return queries
+    async def transform_with_context(
+        self,
+        user_input: str,
+        conversation_context: str,
+        num_queries: int | None = None,
+    ) -> list[str]:
+        """Transform with additional conversation context.
+        Args:
+            user_input: The user's message
+            conversation_context: Recent conversation history
+            num_queries: Number of queries to generate
+        Returns:
+            List of optimized search queries
+        """
+        if not self.config.enabled:
+            return [user_input]
+        if not self._initialized:
+            raise RuntimeError(
+                "QueryTransformer not initialized. Call initialize() first."
+            )
+        num = num_queries or self.config.num_queries
+        # Build enhanced prompt with context
+        prompt = self._build_contextual_prompt(
+            user_input, conversation_context, num
+        )
+        response = await self._llm.generate(prompt)
+        queries = self._parse_response(response, user_input)
+        return queries[:num]
+    def _build_contextual_prompt(
+        self,
+        user_input: str,
+        conversation_context: str,
+        num_queries: int,
+    ) -> str:
+        """Build prompt with conversation context.
+        Args:
+            user_input: User's message
+            conversation_context: Recent conversation
+            num_queries: Number of queries to generate
+        Returns:
+            Prompt string for LLM
+        """
+        domain_context = ""
+        if self.config.domain_context:
+            domain_context = f" in the context of {self.config.domain_context}"
+        return f"""Generate {num_queries} search queries to find relevant knowledge base content for the user's message{domain_context}.
+Recent conversation context:
+{conversation_context}
+Current user message: "{user_input}"
+Focus on:
+- Key concepts relevant to what the user is asking
+- Context from the conversation that clarifies the query
+- Related topics that would provide useful information
+Return ONLY the search queries, one per line, without numbering or explanation.
+Keep each query concise (2-6 words).
+"""
+async def create_transformer(config: dict[str, Any]) -> QueryTransformer:
+    """Create and initialize a QueryTransformer from config dict.
+    Convenience function for creating transformer from configuration.
+    Args:
+        config: Configuration dictionary with TransformerConfig fields
+    Returns:
+        Initialized QueryTransformer
+    Example:
+        ```python
+        transformer = await create_transformer({
+            "enabled": True,
+            "llm_provider": "ollama",
+            "llm_model": "llama3.2",
+            "domain_context": "prompt engineering"
+        })
+        ```
+    """
+    transformer_config = TransformerConfig(**config)
+    transformer = QueryTransformer(transformer_config)
+    await transformer.initialize()
+    return transformer