PyPI - gnosisllm-knowledge - Versions diffs - 0.2.0__py3-none-any.whl - Mend

gnosisllm-knowledge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

gnosisllm_knowledge/__init__.py +152 -0
gnosisllm_knowledge/api/__init__.py +5 -0
gnosisllm_knowledge/api/knowledge.py +548 -0
gnosisllm_knowledge/backends/__init__.py +26 -0
gnosisllm_knowledge/backends/memory/__init__.py +9 -0
gnosisllm_knowledge/backends/memory/indexer.py +384 -0
gnosisllm_knowledge/backends/memory/searcher.py +516 -0
gnosisllm_knowledge/backends/opensearch/__init__.py +19 -0
gnosisllm_knowledge/backends/opensearch/agentic.py +738 -0
gnosisllm_knowledge/backends/opensearch/config.py +195 -0
gnosisllm_knowledge/backends/opensearch/indexer.py +499 -0
gnosisllm_knowledge/backends/opensearch/mappings.py +255 -0
gnosisllm_knowledge/backends/opensearch/queries.py +445 -0
gnosisllm_knowledge/backends/opensearch/searcher.py +383 -0
gnosisllm_knowledge/backends/opensearch/setup.py +1390 -0
gnosisllm_knowledge/chunking/__init__.py +9 -0
gnosisllm_knowledge/chunking/fixed.py +138 -0
gnosisllm_knowledge/chunking/sentence.py +239 -0
gnosisllm_knowledge/cli/__init__.py +18 -0
gnosisllm_knowledge/cli/app.py +509 -0
gnosisllm_knowledge/cli/commands/__init__.py +7 -0
gnosisllm_knowledge/cli/commands/agentic.py +529 -0
gnosisllm_knowledge/cli/commands/load.py +369 -0
gnosisllm_knowledge/cli/commands/search.py +440 -0
gnosisllm_knowledge/cli/commands/setup.py +228 -0
gnosisllm_knowledge/cli/display/__init__.py +5 -0
gnosisllm_knowledge/cli/display/service.py +555 -0
gnosisllm_knowledge/cli/utils/__init__.py +5 -0
gnosisllm_knowledge/cli/utils/config.py +207 -0
gnosisllm_knowledge/core/__init__.py +87 -0
gnosisllm_knowledge/core/domain/__init__.py +43 -0
gnosisllm_knowledge/core/domain/document.py +240 -0
gnosisllm_knowledge/core/domain/result.py +176 -0
gnosisllm_knowledge/core/domain/search.py +327 -0
gnosisllm_knowledge/core/domain/source.py +139 -0
gnosisllm_knowledge/core/events/__init__.py +23 -0
gnosisllm_knowledge/core/events/emitter.py +216 -0
gnosisllm_knowledge/core/events/types.py +226 -0
gnosisllm_knowledge/core/exceptions.py +407 -0
gnosisllm_knowledge/core/interfaces/__init__.py +20 -0
gnosisllm_knowledge/core/interfaces/agentic.py +136 -0
gnosisllm_knowledge/core/interfaces/chunker.py +64 -0
gnosisllm_knowledge/core/interfaces/fetcher.py +112 -0
gnosisllm_knowledge/core/interfaces/indexer.py +244 -0
gnosisllm_knowledge/core/interfaces/loader.py +102 -0
gnosisllm_knowledge/core/interfaces/searcher.py +178 -0
gnosisllm_knowledge/core/interfaces/setup.py +164 -0
gnosisllm_knowledge/fetchers/__init__.py +12 -0
gnosisllm_knowledge/fetchers/config.py +77 -0
gnosisllm_knowledge/fetchers/http.py +167 -0
gnosisllm_knowledge/fetchers/neoreader.py +204 -0
gnosisllm_knowledge/loaders/__init__.py +13 -0
gnosisllm_knowledge/loaders/base.py +399 -0
gnosisllm_knowledge/loaders/factory.py +202 -0
gnosisllm_knowledge/loaders/sitemap.py +285 -0
gnosisllm_knowledge/loaders/website.py +57 -0
gnosisllm_knowledge/py.typed +0 -0
gnosisllm_knowledge/services/__init__.py +9 -0
gnosisllm_knowledge/services/indexing.py +387 -0
gnosisllm_knowledge/services/search.py +349 -0
gnosisllm_knowledge-0.2.0.dist-info/METADATA +382 -0
gnosisllm_knowledge-0.2.0.dist-info/RECORD +64 -0
gnosisllm_knowledge-0.2.0.dist-info/WHEEL +4 -0
gnosisllm_knowledge-0.2.0.dist-info/entry_points.txt +3 -0

gnosisllm_knowledge/core/exceptions.py ADDED Viewed

@@ -0,0 +1,407 @@
+"""Exception hierarchy for gnosisllm-knowledge."""
+from __future__ import annotations
+from typing import Any
+class KnowledgeError(Exception):
+    """Base exception for gnosisllm-knowledge.
+    All library exceptions inherit from this class.
+    Attributes:
+        message: Human-readable error message.
+        code: Machine-readable error code.
+        details: Additional error details.
+        cause: Original exception that caused this error.
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        code: str | None = None,
+        details: dict[str, Any] | None = None,
+        cause: Exception | None = None,
+    ) -> None:
+        """Initialize the exception.
+        Args:
+            message: Human-readable error message.
+            code: Machine-readable error code.
+            details: Additional error details.
+            cause: Original exception that caused this error.
+        """
+        super().__init__(message)
+        self.message = message
+        self.code = code
+        self.details = details or {}
+        self.cause = cause
+    def __str__(self) -> str:
+        """Return string representation."""
+        parts = [self.message]
+        if self.code:
+            parts.append(f"[{self.code}]")
+        if self.cause:
+            parts.append(f"(caused by: {self.cause})")
+        return " ".join(parts)
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for serialization."""
+        return {
+            "error": self.__class__.__name__,
+            "message": self.message,
+            "code": self.code,
+            "details": self.details,
+        }
+class ConfigurationError(KnowledgeError):
+    """Invalid or missing configuration.
+    Raised when required configuration is missing or invalid.
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        config_key: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.config_key = config_key
+        if config_key:
+            self.details["config_key"] = config_key
+class ConnectionError(KnowledgeError):
+    """Failed to connect to backend.
+    Raised when unable to establish connection to a service.
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        host: str | None = None,
+        port: int | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.host = host
+        self.port = port
+        if host:
+            self.details["host"] = host
+        if port:
+            self.details["port"] = port
+class AuthenticationError(KnowledgeError):
+    """Authentication failed.
+    Raised when authentication to a service fails.
+    """
+    pass
+class AuthorizationError(KnowledgeError):
+    """Authorization denied.
+    Raised when a user doesn't have permission to perform an operation.
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        required_permission: str | None = None,
+        resource: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.required_permission = required_permission
+        self.resource = resource
+        if required_permission:
+            self.details["required_permission"] = required_permission
+        if resource:
+            self.details["resource"] = resource
+class LoadError(KnowledgeError):
+    """Failed to load content.
+    Raised when content loading fails (fetch error, parse error, etc.).
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        source: str,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(f"Failed to load '{source}': {message}", **kwargs)
+        self.source = source
+        self.details["source"] = source
+class FetchError(LoadError):
+    """Failed to fetch content from URL.
+    More specific than LoadError, for HTTP/network failures.
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        source: str,
+        status_code: int | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, source=source, **kwargs)
+        self.status_code = status_code
+        if status_code:
+            self.details["status_code"] = status_code
+class ValidationError(KnowledgeError):
+    """Content validation failed.
+    Raised when document content fails validation rules.
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        field: str | None = None,
+        value: Any = None,
+        errors: list[str] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.field = field
+        self.value = value
+        self.errors = errors or []
+        if field:
+            self.details["field"] = field
+        if errors:
+            self.details["errors"] = errors
+class IndexError(KnowledgeError):
+    """Failed to index documents.
+    Raised when document indexing fails.
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        index_name: str | None = None,
+        doc_count: int = 0,
+        failed_count: int = 0,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.index_name = index_name
+        self.doc_count = doc_count
+        self.failed_count = failed_count
+        if index_name:
+            self.details["index_name"] = index_name
+        self.details["doc_count"] = doc_count
+        self.details["failed_count"] = failed_count
+class SearchError(KnowledgeError):
+    """Failed to execute search.
+    Raised when search operations fail.
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        query: str | None = None,
+        index_name: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.query = query
+        self.index_name = index_name
+        if query:
+            self.details["query"] = query
+        if index_name:
+            self.details["index_name"] = index_name
+class AgenticSearchError(SearchError):
+    """Failed to execute agentic search.
+    Raised when AI agent-powered search operations fail.
+    This includes agent execution failures, LLM errors, and timeouts.
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        agent_id: str | None = None,
+        agent_type: str | None = None,
+        conversation_id: str | None = None,
+        iteration: int | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.agent_id = agent_id
+        self.agent_type = agent_type
+        self.conversation_id = conversation_id
+        self.iteration = iteration
+        if agent_id:
+            self.details["agent_id"] = agent_id
+        if agent_type:
+            self.details["agent_type"] = agent_type
+        if conversation_id:
+            self.details["conversation_id"] = conversation_id
+        if iteration is not None:
+            self.details["iteration"] = iteration
+class EmbeddingError(KnowledgeError):
+    """Failed to generate embeddings.
+    Raised when embedding generation fails.
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        model: str | None = None,
+        text_length: int = 0,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.model = model
+        self.text_length = text_length
+        if model:
+            self.details["model"] = model
+        self.details["text_length"] = text_length
+class SetupError(KnowledgeError):
+    """Failed during setup.
+    Raised when backend setup fails.
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        step: str,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(f"Setup failed at '{step}': {message}", **kwargs)
+        self.step = step
+        self.details["step"] = step
+class TimeoutError(KnowledgeError):
+    """Operation timed out.
+    Raised when an operation exceeds its timeout.
+    """
+    def __init__(
+        self,
+        message: str = "Operation timed out",
+        *,
+        timeout: float | None = None,
+        operation: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.timeout = timeout
+        self.operation = operation
+        if timeout:
+            self.details["timeout"] = timeout
+        if operation:
+            self.details["operation"] = operation
+class CircuitBreakerOpenError(KnowledgeError):
+    """Circuit breaker is open.
+    Raised when a circuit breaker is open and rejecting requests.
+    """
+    def __init__(
+        self,
+        message: str = "Circuit breaker is open",
+        *,
+        recovery_time: float | None = None,
+        component: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.recovery_time = recovery_time
+        self.component = component
+        if recovery_time:
+            self.details["recovery_time"] = recovery_time
+        if component:
+            self.details["component"] = component
+class RateLimitError(KnowledgeError):
+    """Rate limit exceeded.
+    Raised when API rate limits are exceeded.
+    """
+    def __init__(
+        self,
+        message: str = "Rate limit exceeded",
+        *,
+        retry_after: float | None = None,
+        limit: int | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.retry_after = retry_after
+        self.limit = limit
+        if retry_after:
+            self.details["retry_after"] = retry_after
+        if limit:
+            self.details["limit"] = limit
+class DocumentNotFoundError(KnowledgeError):
+    """Document not found.
+    Raised when a document cannot be found.
+    """
+    def __init__(
+        self,
+        message: str = "Document not found",
+        *,
+        doc_id: str | None = None,
+        index_name: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.doc_id = doc_id
+        self.index_name = index_name
+        if doc_id:
+            self.details["doc_id"] = doc_id
+        if index_name:
+            self.details["index_name"] = index_name

gnosisllm_knowledge/core/interfaces/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+"""Interface definitions (protocols) for dependency injection."""
+from gnosisllm_knowledge.core.interfaces.agentic import IAgenticSearcher
+from gnosisllm_knowledge.core.interfaces.chunker import ITextChunker
+from gnosisllm_knowledge.core.interfaces.fetcher import FetchResult, IContentFetcher
+from gnosisllm_knowledge.core.interfaces.indexer import IDocumentIndexer
+from gnosisllm_knowledge.core.interfaces.loader import IContentLoader
+from gnosisllm_knowledge.core.interfaces.searcher import IKnowledgeSearcher
+from gnosisllm_knowledge.core.interfaces.setup import ISetupAdapter
+# Public API of the interfaces package: every name imported above is
+# re-exported, and these are the names ``from ... import *`` exposes.
+__all__ = [
+    "IContentLoader",
+    "IContentFetcher",
+    "FetchResult",
+    "ITextChunker",
+    "IDocumentIndexer",
+    "IKnowledgeSearcher",
+    "IAgenticSearcher",
+    "ISetupAdapter",
+]

gnosisllm_knowledge/core/interfaces/agentic.py ADDED Viewed

@@ -0,0 +1,136 @@
+"""Agentic searcher protocol - Interface for AI-powered search operations."""
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
+if TYPE_CHECKING:
+    from gnosisllm_knowledge.core.domain.search import (
+        AgenticSearchQuery,
+        AgenticSearchResult,
+    )
+@runtime_checkable
+class IAgenticSearcher(Protocol):
+    """Protocol for agentic search operations using AI agents.
+    Agentic searchers are responsible for:
+    - Understanding natural language queries
+    - Automatically constructing optimal search strategies
+    - Generating context-aware answers from retrieved documents
+    - Supporting multi-turn conversations with memory
+    Implementations should provide AI-powered search capabilities
+    that go beyond traditional search by understanding user intent
+    and generating comprehensive answers.
+    This is a structural (duck-typed) interface: a class conforms by
+    providing the members below, without inheriting from it. Because the
+    protocol is ``runtime_checkable``, ``isinstance()`` checks are allowed,
+    but they only verify that the members exist, not their signatures.
+    """
+    @property
+    def is_configured(self) -> bool:
+        """Check if agentic search is properly configured.
+        Returns:
+            True if all required agents and models are configured.
+        """
+        ...
+    @property
+    def flow_agent_available(self) -> bool:
+        """Check if flow agent is available.
+        Returns:
+            True if flow agent can be used.
+        """
+        ...
+    @property
+    def conversational_agent_available(self) -> bool:
+        """Check if conversational agent is available.
+        Returns:
+            True if conversational agent can be used.
+        """
+        ...
+    async def agentic_search(
+        self,
+        query: AgenticSearchQuery,
+        index_name: str,
+        **options: Any,
+    ) -> AgenticSearchResult:
+        """Execute agentic search with agent orchestration.
+        The agent will:
+        1. Analyze the query to understand user intent
+        2. Search for relevant documents
+        3. Generate a comprehensive answer with citations
+        4. (Optional) Maintain conversation memory
+        This is a coroutine and must be awaited.
+        Args:
+            query: Agentic search query with agent type and context.
+            index_name: Target index name.
+            **options: Additional agent options (implementation-defined).
+        Returns:
+            AgenticSearchResult with answer, reasoning, and sources.
+        """
+        ...
+    async def get_conversation(
+        self,
+        conversation_id: str,
+    ) -> list[dict[str, Any]]:
+        """Get conversation history for multi-turn searches.
+        This is a coroutine and must be awaited.
+        Args:
+            conversation_id: Conversation identifier.
+        Returns:
+            List of conversation messages with role, content, and metadata.
+        """
+        ...
+    async def clear_conversation(
+        self,
+        conversation_id: str,
+    ) -> bool:
+        """Clear conversation history.
+        This is a coroutine and must be awaited.
+        Args:
+            conversation_id: Conversation to clear.
+        Returns:
+            True if cleared successfully, False if not found.
+        """
+        ...
+    async def list_conversations(
+        self,
+        account_id: str | None = None,
+        limit: int = 100,
+    ) -> list[dict[str, Any]]:
+        """List active conversations.
+        This is a coroutine and must be awaited.
+        Args:
+            account_id: Filter by account (multi-tenant). Defaults to None.
+            limit: Maximum number of conversations. Defaults to 100.
+        Returns:
+            List of conversation metadata dicts.
+        """
+        ...
+    async def get_agent_status(
+        self,
+        agent_id: str,
+    ) -> dict[str, Any] | None:
+        """Get status of an agent.
+        This is a coroutine and must be awaited.
+        Args:
+            agent_id: Agent identifier.
+        Returns:
+            Agent status info or None if not found.
+        """
+        ...

gnosisllm_knowledge/core/interfaces/chunker.py ADDED Viewed

@@ -0,0 +1,64 @@
+"""Text chunker protocol - Single Responsibility Principle."""
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
+if TYPE_CHECKING:
+    from gnosisllm_knowledge.core.domain.document import TextChunk
+@runtime_checkable
+class ITextChunker(Protocol):
+    """Protocol for chunking text into smaller pieces.
+    Text chunkers are responsible for:
+    - Splitting large text into embedding-friendly chunks
+    - Preserving semantic boundaries (sentences, paragraphs)
+    - Handling overlap between chunks
+    - Maintaining position information
+    Implementations should follow the Single Responsibility Principle
+    and handle only text chunking, not fetching or indexing.
+    This is a structural (duck-typed) interface: a class conforms by
+    providing the members below, without inheriting from it. Because the
+    protocol is ``runtime_checkable``, ``isinstance()`` checks are allowed,
+    but they only verify that the members exist, not their signatures.
+    """
+    @property
+    def name(self) -> str:
+        """Return the chunker name for identification."""
+        ...
+    @property
+    def chunk_size(self) -> int:
+        """Return the target chunk size in characters."""
+        ...
+    @property
+    def chunk_overlap(self) -> int:
+        """Return the overlap between chunks in characters."""
+        ...
+    def chunk(self, text: str, **options: Any) -> list[TextChunk]:
+        """Split text into chunks suitable for embedding.
+        Args:
+            text: The text to chunk.
+            **options: Chunker-specific options like:
+                - chunk_size: Override default chunk size
+                - chunk_overlap: Override default overlap
+                - preserve_sentences: Keep sentences intact
+                Which options are honored is up to the implementation.
+        Returns:
+            List of TextChunk objects with content and position info.
+        """
+        ...
+    def estimate_chunks(self, text: str) -> int:
+        """Estimate the number of chunks that would be created.
+        Unlike ``chunk``, this method takes no per-call options.
+        Args:
+            text: The text to estimate.
+        Returns:
+            Estimated number of chunks.
+        """
+        ...