PyPI - gnosisllm-knowledge - Versions diffs - 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63)

gnosisllm_knowledge/__init__.py +91 -39
gnosisllm_knowledge/api/__init__.py +3 -2
gnosisllm_knowledge/api/knowledge.py +502 -32
gnosisllm_knowledge/api/memory.py +966 -0
gnosisllm_knowledge/backends/__init__.py +14 -5
gnosisllm_knowledge/backends/memory/indexer.py +27 -2
gnosisllm_knowledge/backends/memory/searcher.py +111 -10
gnosisllm_knowledge/backends/opensearch/agentic.py +355 -48
gnosisllm_knowledge/backends/opensearch/config.py +49 -28
gnosisllm_knowledge/backends/opensearch/indexer.py +49 -3
gnosisllm_knowledge/backends/opensearch/mappings.py +14 -5
gnosisllm_knowledge/backends/opensearch/memory/__init__.py +12 -0
gnosisllm_knowledge/backends/opensearch/memory/client.py +1380 -0
gnosisllm_knowledge/backends/opensearch/memory/config.py +127 -0
gnosisllm_knowledge/backends/opensearch/memory/setup.py +322 -0
gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
gnosisllm_knowledge/backends/opensearch/searcher.py +238 -0
gnosisllm_knowledge/backends/opensearch/setup.py +308 -148
gnosisllm_knowledge/cli/app.py +436 -31
gnosisllm_knowledge/cli/commands/agentic.py +26 -9
gnosisllm_knowledge/cli/commands/load.py +169 -19
gnosisllm_knowledge/cli/commands/memory.py +733 -0
gnosisllm_knowledge/cli/commands/search.py +9 -10
gnosisllm_knowledge/cli/commands/setup.py +49 -23
gnosisllm_knowledge/cli/display/service.py +43 -0
gnosisllm_knowledge/cli/utils/config.py +62 -4
gnosisllm_knowledge/core/domain/__init__.py +54 -0
gnosisllm_knowledge/core/domain/discovery.py +166 -0
gnosisllm_knowledge/core/domain/document.py +19 -19
gnosisllm_knowledge/core/domain/memory.py +440 -0
gnosisllm_knowledge/core/domain/result.py +11 -3
gnosisllm_knowledge/core/domain/search.py +12 -25
gnosisllm_knowledge/core/domain/source.py +11 -12
gnosisllm_knowledge/core/events/__init__.py +8 -0
gnosisllm_knowledge/core/events/types.py +198 -5
gnosisllm_knowledge/core/exceptions.py +227 -0
gnosisllm_knowledge/core/interfaces/__init__.py +17 -0
gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
gnosisllm_knowledge/core/interfaces/memory.py +524 -0
gnosisllm_knowledge/core/interfaces/searcher.py +10 -1
gnosisllm_knowledge/core/interfaces/streaming.py +133 -0
gnosisllm_knowledge/core/streaming/__init__.py +36 -0
gnosisllm_knowledge/core/streaming/pipeline.py +228 -0
gnosisllm_knowledge/fetchers/__init__.py +8 -0
gnosisllm_knowledge/fetchers/config.py +27 -0
gnosisllm_knowledge/fetchers/neoreader.py +31 -3
gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
gnosisllm_knowledge/loaders/__init__.py +5 -1
gnosisllm_knowledge/loaders/base.py +3 -4
gnosisllm_knowledge/loaders/discovery.py +338 -0
gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
gnosisllm_knowledge/loaders/factory.py +46 -0
gnosisllm_knowledge/loaders/sitemap.py +129 -1
gnosisllm_knowledge/loaders/sitemap_streaming.py +258 -0
gnosisllm_knowledge/services/indexing.py +100 -93
gnosisllm_knowledge/services/search.py +84 -31
gnosisllm_knowledge/services/streaming_pipeline.py +334 -0
{gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/METADATA +73 -10
gnosisllm_knowledge-0.4.0.dist-info/RECORD +81 -0
gnosisllm_knowledge-0.2.0.dist-info/RECORD +0 -64
{gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/WHEEL +0 -0
{gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/entry_points.txt +0 -0

gnosisllm_knowledge/api/knowledge.py CHANGED

@@ -1,8 +1,44 @@
-"""High-level Knowledge API facade."""
+"""High-level Knowledge API facade.
+This module provides the main entry point for the gnosisllm-knowledge library.
+The Knowledge class is a high-level facade that abstracts the complexity of
+loading, indexing, and searching knowledge documents.
+Note:
+    This library is tenant-agnostic. Multi-tenancy should be handled at the
+    API layer by using separate indices per account (e.g.,
+    `knowledge-{account_id}`) rather than filtering by account_id.
+Example:
+    ```python
+    # Create Knowledge instance for a specific tenant
+    knowledge = Knowledge.from_opensearch(
+        host="localhost",
+        port=9200,
+    )
+    # Use a tenant-specific index
+    tenant_index = f"knowledge-{account_id}"
+    # Load content
+    await knowledge.load(
+        "https://docs.example.com/sitemap.xml",
+        index_name=tenant_index,
+        collection_id="docs",
+    )
+    # Search (tenant isolation via index name)
+    results = await knowledge.search(
+        "how to configure",
+        index_name=tenant_index,
+    )
+    ```
+"""
 from __future__ import annotations
 import logging
+from collections.abc import Callable
 from typing import TYPE_CHECKING, Any
 from gnosisllm_knowledge.backends.opensearch import (
@@ -11,15 +47,24 @@ from gnosisllm_knowledge.backends.opensearch import (
     OpenSearchKnowledgeSearcher,
     OpenSearchSetupAdapter,
 )
+from gnosisllm_knowledge.backends.opensearch.agentic import OpenSearchAgenticSearcher
 from gnosisllm_knowledge.chunking import SentenceChunker
 from gnosisllm_knowledge.core.domain.result import IndexResult
-from gnosisllm_knowledge.core.domain.search import SearchMode, SearchResult
+from gnosisllm_knowledge.core.domain.search import (
+    AgentType,
+    AgenticSearchQuery,
+    AgenticSearchResult,
+    SearchMode,
+    SearchResult,
+)
 from gnosisllm_knowledge.core.events.emitter import EventEmitter
 from gnosisllm_knowledge.core.interfaces.setup import DiagnosticReport, HealthReport
+from gnosisllm_knowledge.core.streaming.pipeline import PipelineConfig
 from gnosisllm_knowledge.fetchers import NeoreaderContentFetcher
 from gnosisllm_knowledge.fetchers.config import NeoreaderConfig
 from gnosisllm_knowledge.loaders import LoaderFactory
 from gnosisllm_knowledge.services import KnowledgeIndexingService, KnowledgeSearchService
+from gnosisllm_knowledge.services.streaming_pipeline import StreamingIndexingPipeline
 if TYPE_CHECKING:
     from opensearchpy import AsyncOpenSearch
@@ -120,6 +165,10 @@ class Knowledge:
     ) -> Knowledge:
         """Create Knowledge instance with OpenSearch backend.
+        This factory creates a Knowledge instance configured for OpenSearch.
+        The returned instance is tenant-agnostic - multi-tenancy should be
+        handled by using separate indices per account.
         Args:
             host: OpenSearch host.
             port: OpenSearch port.
@@ -137,6 +186,19 @@ class Knowledge:
         Note:
             Embeddings are generated automatically by OpenSearch ingest pipeline.
             Run 'gnosisllm-knowledge setup' to configure the ML model.
+        Example:
+            ```python
+            # Create a Knowledge instance
+            knowledge = Knowledge.from_opensearch(
+                host="localhost",
+                port=9200,
+            )
+            # Use tenant-specific index for isolation
+            tenant_index = f"gnosisllm-{account_id}-knowledge"
+            await knowledge.load(source, index_name=tenant_index)
+            ```
         """
         # Import OpenSearch client
         try:
@@ -159,11 +221,12 @@ class Knowledge:
                 **kwargs,
             )
-        # Create client
+        # Create client with proper timeout settings
         client_kwargs: dict[str, Any] = {
             "hosts": [{"host": config.host, "port": config.port}],
             "use_ssl": config.use_ssl,
             "verify_certs": config.verify_certs,
+            "timeout": max(config.read_timeout, config.agentic_timeout_seconds),
         }
         if config.username and config.password:
@@ -181,11 +244,16 @@ class Knowledge:
         # Create fetcher
         fetcher = None
         if neoreader_url:
-            neoreader_config = NeoreaderConfig(base_url=neoreader_url)
+            neoreader_config = NeoreaderConfig(host=neoreader_url)
             fetcher = NeoreaderContentFetcher(neoreader_config)
-        # Create loader factory
-        loader_factory = LoaderFactory(default_fetcher=fetcher)
+        # Create chunker
+        chunker = SentenceChunker()
+        # Create loader factory (fetcher is optional, defaults will be used if None)
+        loader_factory = None
+        if fetcher:
+            loader_factory = LoaderFactory(fetcher=fetcher, chunker=chunker)
         return cls(
             indexer=indexer,
@@ -200,15 +268,29 @@ class Knowledge:
     def from_env(cls) -> Knowledge:
         """Create Knowledge instance from environment variables.
+        This factory creates a Knowledge instance using configuration from
+        environment variables. The returned instance is tenant-agnostic -
+        multi-tenancy should be handled by using separate indices per account.
         Returns:
             Configured Knowledge instance.
+        Example:
+            ```python
+            # Create from environment
+            knowledge = Knowledge.from_env()
+            # Use tenant-specific index for isolation
+            tenant_index = f"gnosisllm-{account_id}-knowledge"
+            await knowledge.search("query", index_name=tenant_index)
+            ```
         """
         config = OpenSearchConfig.from_env()
         neoreader_config = NeoreaderConfig.from_env()
         return cls.from_opensearch(
             config=config,
-            neoreader_url=neoreader_config.base_url if neoreader_config.base_url else None,
+            neoreader_url=neoreader_config.host if neoreader_config.host else None,
         )
     @property
@@ -302,7 +384,6 @@ class Knowledge:
         source: str,
         *,
         index_name: str | None = None,
-        account_id: str | None = None,
         collection_id: str | None = None,
         source_id: str | None = None,
         source_type: str | None = None,
@@ -313,10 +394,13 @@ class Knowledge:
         Automatically detects source type (sitemap, website, etc.).
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            by using separate indices per account.
         Args:
             source: Source URL or path.
-            index_name: Target index (uses default if not provided).
-            account_id: Account ID for multi-tenancy.
+            index_name: Target index (use tenant-specific name for isolation).
             collection_id: Collection ID.
             source_id: Source ID (auto-generated if not provided).
             source_type: Explicit source type (auto-detected if not provided).
@@ -335,9 +419,9 @@ class Knowledge:
         # Auto-detect or use explicit source type
         if source_type:
-            loader = self._loader_factory.create(source_type, self._fetcher)
+            loader = self._loader_factory.create(source_type)
         else:
-            loader = self._loader_factory.create_for_source(source, self._fetcher)
+            loader = self._loader_factory.create_for_source(source)
         # Create service for this load operation
         service = KnowledgeIndexingService(
@@ -350,12 +434,102 @@ class Knowledge:
         return await service.load_and_index(
             source=source,
             index_name=index,
-            account_id=account_id,
             collection_id=collection_id,
             source_id=source_id,
             **options,
         )
+    async def load_streaming(
+        self,
+        source: str,
+        *,
+        index_name: str | None = None,
+        collection_id: str | None = None,
+        collection_name: str | None = None,
+        source_id: str | None = None,
+        url_batch_size: int = 50,
+        fetch_concurrency: int = 10,
+        index_batch_size: int = 100,
+        on_progress: Callable[[int, int], None] | None = None,
+        **options: Any,
+    ) -> IndexResult:
+        """Load and index content using streaming pipeline with bounded memory.
+        This method is optimized for large sitemaps (10,000+ URLs) that would
+        otherwise exhaust memory. It processes URLs in batches, indexing
+        documents immediately rather than loading all content first.
+        Memory usage is bounded and independent of sitemap size:
+        - URL storage: O(url_batch_size)
+        - Document storage: O(index_batch_size)
+        - In-flight fetches: O(fetch_concurrency * avg_page_size)
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            by using separate indices per account.
+        Args:
+            source: Sitemap URL.
+            index_name: Target index (use tenant-specific name for isolation).
+            collection_id: Collection ID.
+            collection_name: Collection name for display.
+            source_id: Source ID (auto-generated if not provided).
+            url_batch_size: URLs to discover per batch (default 50).
+            fetch_concurrency: Parallel URL fetches (default 10).
+            index_batch_size: Documents per index batch (default 100).
+            on_progress: Optional progress callback (urls_processed, docs_indexed).
+            **options: Additional loading options (max_urls, patterns, etc.).
+        Returns:
+            Index result with counts.
+        Example:
+            ```python
+            # Efficiently load 100k+ URL sitemap
+            result = await knowledge.load_streaming(
+                "https://large-site.com/sitemap.xml",
+                index_name="knowledge-account123",  # Tenant-specific
+                url_batch_size=100,
+                fetch_concurrency=20,
+                max_urls=50000,
+            )
+            print(f"Indexed {result.indexed_count} documents")
+            ```
+        """
+        if self._loader_factory is None:
+            raise ValueError("Loader factory not configured")
+        index = index_name or self._default_index
+        if not index:
+            raise ValueError("No index specified and no default index configured")
+        # Create sitemap loader specifically for streaming
+        loader = self._loader_factory.create("sitemap")
+        # Configure pipeline
+        config = PipelineConfig(
+            url_batch_size=url_batch_size,
+            fetch_concurrency=fetch_concurrency,
+            index_batch_size=index_batch_size,
+        )
+        # Create streaming pipeline
+        pipeline = StreamingIndexingPipeline(
+            loader=loader,
+            indexer=self._indexer,
+            config=config,
+            events=self._events,
+        )
+        return await pipeline.execute(
+            source=source,
+            index_name=index,
+            collection_id=collection_id,
+            collection_name=collection_name,
+            source_id=source_id,
+            **options,
+        )
     # === Search Methods ===
     async def search(
@@ -366,7 +540,6 @@ class Knowledge:
         mode: SearchMode = SearchMode.HYBRID,
         limit: int = 10,
         offset: int = 0,
-        account_id: str | None = None,
         collection_ids: list[str] | None = None,
         source_ids: list[str] | None = None,
         min_score: float | None = None,
@@ -374,13 +547,16 @@ class Knowledge:
     ) -> SearchResult:
         """Search for knowledge documents.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            by using separate indices per account.
         Args:
             query: Search query text.
-            index_name: Index to search (uses default if not provided).
+            index_name: Index to search (use tenant-specific name for isolation).
             mode: Search mode (semantic, keyword, hybrid).
             limit: Maximum results.
             offset: Result offset for pagination.
-            account_id: Account ID for multi-tenancy.
             collection_ids: Filter by collection IDs.
             source_ids: Filter by source IDs.
             min_score: Minimum score threshold.
@@ -395,7 +571,6 @@ class Knowledge:
             mode=mode,
             limit=limit,
             offset=offset,
-            account_id=account_id,
             collection_ids=collection_ids,
             source_ids=source_ids,
             min_score=min_score,
@@ -473,19 +648,73 @@ class Knowledge:
     # === Management Methods ===
+    async def get_document(
+        self,
+        document_id: str,
+        *,
+        index_name: str | None = None,
+    ) -> dict[str, Any] | None:
+        """Get a single document by ID.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            by using separate indices per account.
+        Args:
+            document_id: Document ID to retrieve.
+            index_name: Index name (use tenant-specific name for isolation).
+                Uses default index if not provided.
+        Returns:
+            Document dict with all fields (excluding embeddings) or None if not found.
+        """
+        index = index_name or self._default_index
+        if not index:
+            raise ValueError("No index specified and no default index configured")
+        return await self._indexer.get(document_id, index)
+    async def delete_document(
+        self,
+        document_id: str,
+        *,
+        index_name: str | None = None,
+    ) -> bool:
+        """Delete a single document by ID.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            by using separate indices per account.
+        Args:
+            document_id: Document ID to delete.
+            index_name: Index name (use tenant-specific name for isolation).
+                Uses default index if not provided.
+        Returns:
+            True if deleted, False if not found.
+        """
+        index = index_name or self._default_index
+        if not index:
+            raise ValueError("No index specified and no default index configured")
+        return await self._indexer.delete(document_id, index)
     async def delete_source(
         self,
         source_id: str,
         *,
         index_name: str | None = None,
-        account_id: str | None = None,
     ) -> int:
         """Delete all documents from a source.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            by using separate indices per account.
         Args:
             source_id: Source ID to delete.
-            index_name: Index name.
-            account_id: Account ID for multi-tenancy.
+            index_name: Index name (use tenant-specific name for isolation).
         Returns:
             Count of deleted documents.
@@ -494,21 +723,23 @@ class Knowledge:
         if not index:
             raise ValueError("No index specified")
-        return await self.indexing.delete_source(source_id, index, account_id)
+        return await self.indexing.delete_source(source_id, index)
     async def delete_collection(
         self,
         collection_id: str,
         *,
         index_name: str | None = None,
-        account_id: str | None = None,
     ) -> int:
         """Delete all documents from a collection.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            by using separate indices per account.
         Args:
             collection_id: Collection ID to delete.
-            index_name: Index name.
-            account_id: Account ID for multi-tenancy.
+            index_name: Index name (use tenant-specific name for isolation).
         Returns:
             Count of deleted documents.
@@ -517,32 +748,271 @@ class Knowledge:
         if not index:
             raise ValueError("No index specified")
-        return await self.indexing.delete_collection(collection_id, index, account_id)
+        return await self.indexing.delete_collection(collection_id, index)
     async def count(
         self,
         *,
         index_name: str | None = None,
-        account_id: str | None = None,
         collection_id: str | None = None,
+        source_id: str | None = None,
     ) -> int:
         """Count documents.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            by using separate indices per account.
         Args:
-            index_name: Index to count.
-            account_id: Filter by account.
+            index_name: Index to count (use tenant-specific name for isolation).
             collection_id: Filter by collection.
+            source_id: Filter by source (for source deletion confirmation).
         Returns:
             Document count.
         """
         return await self.search_service.count(
             index_name=index_name,
-            account_id=account_id,
             collection_id=collection_id,
+            source_id=source_id,
         )
+    # === Collection and Stats Methods ===
+    async def get_collections(
+        self,
+        *,
+        index_name: str | None = None,
+    ) -> list[dict[str, Any]]:
+        """Get all collections with document counts.
+        Aggregates unique collection_ids from indexed documents.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            by using separate indices per account.
+        Args:
+            index_name: Index to query (use tenant-specific name for isolation).
+                Uses default index if not provided.
+        Returns:
+            List of collection dictionaries with id, name, and document_count.
+        """
+        index = index_name or self._default_index
+        return await self.search_service.get_collections(index_name=index)
+    async def get_stats(
+        self,
+        *,
+        index_name: str | None = None,
+    ) -> dict[str, Any]:
+        """Get index statistics.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            by using separate indices per account.
+        Args:
+            index_name: Index to query (use tenant-specific name for isolation).
+                Uses default index if not provided.
+        Returns:
+            Dictionary with document_count, index_name, and other stats.
+        """
+        index = index_name or self._default_index
+        return await self.search_service.get_stats(index_name=index)
+    async def list_documents(
+        self,
+        *,
+        index_name: str | None = None,
+        source_id: str | None = None,
+        collection_id: str | None = None,
+        limit: int = 50,
+        offset: int = 0,
+    ) -> dict[str, Any]:
+        """List documents with optional filters.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            by using separate indices per account.
+        Args:
+            index_name: Index to query (use tenant-specific name for isolation).
+                Uses default index if not provided.
+            source_id: Optional source ID filter.
+            collection_id: Optional collection ID filter.
+            limit: Maximum documents to return (max 100).
+            offset: Number of documents to skip.
+        Returns:
+            Dictionary with documents, total, limit, offset.
+        """
+        index = index_name or self._default_index
+        if not index:
+            raise ValueError("No index specified and no default index configured")
+        # Clamp limit to reasonable bounds
+        limit = min(max(1, limit), 100)
+        offset = max(0, offset)
+        return await self._searcher.list_documents(
+            index_name=index,
+            source_id=source_id,
+            collection_id=collection_id,
+            limit=limit,
+            offset=offset,
+        )
+    # === Agentic Search Status ===
+    @property
+    def is_agentic_configured(self) -> bool:
+        """Check if agentic search is configured.
+        Returns:
+            True if at least one agent type is configured.
+        """
+        if not hasattr(self, '_searcher') or not hasattr(self._searcher, '_config'):
+            return False
+        config = self._searcher._config
+        return bool(config.flow_agent_id or config.conversational_agent_id)
+    async def get_agentic_status(self) -> dict[str, Any]:
+        """Get status of agentic search configuration.
+        Returns:
+            Dictionary with agent availability status:
+            - available: True if any agent is configured
+            - flow_agent: True if flow agent is configured
+            - conversational_agent: True if conversational agent is configured
+        """
+        if not hasattr(self, '_searcher') or not hasattr(self._searcher, '_config'):
+            return {
+                "available": False,
+                "flow_agent": False,
+                "conversational_agent": False,
+            }
+        config = self._searcher._config
+        return {
+            "available": bool(config.flow_agent_id or config.conversational_agent_id),
+            "flow_agent": bool(config.flow_agent_id),
+            "conversational_agent": bool(config.conversational_agent_id),
+        }
+    async def agentic_search(
+        self,
+        query: str,
+        *,
+        agent_type: AgentType = AgentType.FLOW,
+        index_name: str | None = None,
+        collection_ids: list[str] | None = None,
+        source_ids: list[str] | None = None,
+        conversation_id: str | None = None,
+        include_reasoning: bool = True,
+        limit: int = 10,
+        **options: Any,
+    ) -> AgenticSearchResult:
+        """Execute agentic search with AI-powered reasoning.
+        Uses OpenSearch ML agents to understand queries, retrieve relevant
+        documents, and generate natural language answers.
+        Args:
+            query: Search query text.
+            agent_type: Type of agent (FLOW for fast RAG, CONVERSATIONAL for multi-turn).
+            index_name: Index to search (uses default if not provided).
+            collection_ids: Filter by collection IDs.
+            source_ids: Filter by source IDs.
+            conversation_id: Conversation ID for multi-turn (conversational agent).
+            include_reasoning: Include reasoning steps in response.
+            limit: Maximum source documents to retrieve.
+            **options: Additional agent options.
+        Returns:
+            AgenticSearchResult with answer, reasoning steps, and sources.
+        Raises:
+            AgenticSearchError: If agent execution fails.
+            ValueError: If agentic search is not configured.
+        Example:
+            ```python
+            result = await knowledge.agentic_search(
+                "How does authentication work?",
+                agent_type=AgentType.FLOW,
+            )
+            print(result.answer)
+            for source in result.items:
+                print(f"- {source.title}")
+            ```
+        """
+        # Check if agentic search is configured
+        if not self.is_agentic_configured:
+            raise ValueError(
+                "Agentic search is not configured. "
+                "Run 'gnosisllm-knowledge agentic setup' and set agent IDs in environment."
+            )
+        # Get client and config from the searcher
+        if not hasattr(self._searcher, '_client') or not hasattr(self._searcher, '_config'):
+            raise ValueError("Searcher does not have OpenSearch client/config")
+        client = self._searcher._client
+        config = self._searcher._config
+        # Create agentic searcher
+        agentic_searcher = OpenSearchAgenticSearcher(client, config)
+        # Build agentic query
+        agentic_query = AgenticSearchQuery(
+            text=query,
+            agent_type=agent_type,
+            collection_ids=collection_ids,
+            source_ids=source_ids,
+            conversation_id=conversation_id,
+            include_reasoning=include_reasoning,
+            limit=limit,
+        )
+        # Determine index name
+        index = index_name or self._default_index
+        if not index:
+            raise ValueError("No index specified and no default index configured")
+        # Execute agentic search
+        return await agentic_searcher.agentic_search(agentic_query, index, **options)
     async def close(self) -> None:
-        """Close connections and clean up resources."""
-        # Subclasses or future implementations can override this
-        pass
+        """Close connections and clean up resources.
+        Closes the underlying AsyncOpenSearch client to prevent
+        unclosed aiohttp session warnings. Properly handles
+        CancelledError during event loop shutdown.
+        """
+        import asyncio
+        # Close the OpenSearch client via the searcher
+        # Note: indexer, searcher, and setup share the same client instance,
+        # so closing via searcher is sufficient
+        if hasattr(self._searcher, '_client') and self._searcher._client is not None:
+            client = self._searcher._client
+            try:
+                await client.close()
+                logger.debug("Closed OpenSearch client connection")
+            except asyncio.CancelledError:
+                # Event loop is shutting down - this is expected during cleanup
+                logger.debug("OpenSearch client close cancelled (event loop shutting down)")
+            except Exception as e:
+                logger.warning(f"Error closing OpenSearch client: {e}")
+            finally:
+                # Clear client reference on all components that share it
+                # This prevents any accidental reuse after close
+                if hasattr(self._searcher, '_client'):
+                    self._searcher._client = None
+                if hasattr(self._indexer, '_client'):
+                    self._indexer._client = None
+                if self._setup and hasattr(self._setup, '_client'):
+                    self._setup._client = None

gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl