PyPI - gnosisllm-knowledge - Versions diffs - 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

gnosisllm-knowledge 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43)

gnosisllm_knowledge/api/knowledge.py +225 -35
gnosisllm_knowledge/backends/memory/indexer.py +27 -2
gnosisllm_knowledge/backends/memory/searcher.py +111 -10
gnosisllm_knowledge/backends/opensearch/agentic.py +14 -9
gnosisllm_knowledge/backends/opensearch/indexer.py +48 -3
gnosisllm_knowledge/backends/opensearch/mappings.py +12 -4
gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
gnosisllm_knowledge/backends/opensearch/searcher.py +9 -6
gnosisllm_knowledge/cli/app.py +58 -19
gnosisllm_knowledge/cli/commands/agentic.py +15 -9
gnosisllm_knowledge/cli/commands/load.py +169 -19
gnosisllm_knowledge/cli/commands/memory.py +10 -0
gnosisllm_knowledge/cli/commands/search.py +9 -10
gnosisllm_knowledge/cli/commands/setup.py +25 -1
gnosisllm_knowledge/cli/utils/config.py +4 -4
gnosisllm_knowledge/core/domain/__init__.py +13 -0
gnosisllm_knowledge/core/domain/discovery.py +166 -0
gnosisllm_knowledge/core/domain/document.py +14 -19
gnosisllm_knowledge/core/domain/search.py +10 -25
gnosisllm_knowledge/core/domain/source.py +11 -12
gnosisllm_knowledge/core/events/__init__.py +8 -0
gnosisllm_knowledge/core/events/types.py +122 -5
gnosisllm_knowledge/core/exceptions.py +93 -0
gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
gnosisllm_knowledge/core/interfaces/searcher.py +10 -1
gnosisllm_knowledge/core/interfaces/streaming.py +10 -4
gnosisllm_knowledge/fetchers/__init__.py +8 -0
gnosisllm_knowledge/fetchers/config.py +27 -0
gnosisllm_knowledge/fetchers/neoreader.py +31 -3
gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
gnosisllm_knowledge/loaders/__init__.py +5 -1
gnosisllm_knowledge/loaders/discovery.py +338 -0
gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
gnosisllm_knowledge/loaders/factory.py +46 -0
gnosisllm_knowledge/services/indexing.py +35 -20
gnosisllm_knowledge/services/search.py +37 -20
gnosisllm_knowledge/services/streaming_pipeline.py +39 -7
{gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/METADATA +30 -10
gnosisllm_knowledge-0.4.0.dist-info/RECORD +81 -0
gnosisllm_knowledge-0.3.0.dist-info/RECORD +0 -77
{gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/WHEEL +0 -0
{gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/entry_points.txt +0 -0

gnosisllm_knowledge/loaders/factory.py CHANGED Viewed

@@ -9,7 +9,11 @@ from typing import Any
 from gnosisllm_knowledge.core.events.emitter import EventEmitter
 from gnosisllm_knowledge.core.interfaces.chunker import ITextChunker
 from gnosisllm_knowledge.core.interfaces.fetcher import IContentFetcher
+from gnosisllm_knowledge.fetchers.config import NeoreaderConfig
+from gnosisllm_knowledge.fetchers.neoreader import NeoreaderContentFetcher
+from gnosisllm_knowledge.fetchers.neoreader_discovery import NeoreaderDiscoveryClient
 from gnosisllm_knowledge.loaders.base import BaseLoader
+from gnosisllm_knowledge.loaders.discovery import DiscoveryLoader
 from gnosisllm_knowledge.loaders.sitemap import SitemapLoader
 from gnosisllm_knowledge.loaders.website import WebsiteLoader
@@ -20,6 +24,43 @@ LoaderCreator = Callable[
 ]
+def _create_discovery_loader(
+    fetcher: IContentFetcher,
+    chunker: ITextChunker,
+    config: dict[str, Any] | None,
+    event_emitter: EventEmitter | None,
+) -> DiscoveryLoader:
+    """Factory function for creating DiscoveryLoader instances.
+    Creates a DiscoveryLoader with a NeoreaderDiscoveryClient. If the fetcher
+    is a NeoreaderContentFetcher, reuses its config to ensure consistency.
+    Otherwise, creates config from environment variables.
+    Args:
+        fetcher: Content fetcher for retrieving URL content.
+        chunker: Text chunker for splitting content.
+        config: Optional configuration dictionary.
+        event_emitter: Optional event emitter for progress events.
+    Returns:
+        Configured DiscoveryLoader instance.
+    """
+    # Get config from fetcher if it's NeoreaderContentFetcher, otherwise use env
+    if isinstance(fetcher, NeoreaderContentFetcher):
+        neoreader_config = fetcher.config
+    else:
+        neoreader_config = NeoreaderConfig.from_env()
+    discovery_client = NeoreaderDiscoveryClient(neoreader_config)
+    return DiscoveryLoader(
+        fetcher=fetcher,
+        chunker=chunker,
+        discovery_client=discovery_client,
+        config=config,
+        event_emitter=event_emitter,
+    )
 class LoaderFactory:
     """Factory for creating content loaders (Registry Pattern).
@@ -29,6 +70,7 @@ class LoaderFactory:
     Built-in loaders:
     - website: Single URL loading
     - sitemap: Sitemap XML with recursive discovery
+    - discovery: Website crawling via Neo Reader Discovery API
     Example:
         ```python
@@ -40,6 +82,9 @@ class LoaderFactory:
         # Explicit type
         loader = factory.create("sitemap", config={"max_urls": 500})
+        # Discovery loader for full website crawling
+        loader = factory.create("discovery", config={"max_depth": 3, "max_pages": 100})
         # Register custom loader
         factory.register("custom", MyCustomLoader)
         ```
@@ -76,6 +121,7 @@ class LoaderFactory:
         """Register built-in loader types."""
         self.register("website", lambda f, c, cfg, e: WebsiteLoader(f, c, cfg, e))
         self.register("sitemap", lambda f, c, cfg, e: SitemapLoader(f, c, cfg, e))
+        self.register("discovery", _create_discovery_loader)
     def register(self, name: str, creator: LoaderCreator) -> None:
         """Register a loader type.

gnosisllm_knowledge/services/indexing.py CHANGED Viewed

@@ -1,4 +1,13 @@
-"""Knowledge indexing service."""
+"""Knowledge indexing service.
+This service orchestrates the document ingestion pipeline from source to index,
+including loading, chunking, and indexing.
+Note:
+    This service is tenant-agnostic. Multi-tenancy should be handled at the
+    API layer by using separate indices per account (e.g.,
+    `knowledge-{account_id}`) rather than filtering by account_id.
+"""
 from __future__ import annotations
@@ -82,7 +91,6 @@ class KnowledgeIndexingService:
         source: str,
         index_name: str,
         *,
-        account_id: str | None = None,
         collection_id: str | None = None,
         source_id: str | None = None,
         batch_size: int = 100,
@@ -93,10 +101,13 @@ class KnowledgeIndexingService:
         Uses streaming to process and index documents as they're fetched,
         avoiding memory issues with large sitemaps.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account.
         Args:
             source: Source URL or path.
-            index_name: Target index name.
-            account_id: Account ID for multi-tenancy.
+            index_name: Target index name (use tenant-specific name for isolation).
             collection_id: Collection ID.
             source_id: Source ID (auto-generated if not provided).
             batch_size: Documents per batch for indexing.
@@ -127,14 +138,13 @@ class KnowledgeIndexingService:
             # Stream documents and index in batches as they arrive
             # Note: Loader already chunks content, so we don't re-chunk here
             async for doc in self._loader.load_streaming(source, **options):
-                # Enrich document with tenant info
+                # Enrich document with collection info
                 enriched_doc = Document(
                     content=doc.content,
                     source=source,
                     doc_id=doc.doc_id,
                     url=doc.url,
                     title=doc.title,
-                    account_id=account_id,
                     collection_id=collection_id,
                     source_id=source_id,
                     chunk_index=doc.chunk_index,
@@ -230,8 +240,8 @@ class KnowledgeIndexingService:
                         doc_id=f"{doc.doc_id}-chunk-{i}",
                         url=doc.url,
                         title=doc.title,
-                        account_id=doc.account_id,
                         collection_id=doc.collection_id,
+                        collection_name=doc.collection_name,
                         source_id=doc.source_id,
                         chunk_index=i,
                         total_chunks=len(chunks),
@@ -271,14 +281,16 @@ class KnowledgeIndexingService:
         self,
         source_id: str,
         index_name: str,
-        account_id: str | None = None,
     ) -> int:
         """Delete all documents from a source.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account.
         Args:
             source_id: Source ID to delete.
-            index_name: Index name.
-            account_id: Optional account filter.
+            index_name: Index name (use tenant-specific name for isolation).
         Returns:
             Count of deleted documents.
@@ -287,21 +299,23 @@ class KnowledgeIndexingService:
             build_delete_by_source_query,
         )
-        query = build_delete_by_source_query(source_id, account_id)
+        query = build_delete_by_source_query(source_id)
         return await self._indexer.delete_by_query(query, index_name)
     async def delete_collection(
         self,
         collection_id: str,
         index_name: str,
-        account_id: str | None = None,
     ) -> int:
         """Delete all documents from a collection.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account.
         Args:
             collection_id: Collection ID to delete.
-            index_name: Index name.
-            account_id: Optional account filter.
+            index_name: Index name (use tenant-specific name for isolation).
         Returns:
             Count of deleted documents.
@@ -310,7 +324,7 @@ class KnowledgeIndexingService:
             build_delete_by_collection_query,
         )
-        query = build_delete_by_collection_query(collection_id, account_id)
+        query = build_delete_by_collection_query(collection_id)
         return await self._indexer.delete_by_query(query, index_name)
     async def reindex_source(
@@ -319,17 +333,19 @@ class KnowledgeIndexingService:
         source_id: str,
         index_name: str,
         *,
-        account_id: str | None = None,
         collection_id: str | None = None,
         **options: Any,
     ) -> IndexResult:
         """Reindex a source by deleting and re-loading.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account.
         Args:
             source: Source URL or path.
             source_id: Existing source ID.
-            index_name: Index name.
-            account_id: Account ID.
+            index_name: Index name (use tenant-specific name for isolation).
             collection_id: Collection ID.
             **options: Additional options.
@@ -337,13 +353,12 @@ class KnowledgeIndexingService:
             Index result.
         """
         # Delete existing documents
-        await self.delete_source(source_id, index_name, account_id)
+        await self.delete_source(source_id, index_name)
         # Re-index
         return await self.load_and_index(
             source=source,
             index_name=index_name,
-            account_id=account_id,
             collection_id=collection_id,
             source_id=source_id,
             **options,

gnosisllm_knowledge/services/search.py CHANGED Viewed

@@ -1,4 +1,12 @@
-"""Knowledge search service."""
+"""Knowledge search service.
+This service provides a high-level interface for searching knowledge documents
+using semantic, keyword, and hybrid search modes.
+Note:
+    This service is tenant-agnostic. Multi-tenancy should be handled at the
+    API layer by using separate indices per account (e.g., knowledge-{account_id}).
+"""
 from __future__ import annotations
@@ -70,7 +78,6 @@ class KnowledgeSearchService:
         mode: SearchMode = SearchMode.HYBRID,
         limit: int = 10,
         offset: int = 0,
-        account_id: str | None = None,
         collection_ids: list[str] | None = None,
         source_ids: list[str] | None = None,
         min_score: float | None = None,
@@ -78,13 +85,16 @@ class KnowledgeSearchService:
     ) -> SearchResult:
         """Search for knowledge documents.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account.
         Args:
             query: Search query text.
             index_name: Index to search (uses default if not provided).
             mode: Search mode (semantic, keyword, hybrid).
             limit: Maximum results.
             offset: Result offset for pagination.
-            account_id: Account ID for multi-tenancy.
             collection_ids: Filter by collection IDs.
             source_ids: Filter by source IDs.
             min_score: Minimum score threshold.
@@ -105,7 +115,6 @@ class KnowledgeSearchService:
             mode=mode,
             limit=limit,
             offset=offset,
-            account_id=account_id,
             collection_ids=collection_ids,
             source_ids=source_ids,
             min_score=min_score,
@@ -133,17 +142,19 @@ class KnowledgeSearchService:
         *,
         index_name: str | None = None,
         limit: int = 10,
-        account_id: str | None = None,
         collection_ids: list[str] | None = None,
         **options: Any,
     ) -> SearchResult:
         """Execute semantic (vector) search.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account.
         Args:
             query: Search query text.
             index_name: Index to search.
             limit: Maximum results.
-            account_id: Account ID for multi-tenancy.
             collection_ids: Filter by collection IDs.
             **options: Additional options.
@@ -155,7 +166,6 @@ class KnowledgeSearchService:
             index_name=index_name,
             mode=SearchMode.SEMANTIC,
             limit=limit,
-            account_id=account_id,
             collection_ids=collection_ids,
             **options,
         )
@@ -166,17 +176,19 @@ class KnowledgeSearchService:
         *,
         index_name: str | None = None,
         limit: int = 10,
-        account_id: str | None = None,
         collection_ids: list[str] | None = None,
         **options: Any,
     ) -> SearchResult:
         """Execute keyword (BM25) search.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account.
         Args:
             query: Search query text.
             index_name: Index to search.
             limit: Maximum results.
-            account_id: Account ID for multi-tenancy.
             collection_ids: Filter by collection IDs.
             **options: Additional options.
@@ -188,7 +200,6 @@ class KnowledgeSearchService:
             index_name=index_name,
             mode=SearchMode.KEYWORD,
             limit=limit,
-            account_id=account_id,
             collection_ids=collection_ids,
             **options,
         )
@@ -199,7 +210,6 @@ class KnowledgeSearchService:
         *,
         index_name: str | None = None,
         limit: int = 10,
-        account_id: str | None = None,
         collection_ids: list[str] | None = None,
         semantic_weight: float = 0.7,
         keyword_weight: float = 0.3,
@@ -207,11 +217,14 @@ class KnowledgeSearchService:
     ) -> SearchResult:
         """Execute hybrid search (semantic + keyword).
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account.
         Args:
             query: Search query text.
             index_name: Index to search.
             limit: Maximum results.
-            account_id: Account ID for multi-tenancy.
             collection_ids: Filter by collection IDs.
             semantic_weight: Weight for semantic score.
             keyword_weight: Weight for keyword score.
@@ -225,7 +238,6 @@ class KnowledgeSearchService:
             index_name=index_name,
             mode=SearchMode.HYBRID,
             limit=limit,
-            account_id=account_id,
             collection_ids=collection_ids,
             semantic_weight=semantic_weight,
             keyword_weight=keyword_weight,
@@ -264,17 +276,19 @@ class KnowledgeSearchService:
         index_name: str | None = None,
         mode: SearchMode = SearchMode.HYBRID,
         limit: int = 10,
-        account_id: str | None = None,
         **options: Any,
     ) -> list[SearchResult]:
         """Execute multiple searches in parallel.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account.
         Args:
             queries: List of query texts.
             index_name: Index to search.
             mode: Search mode.
             limit: Maximum results per query.
-            account_id: Account ID for multi-tenancy.
             **options: Additional options.
         Returns:
@@ -289,7 +303,6 @@ class KnowledgeSearchService:
                 text=query,
                 mode=mode,
                 limit=limit,
-                account_id=account_id,
             )
             for query in queries
         ]
@@ -310,15 +323,19 @@ class KnowledgeSearchService:
     async def count(
         self,
         index_name: str | None = None,
-        account_id: str | None = None,
         collection_id: str | None = None,
+        source_id: str | None = None,
     ) -> int:
         """Count documents in index.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account.
         Args:
             index_name: Index to count.
-            account_id: Filter by account.
             collection_id: Filter by collection.
+            source_id: Filter by source (for source deletion confirmation).
         Returns:
             Document count.
@@ -327,12 +344,12 @@ class KnowledgeSearchService:
         if not index:
             raise SearchError(message="No index specified")
-        # Build count query
+        # Build count query with optional filters
         query = SearchQuery(
             text="",
             limit=0,
-            account_id=account_id,
             collection_ids=[collection_id] if collection_id else None,
+            source_ids=[source_id] if source_id else None,
         )
         # Use a simple match_all to get total count

gnosisllm_knowledge/services/streaming_pipeline.py CHANGED Viewed

@@ -2,12 +2,19 @@
 This module provides the StreamingIndexingPipeline that orchestrates
 the load -> index pipeline with guaranteed bounded memory usage.
+Note:
+    This module is tenant-agnostic. Multi-tenancy should be handled at the
+    API layer by using separate indices per account (e.g.,
+    gnosisllm-{account_id}-knowledge) rather than filtering by account_id.
+    The account_id parameters are deprecated and will be ignored.
 """
 from __future__ import annotations
 import logging
 import time
+import warnings
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any
@@ -141,10 +148,16 @@ class StreamingIndexingPipeline:
     ) -> IndexResult:
         """Execute the streaming pipeline.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account. The
+            account_id parameter is deprecated and will be ignored.
         Args:
             source: Sitemap URL.
             index_name: Target OpenSearch index.
-            account_id: For multi-tenancy filtering.
+            account_id: Deprecated. This parameter is ignored.
+                Use index isolation (separate index per account) instead.
             collection_id: Collection within account.
             collection_name: Collection name for display.
             source_id: Source identifier.
@@ -153,6 +166,13 @@ class StreamingIndexingPipeline:
         Returns:
             Aggregated index result.
         """
+        if account_id is not None:
+            warnings.warn(
+                "account_id parameter is deprecated and will be ignored. "
+                "Use index isolation (separate index per account) instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         start_time = time.time()
         self._progress = StreamingProgress(current_phase="starting")
         await self._emit_progress()
@@ -167,7 +187,6 @@ class StreamingIndexingPipeline:
                 self._enrich_document(
                     doc,
                     source=source,
-                    account_id=account_id,
                     collection_id=collection_id,
                     collection_name=collection_name,
                     source_id=source_id,
@@ -248,31 +267,44 @@ class StreamingIndexingPipeline:
         self,
         doc: Document,
         source: str,
-        account_id: str | None,
         collection_id: str | None,
         collection_name: str | None,
         source_id: str | None,
+        account_id: str | None = None,
     ) -> Document:
-        """Add tenant and source info to document.
+        """Add source info to document.
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account. The
+            account_id parameter is deprecated and will be ignored.
         Args:
             doc: Original document.
             source: Source URL.
-            account_id: Account identifier.
             collection_id: Collection identifier.
             collection_name: Collection name for display.
             source_id: Source identifier.
+            account_id: Deprecated. This parameter is ignored.
+                Use index isolation (separate index per account) instead.
         Returns:
-            New Document with tenant info.
+            New Document with source info.
         """
+        if account_id is not None:
+            warnings.warn(
+                "account_id parameter is deprecated and will be ignored. "
+                "Use index isolation (separate index per account) instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         return Document(
             content=doc.content,
             source=source,
             doc_id=doc.doc_id,
             url=doc.url,
             title=doc.title,
-            account_id=account_id,
             collection_id=collection_id,
             collection_name=collection_name,
             source_id=source_id,

{gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gnosisllm-knowledge
-Version: 0.3.0
+Version: 0.4.0
 Summary: Enterprise-grade knowledge loading, indexing, and search for Python
 License: MIT
 Keywords: knowledge-base,rag,semantic-search,vector-search,opensearch,llm,embeddings,enterprise
@@ -46,7 +46,7 @@ Enterprise-grade knowledge loading, indexing, and semantic search library for Py
 - **Multiple Loaders**: Load content from websites, sitemaps, and files
 - **Intelligent Chunking**: Sentence-aware text splitting with configurable overlap
 - **OpenSearch Backend**: Production-ready with k-NN vector search
-- **Multi-Tenancy**: Built-in support for account and collection isolation
+- **Multi-Tenancy**: Index isolation for complete tenant separation (tenant-agnostic library)
 - **Event-Driven**: Observer pattern for progress tracking and monitoring
 - **SOLID Architecture**: Clean, maintainable, and extensible codebase
@@ -144,14 +144,15 @@ gnosisllm-knowledge load <URL> [OPTIONS]
 Options:
   --type         Source type: website, sitemap (auto-detects)
-  --index        Target index name (default: knowledge)
-  --account-id   Multi-tenant account ID
+  --index        Target index name (e.g., knowledge-tenant-123)
   --collection-id Collection grouping ID
   --batch-size   Documents per batch (default: 100)
   --max-urls     Max URLs from sitemap (default: 1000)
   --dry-run      Preview without indexing
 ```
+Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names (e.g., `--index knowledge-tenant-123`).
 ### Search
 Search indexed content with multiple modes:
@@ -161,14 +162,15 @@ gnosisllm-knowledge search <QUERY> [OPTIONS]
 Options:
   --mode         Search mode: semantic, keyword, hybrid, agentic
-  --index        Index to search (default: knowledge)
+  --index        Index to search (e.g., knowledge-tenant-123)
   --limit        Max results (default: 5)
-  --account-id   Filter by account
   --collection-ids Filter by collections (comma-separated)
   --json         Output as JSON for scripting
   --interactive  Interactive search session
 ```
+Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names.
 ## Architecture
 ```
@@ -319,22 +321,40 @@ agent_body = {
 ## Multi-Tenancy
+This library is **tenant-agnostic**. Multi-tenancy is achieved through **index isolation** - each tenant gets their own OpenSearch index.
 ```python
-# Load with tenant isolation
+# The calling application (e.g., API) constructs tenant-specific index names
+index_name = f"knowledge-{account_id}"
+# Create Knowledge instance for the tenant
+knowledge = Knowledge.from_opensearch(
+    host="localhost",
+    port=9200,
+    index_prefix=index_name,  # knowledge-tenant-123
+)
+# Load content to tenant's isolated index
 await knowledge.load(
     source="https://docs.example.com/sitemap.xml",
-    account_id="tenant-123",
     collection_id="docs",
 )
-# Search within tenant
+# Search within tenant's index (no account_id filter needed)
 results = await knowledge.search(
     "query",
-    account_id="tenant-123",
     collection_ids=["docs"],
 )
 ```
+**Note**: For audit purposes, you can store `account_id` in document metadata:
+```python
+await knowledge.load(
+    source="https://docs.example.com/sitemap.xml",
+    document_defaults={"metadata": {"account_id": "tenant-123"}},
+)
+```
 ## Agentic Memory
 Conversational memory with automatic fact extraction using OpenSearch's ML Memory plugin.

gnosisllm-knowledge 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

gnosisllm-knowledge 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl