PyPI - gnosisllm-knowledge - Versions diffs - 0.3.0__py3-none-any.whl → 0.4.3__py3-none-any.whl - Mend

gnosisllm-knowledge 0.3.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

gnosisllm_knowledge/api/knowledge.py +233 -35
gnosisllm_knowledge/backends/memory/indexer.py +27 -2
gnosisllm_knowledge/backends/memory/searcher.py +132 -10
gnosisllm_knowledge/backends/opensearch/agentic.py +14 -9
gnosisllm_knowledge/backends/opensearch/config.py +7 -0
gnosisllm_knowledge/backends/opensearch/indexer.py +48 -3
gnosisllm_knowledge/backends/opensearch/mappings.py +12 -4
gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
gnosisllm_knowledge/backends/opensearch/searcher.py +64 -6
gnosisllm_knowledge/backends/opensearch/setup.py +29 -33
gnosisllm_knowledge/cli/app.py +58 -19
gnosisllm_knowledge/cli/commands/agentic.py +15 -9
gnosisllm_knowledge/cli/commands/load.py +169 -19
gnosisllm_knowledge/cli/commands/memory.py +10 -0
gnosisllm_knowledge/cli/commands/search.py +9 -10
gnosisllm_knowledge/cli/commands/setup.py +25 -1
gnosisllm_knowledge/cli/utils/config.py +4 -4
gnosisllm_knowledge/core/domain/__init__.py +13 -0
gnosisllm_knowledge/core/domain/discovery.py +166 -0
gnosisllm_knowledge/core/domain/document.py +14 -19
gnosisllm_knowledge/core/domain/search.py +10 -25
gnosisllm_knowledge/core/domain/source.py +11 -12
gnosisllm_knowledge/core/events/__init__.py +8 -0
gnosisllm_knowledge/core/events/types.py +122 -5
gnosisllm_knowledge/core/exceptions.py +93 -0
gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
gnosisllm_knowledge/core/interfaces/searcher.py +30 -1
gnosisllm_knowledge/core/interfaces/streaming.py +10 -4
gnosisllm_knowledge/fetchers/__init__.py +8 -0
gnosisllm_knowledge/fetchers/config.py +27 -0
gnosisllm_knowledge/fetchers/neoreader.py +31 -3
gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
gnosisllm_knowledge/loaders/__init__.py +5 -1
gnosisllm_knowledge/loaders/discovery.py +338 -0
gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
gnosisllm_knowledge/loaders/factory.py +46 -0
gnosisllm_knowledge/services/indexing.py +51 -21
gnosisllm_knowledge/services/search.py +42 -28
gnosisllm_knowledge/services/streaming_pipeline.py +45 -7
{gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.3.dist-info}/METADATA +30 -10
gnosisllm_knowledge-0.4.3.dist-info/RECORD +81 -0
gnosisllm_knowledge-0.3.0.dist-info/RECORD +0 -77
{gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.3.dist-info}/WHEEL +0 -0
{gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.3.dist-info}/entry_points.txt +0 -0

gnosisllm_knowledge/backends/opensearch/setup.py CHANGED Viewed

@@ -249,10 +249,9 @@ class OpenSearchSetupAdapter:
             self._model_id = self._config.model_id
         # Step 4: Create ingest pipeline
-        # Only create ingest pipeline for global setup (not per-account)
-        # Account indices should use the global pipeline to ensure consistent model
-        is_global_setup = self._config.index_prefix == "gnosisllm"
-        if self._model_id and is_global_setup:
+        # Create pipeline for any setup that has a model deployed
+        # Each index_prefix namespace gets its own pipeline
+        if self._model_id:
             try:
                 await self._create_ingest_pipeline()
                 pipeline_name = self._config.ingest_pipeline_name or f"{self._config.index_prefix}-ingest-pipeline"
@@ -261,35 +260,33 @@ class OpenSearchSetupAdapter:
                 errors.append(f"Failed to create ingest pipeline: {e}")
                 logger.error(f"Failed to create ingest pipeline: {e}")
-        # Step 5: Create search pipeline (only for global setup)
-        if is_global_setup:
-            try:
-                await self._create_search_pipeline()
-                pipeline_name = self._config.search_pipeline_name or f"{self._config.index_prefix}-search-pipeline"
-                steps_completed.append(f"Created search pipeline: {pipeline_name}")
-            except Exception as e:
-                errors.append(f"Failed to create search pipeline: {e}")
-                logger.error(f"Failed to create search pipeline: {e}")
+        # Step 5: Create search pipeline for hybrid search
+        try:
+            await self._create_search_pipeline()
+            pipeline_name = self._config.search_pipeline_name or f"{self._config.index_prefix}-search-pipeline"
+            steps_completed.append(f"Created search pipeline: {pipeline_name}")
+        except Exception as e:
+            errors.append(f"Failed to create search pipeline: {e}")
+            logger.error(f"Failed to create search pipeline: {e}")
-        # Step 6: Create index template (only for global setup)
-        # Template covers all gnosisllm-* indices including per-account indices
-        if is_global_setup:
-            try:
-                template_name = f"{self._config.index_prefix}-template"
-                template_body = get_index_template(self._config)
+        # Step 6: Create index template for this namespace
+        # Template covers all {index_prefix}-* indices
+        try:
+            template_name = f"{self._config.index_prefix}-template"
+            template_body = get_index_template(self._config)
-                # Ensure template has global pipeline for auto-index creation
-                global_pipeline = self._config.ingest_pipeline_name or "gnosisllm-ingest-pipeline"
-                template_body["template"]["settings"]["index"]["default_pipeline"] = global_pipeline
+            # Set default pipeline for auto-index creation within this namespace
+            default_pipeline = self._config.ingest_pipeline_name or f"{self._config.index_prefix}-ingest-pipeline"
+            template_body["template"]["settings"]["index"]["default_pipeline"] = default_pipeline
-                await self._client.indices.put_index_template(
-                    name=template_name,
-                    body=template_body,
-                )
-                steps_completed.append(f"Created index template: {template_name}")
-            except Exception as e:
-                errors.append(f"Failed to create index template: {e}")
-                logger.error(f"Failed to create index template: {e}")
+            await self._client.indices.put_index_template(
+                name=template_name,
+                body=template_body,
+            )
+            steps_completed.append(f"Created index template: {template_name}")
+        except Exception as e:
+            errors.append(f"Failed to create index template: {e}")
+            logger.error(f"Failed to create index template: {e}")
         # Step 7: Create knowledge index
         try:
@@ -298,9 +295,8 @@ class OpenSearchSetupAdapter:
             if not exists:
                 settings = get_knowledge_index_settings(self._config)
-                # Add default pipeline - always use global pipeline for consistency
-                # This ensures all accounts use the same embedding model
-                pipeline_name = self._config.ingest_pipeline_name or "gnosisllm-ingest-pipeline"
+                # Add default pipeline for this namespace
+                pipeline_name = self._config.ingest_pipeline_name or f"{self._config.index_prefix}-ingest-pipeline"
                 settings["index"]["default_pipeline"] = pipeline_name
                 await self._client.indices.create(

gnosisllm_knowledge/cli/app.py CHANGED Viewed

@@ -1,6 +1,11 @@
 """GnosisLLM Knowledge CLI Application.
 Main entry point assembling all CLI commands with enterprise-grade UX.
+Note:
+    This library is tenant-agnostic. Multi-tenancy is achieved through index
+    isolation - each tenant should use a separate index (e.g., "knowledge-{account_id}").
+    Use --index to target tenant-specific indices.
 """
 from __future__ import annotations
@@ -147,17 +152,13 @@ def load(
         typer.Option(
             "--type",
             "-t",
-            help="Source type: website, sitemap (auto-detects if not specified).",
+            help="Source type: website, sitemap, discovery (auto-detects if not specified).",
         ),
     ] = None,
     index: Annotated[
         str,
-        typer.Option("--index", "-i", help="Target index name."),
+        typer.Option("--index", "-i", help="Target index name (use tenant-specific name for multi-tenancy)."),
     ] = "knowledge",
-    account_id: Annotated[
-        Optional[str],
-        typer.Option("--account-id", "-a", help="Multi-tenant account ID."),
-    ] = None,
     collection_id: Annotated[
         Optional[str],
         typer.Option("--collection-id", "-c", help="Collection grouping ID."),
@@ -186,16 +187,50 @@ def load(
         bool,
         typer.Option("--verbose", "-V", help="Show per-document progress."),
     ] = False,
+    discovery: Annotated[
+        bool,
+        typer.Option(
+            "--discovery",
+            "-D",
+            help="Use discovery loader to crawl and discover all URLs from the website.",
+        ),
+    ] = False,
+    max_depth: Annotated[
+        int,
+        typer.Option("--max-depth", help="Maximum crawl depth for discovery (default: 3)."),
+    ] = 3,
+    max_pages: Annotated[
+        int,
+        typer.Option("--max-pages", help="Maximum pages to discover (default: 100)."),
+    ] = 100,
+    same_domain: Annotated[
+        bool,
+        typer.Option(
+            "--same-domain/--any-domain",
+            help="Only crawl URLs on the same domain (default: same domain only).",
+        ),
+    ] = True,
 ) -> None:
     """Load and index content from URLs or sitemaps.
     Fetches content, chunks it for optimal embedding, and indexes
     into OpenSearch with automatic embedding generation.
+    [bold]Multi-tenancy:[/bold]
+    Use --index with tenant-specific index names for isolation
+    (e.g., --index knowledge-{account_id}). Each tenant's data
+    is stored in a separate index for complete isolation.
+    [bold]Discovery Mode:[/bold]
+    Use --discovery to crawl and discover all URLs from a website
+    before loading. This is useful for sites without a sitemap.
     [bold]Example:[/bold]
         $ gnosisllm-knowledge load https://docs.example.com/intro
         $ gnosisllm-knowledge load https://example.com/sitemap.xml --type sitemap
         $ gnosisllm-knowledge load https://docs.example.com/sitemap.xml --max-urls 500
+        $ gnosisllm-knowledge load https://docs.example.com --discovery --max-depth 5
+        $ gnosisllm-knowledge load https://docs.example.com --index knowledge-tenant-123
     """
     from gnosisllm_knowledge.cli.commands.load import load_command
@@ -205,7 +240,6 @@ def load(
             source=source,
             source_type=source_type,
             index_name=index,
-            account_id=account_id,
             collection_id=collection_id,
             source_id=source_id,
             batch_size=batch_size,
@@ -213,6 +247,10 @@ def load(
             force=force,
             dry_run=dry_run,
             verbose=verbose,
+            discovery=discovery,
+            max_depth=max_depth,
+            max_pages=max_pages,
+            same_domain=same_domain,
         )
     )
@@ -238,7 +276,7 @@ def search(
     ] = "hybrid",
     index: Annotated[
         str,
-        typer.Option("--index", "-i", help="Index to search."),
+        typer.Option("--index", "-i", help="Index to search (use tenant-specific name for multi-tenancy)."),
     ] = "knowledge",
     limit: Annotated[
         int,
@@ -248,10 +286,6 @@ def search(
         int,
         typer.Option("--offset", "-o", help="Pagination offset."),
     ] = 0,
-    account_id: Annotated[
-        Optional[str],
-        typer.Option("--account-id", "-a", help="Filter by account ID."),
-    ] = None,
     collection_ids: Annotated[
         Optional[str],
         typer.Option("--collection-ids", "-c", help="Filter by collection IDs (comma-separated)."),
@@ -289,10 +323,16 @@ def search(
     - [cyan]hybrid[/cyan]: Combined semantic + keyword (default, best results)
     - [cyan]agentic[/cyan]: AI-powered search with reasoning
+    [bold]Multi-tenancy:[/bold]
+    Use --index with tenant-specific index names for isolation
+    (e.g., --index knowledge-{account_id}). Each tenant's data
+    is stored in a separate index for complete isolation.
     [bold]Example:[/bold]
         $ gnosisllm-knowledge search "how to configure auth"
         $ gnosisllm-knowledge search "API reference" --mode semantic --limit 10
         $ gnosisllm-knowledge search --interactive
+        $ gnosisllm-knowledge search "query" --index knowledge-tenant-123
     """
     from gnosisllm_knowledge.cli.commands.search import search_command
@@ -304,7 +344,6 @@ def search(
             index_name=index,
             limit=limit,
             offset=offset,
-            account_id=account_id,
             collection_ids=collection_ids,
             source_ids=source_ids,
             min_score=min_score,
@@ -451,7 +490,7 @@ def agentic_setup(
 def agentic_chat(
     index: Annotated[
         str,
-        typer.Option("--index", "-i", help="Index to search."),
+        typer.Option("--index", "-i", help="Index to search (use tenant-specific name for multi-tenancy)."),
     ] = "knowledge",
     agent_type: Annotated[
         str,
@@ -461,10 +500,6 @@ def agentic_chat(
             help="Agent type: flow or conversational (default).",
         ),
     ] = "conversational",
-    account_id: Annotated[
-        Optional[str],
-        typer.Option("--account-id", "-a", help="Filter by account ID."),
-    ] = None,
     collection_ids: Annotated[
         Optional[str],
         typer.Option("--collection-ids", "-c", help="Filter by collection IDs (comma-separated)."),
@@ -479,10 +514,15 @@ def agentic_chat(
     Start a conversation with the AI-powered knowledge assistant.
     The agent remembers context for multi-turn dialogue.
+    [bold]Multi-tenancy:[/bold]
+    Use --index with tenant-specific index names for isolation
+    (e.g., --index knowledge-{account_id}).
     [bold]Example:[/bold]
         $ gnosisllm-knowledge agentic chat
         $ gnosisllm-knowledge agentic chat --type flow
         $ gnosisllm-knowledge agentic chat --verbose
+        $ gnosisllm-knowledge agentic chat --index knowledge-tenant-123
     """
     from gnosisllm_knowledge.cli.commands.agentic import agentic_chat_command
@@ -491,7 +531,6 @@ def agentic_chat(
             display=display,
             index_name=index,
             agent_type=agent_type,
-            account_id=account_id,
             collection_ids=collection_ids,
             verbose=verbose,
         )

gnosisllm_knowledge/cli/commands/agentic.py CHANGED Viewed

@@ -4,6 +4,10 @@ Commands:
 - setup: Configure agents in OpenSearch
 - chat: Interactive agentic chat session
 - status: Show agent configuration status
+Note:
+    This library is tenant-agnostic. Multi-tenancy is achieved through index
+    isolation - each tenant should use a separate index (e.g., "knowledge-{account_id}").
 """
 from __future__ import annotations
@@ -202,17 +206,19 @@ async def agentic_chat_command(
     display: RichDisplayService,
     index_name: str = "knowledge",
     agent_type: str = "conversational",
-    account_id: str | None = None,
     collection_ids: str | None = None,
     verbose: bool = False,
 ) -> None:
     """Interactive agentic chat session.
+    Note:
+        Multi-tenancy is achieved through index isolation. Use tenant-specific
+        index names instead (e.g., --index knowledge-tenant-123).
     Args:
         display: Display service for output.
-        index_name: Index to search.
+        index_name: Index to search (use tenant-specific name for isolation).
         agent_type: Agent type ('flow' or 'conversational').
-        account_id: Filter by account ID.
         collection_ids: Filter by collection IDs (comma-separated).
         verbose: Show reasoning steps.
     """
@@ -242,7 +248,6 @@ async def agentic_chat_command(
         if agent_type == "conversational":
             return await searcher.create_conversation(
                 name="CLI Chat Session",
-                account_id=account_id,
             )
         return None
@@ -291,7 +296,6 @@ async def agentic_chat_command(
                     agent_type=AgentType.CONVERSATIONAL if agent_type == "conversational" else AgentType.FLOW,
                     conversation_id=conversation_id,
                     collection_ids=collection_list,
-                    account_id=account_id,
                     include_reasoning=verbose,
                 )
@@ -395,7 +399,6 @@ async def agentic_search_command(
     query: str,
     index_name: str = "knowledge",
     agent_type: str = "flow",
-    account_id: str | None = None,
     collection_ids: str | None = None,
     source_ids: str | None = None,
     limit: int = 5,
@@ -404,12 +407,15 @@ async def agentic_search_command(
 ) -> dict[str, Any] | None:
     """Execute agentic search.
+    Note:
+        Multi-tenancy is achieved through index isolation. Use tenant-specific
+        index names instead (e.g., --index knowledge-tenant-123).
     Args:
         display: Display service for output.
         query: Search query text.
-        index_name: Index to search.
+        index_name: Index to search (use tenant-specific name for isolation).
         agent_type: Agent type ('flow' or 'conversational').
-        account_id: Filter by account ID.
         collection_ids: Filter by collection IDs (comma-separated).
         source_ids: Filter by source IDs (comma-separated).
         limit: Maximum source documents to retrieve.
@@ -447,12 +453,12 @@ async def agentic_search_command(
             )
         # Build query
+        # Note: account_id is deprecated and ignored - use index isolation instead
         agentic_query = AgenticSearchQuery(
             text=query,
             agent_type=AgentType.CONVERSATIONAL if agent_type == "conversational" else AgentType.FLOW,
             collection_ids=collection_list,
             source_ids=source_list,
-            account_id=account_id,
             limit=limit,
             include_reasoning=verbose,
         )

gnosisllm-knowledge 0.3.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

gnosisllm-knowledge 0.3.0__py3-none-any.whl → 0.4.3__py3-none-any.whl