gnosisllm-knowledge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. gnosisllm_knowledge/__init__.py +152 -0
  2. gnosisllm_knowledge/api/__init__.py +5 -0
  3. gnosisllm_knowledge/api/knowledge.py +548 -0
  4. gnosisllm_knowledge/backends/__init__.py +26 -0
  5. gnosisllm_knowledge/backends/memory/__init__.py +9 -0
  6. gnosisllm_knowledge/backends/memory/indexer.py +384 -0
  7. gnosisllm_knowledge/backends/memory/searcher.py +516 -0
  8. gnosisllm_knowledge/backends/opensearch/__init__.py +19 -0
  9. gnosisllm_knowledge/backends/opensearch/agentic.py +738 -0
  10. gnosisllm_knowledge/backends/opensearch/config.py +195 -0
  11. gnosisllm_knowledge/backends/opensearch/indexer.py +499 -0
  12. gnosisllm_knowledge/backends/opensearch/mappings.py +255 -0
  13. gnosisllm_knowledge/backends/opensearch/queries.py +445 -0
  14. gnosisllm_knowledge/backends/opensearch/searcher.py +383 -0
  15. gnosisllm_knowledge/backends/opensearch/setup.py +1390 -0
  16. gnosisllm_knowledge/chunking/__init__.py +9 -0
  17. gnosisllm_knowledge/chunking/fixed.py +138 -0
  18. gnosisllm_knowledge/chunking/sentence.py +239 -0
  19. gnosisllm_knowledge/cli/__init__.py +18 -0
  20. gnosisllm_knowledge/cli/app.py +509 -0
  21. gnosisllm_knowledge/cli/commands/__init__.py +7 -0
  22. gnosisllm_knowledge/cli/commands/agentic.py +529 -0
  23. gnosisllm_knowledge/cli/commands/load.py +369 -0
  24. gnosisllm_knowledge/cli/commands/search.py +440 -0
  25. gnosisllm_knowledge/cli/commands/setup.py +228 -0
  26. gnosisllm_knowledge/cli/display/__init__.py +5 -0
  27. gnosisllm_knowledge/cli/display/service.py +555 -0
  28. gnosisllm_knowledge/cli/utils/__init__.py +5 -0
  29. gnosisllm_knowledge/cli/utils/config.py +207 -0
  30. gnosisllm_knowledge/core/__init__.py +87 -0
  31. gnosisllm_knowledge/core/domain/__init__.py +43 -0
  32. gnosisllm_knowledge/core/domain/document.py +240 -0
  33. gnosisllm_knowledge/core/domain/result.py +176 -0
  34. gnosisllm_knowledge/core/domain/search.py +327 -0
  35. gnosisllm_knowledge/core/domain/source.py +139 -0
  36. gnosisllm_knowledge/core/events/__init__.py +23 -0
  37. gnosisllm_knowledge/core/events/emitter.py +216 -0
  38. gnosisllm_knowledge/core/events/types.py +226 -0
  39. gnosisllm_knowledge/core/exceptions.py +407 -0
  40. gnosisllm_knowledge/core/interfaces/__init__.py +20 -0
  41. gnosisllm_knowledge/core/interfaces/agentic.py +136 -0
  42. gnosisllm_knowledge/core/interfaces/chunker.py +64 -0
  43. gnosisllm_knowledge/core/interfaces/fetcher.py +112 -0
  44. gnosisllm_knowledge/core/interfaces/indexer.py +244 -0
  45. gnosisllm_knowledge/core/interfaces/loader.py +102 -0
  46. gnosisllm_knowledge/core/interfaces/searcher.py +178 -0
  47. gnosisllm_knowledge/core/interfaces/setup.py +164 -0
  48. gnosisllm_knowledge/fetchers/__init__.py +12 -0
  49. gnosisllm_knowledge/fetchers/config.py +77 -0
  50. gnosisllm_knowledge/fetchers/http.py +167 -0
  51. gnosisllm_knowledge/fetchers/neoreader.py +204 -0
  52. gnosisllm_knowledge/loaders/__init__.py +13 -0
  53. gnosisllm_knowledge/loaders/base.py +399 -0
  54. gnosisllm_knowledge/loaders/factory.py +202 -0
  55. gnosisllm_knowledge/loaders/sitemap.py +285 -0
  56. gnosisllm_knowledge/loaders/website.py +57 -0
  57. gnosisllm_knowledge/py.typed +0 -0
  58. gnosisllm_knowledge/services/__init__.py +9 -0
  59. gnosisllm_knowledge/services/indexing.py +387 -0
  60. gnosisllm_knowledge/services/search.py +349 -0
  61. gnosisllm_knowledge-0.2.0.dist-info/METADATA +382 -0
  62. gnosisllm_knowledge-0.2.0.dist-info/RECORD +64 -0
  63. gnosisllm_knowledge-0.2.0.dist-info/WHEEL +4 -0
  64. gnosisllm_knowledge-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,509 @@
1
+ """GnosisLLM Knowledge CLI Application.
2
+
3
+ Main entry point assembling all CLI commands with enterprise-grade UX.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import asyncio
9
+ from typing import Annotated, Optional
10
+
11
+ import typer
12
+ from rich.console import Console
13
+
14
+ from gnosisllm_knowledge.cli.display import RichDisplayService
15
+ from gnosisllm_knowledge.cli.utils import CliConfig
16
+
17
# Main application
# Single Typer app owning every top-level command. no_args_is_help prints
# usage when invoked bare; pretty_exceptions_show_locals=False keeps local
# variables (which may include --password values) out of crash tracebacks.
app = typer.Typer(
    name="gnosisllm-knowledge",
    help="Enterprise-grade knowledge loading, indexing, and semantic search.",
    no_args_is_help=True,
    rich_markup_mode="rich",
    pretty_exceptions_enable=True,
    pretty_exceptions_show_locals=False,
)

# Shared console and display service
# One Console/RichDisplayService pair is created at import time and reused
# by every command below, so all output shares a single Rich console.
console = Console()
display = RichDisplayService(console)
30
+
31
+
32
def version_callback(value: bool) -> None:
    """Display version and exit."""
    # Typer invokes eager option callbacks even when the flag is absent
    # (value is False); bail out early in that case.
    if not value:
        return
    # Imported lazily so `--help` and friends never pay the package
    # import cost just to read __version__.
    from gnosisllm_knowledge import __version__

    console.print(f"gnosisllm-knowledge [cyan]{__version__}[/cyan]")
    raise typer.Exit()
39
+
40
+
41
@app.callback()
def main_callback(
    version: Annotated[
        Optional[bool],
        typer.Option(
            "--version",
            "-v",
            help="Show version and exit.",
            callback=version_callback,
            is_eager=True,
        ),
    ] = None,
) -> None:
    """GnosisLLM Knowledge - Enterprise knowledge management CLI."""
    # The --version flag is handled entirely by its eager callback; the
    # callback body itself has nothing to do.
56
+
57
+
58
+ # ============================================================================
59
+ # SETUP COMMAND
60
+ # ============================================================================
61
+
62
+
63
@app.command()
def setup(
    host: Annotated[
        str,
        typer.Option("--host", "-h", help="OpenSearch host."),
    ] = "localhost",
    port: Annotated[
        int,
        typer.Option("--port", "-p", help="OpenSearch port."),
    ] = 9200,
    username: Annotated[
        Optional[str],
        typer.Option("--username", "-u", help="OpenSearch username."),
    ] = None,
    password: Annotated[
        Optional[str],
        typer.Option("--password", help="OpenSearch password."),
    ] = None,
    use_ssl: Annotated[
        bool,
        typer.Option("--use-ssl", help="Enable SSL."),
    ] = False,
    verify_certs: Annotated[
        bool,
        typer.Option("--verify-certs", help="Verify SSL certificates."),
    ] = False,
    force: Annotated[
        bool,
        typer.Option("--force", "-f", help="Clean up existing resources first."),
    ] = False,
    no_sample_data: Annotated[
        bool,
        typer.Option("--no-sample-data", help="Skip sample data ingestion."),
    ] = False,
    no_hybrid: Annotated[
        bool,
        typer.Option("--no-hybrid", help="Skip hybrid search pipeline."),
    ] = False,
) -> None:
    """Configure OpenSearch with ML model for neural search.

    Sets up the complete neural search infrastructure:
    - OpenAI connector for embeddings
    - Model group and deployed ML model
    - Ingest pipeline for automatic embedding generation
    - Search pipeline for hybrid scoring
    - Knowledge index with k-NN vector mapping

    [bold]Example:[/bold]
        $ gnosisllm-knowledge setup
        $ gnosisllm-knowledge setup --host opensearch.example.com --port 443 --use-ssl
        $ gnosisllm-knowledge setup --force # Clean and recreate
    """
    # Imported lazily so CLI startup stays fast for unrelated commands.
    from gnosisllm_knowledge.cli.commands.setup import setup_command

    # Gather the parsed CLI options once, then hand them to the async
    # implementation on a fresh event loop.
    options = {
        "display": display,
        "host": host,
        "port": port,
        "username": username,
        "password": password,
        "use_ssl": use_ssl,
        "verify_certs": verify_certs,
        "force": force,
        "no_sample_data": no_sample_data,
        "no_hybrid": no_hybrid,
    }
    asyncio.run(setup_command(**options))
132
+
133
+
134
+ # ============================================================================
135
+ # LOAD COMMAND
136
+ # ============================================================================
137
+
138
+
139
@app.command()
def load(
    source: Annotated[
        str,
        typer.Argument(help="URL or sitemap to load content from."),
    ],
    source_type: Annotated[
        Optional[str],
        typer.Option(
            "--type",
            "-t",
            help="Source type: website, sitemap (auto-detects if not specified).",
        ),
    ] = None,
    index: Annotated[
        str,
        typer.Option("--index", "-i", help="Target index name."),
    ] = "knowledge",
    account_id: Annotated[
        Optional[str],
        typer.Option("--account-id", "-a", help="Multi-tenant account ID."),
    ] = None,
    collection_id: Annotated[
        Optional[str],
        typer.Option("--collection-id", "-c", help="Collection grouping ID."),
    ] = None,
    source_id: Annotated[
        Optional[str],
        typer.Option("--source-id", "-s", help="Source identifier (defaults to URL)."),
    ] = None,
    batch_size: Annotated[
        int,
        typer.Option("--batch-size", "-b", help="Documents per indexing batch."),
    ] = 100,
    max_urls: Annotated[
        int,
        typer.Option("--max-urls", "-m", help="Maximum URLs to process from sitemap."),
    ] = 1000,
    force: Annotated[
        bool,
        typer.Option("--force", "-f", help="Delete existing source documents first."),
    ] = False,
    dry_run: Annotated[
        bool,
        typer.Option("--dry-run", help="Preview without indexing."),
    ] = False,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-V", help="Show per-document progress."),
    ] = False,
) -> None:
    """Load and index content from URLs or sitemaps.

    Fetches content, chunks it for optimal embedding, and indexes
    into OpenSearch with automatic embedding generation.

    [bold]Example:[/bold]
        $ gnosisllm-knowledge load https://docs.example.com/intro
        $ gnosisllm-knowledge load https://example.com/sitemap.xml --type sitemap
        $ gnosisllm-knowledge load https://docs.example.com/sitemap.xml --max-urls 500
    """
    # Imported lazily so CLI startup stays fast for unrelated commands.
    from gnosisllm_knowledge.cli.commands.load import load_command

    # Note: the CLI option is --index but the implementation expects
    # the keyword "index_name".
    options = {
        "display": display,
        "source": source,
        "source_type": source_type,
        "index_name": index,
        "account_id": account_id,
        "collection_id": collection_id,
        "source_id": source_id,
        "batch_size": batch_size,
        "max_urls": max_urls,
        "force": force,
        "dry_run": dry_run,
        "verbose": verbose,
    }
    asyncio.run(load_command(**options))
218
+
219
+
220
+ # ============================================================================
221
+ # SEARCH COMMAND
222
+ # ============================================================================
223
+
224
+
225
@app.command()
def search(
    query: Annotated[
        Optional[str],
        typer.Argument(help="Search query text."),
    ] = None,
    mode: Annotated[
        str,
        typer.Option(
            "--mode",
            "-m",
            help="Search mode: semantic, keyword, hybrid (default), agentic.",
        ),
    ] = "hybrid",
    index: Annotated[
        str,
        typer.Option("--index", "-i", help="Index to search."),
    ] = "knowledge",
    limit: Annotated[
        int,
        typer.Option("--limit", "-l", help="Maximum results to return."),
    ] = 5,
    offset: Annotated[
        int,
        typer.Option("--offset", "-o", help="Pagination offset."),
    ] = 0,
    account_id: Annotated[
        Optional[str],
        typer.Option("--account-id", "-a", help="Filter by account ID."),
    ] = None,
    collection_ids: Annotated[
        Optional[str],
        typer.Option("--collection-ids", "-c", help="Filter by collection IDs (comma-separated)."),
    ] = None,
    source_ids: Annotated[
        Optional[str],
        typer.Option("--source-ids", "-s", help="Filter by source IDs (comma-separated)."),
    ] = None,
    min_score: Annotated[
        float,
        typer.Option("--min-score", help="Minimum score threshold (0.0-1.0)."),
    ] = 0.0,
    explain: Annotated[
        bool,
        typer.Option("--explain", "-e", help="Show score explanation."),
    ] = False,
    json_output: Annotated[
        bool,
        typer.Option("--json", "-j", help="Output as JSON for scripting."),
    ] = False,
    interactive: Annotated[
        bool,
        typer.Option("--interactive", "-I", help="Interactive search session."),
    ] = False,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-V", help="Show full content (not truncated)."),
    ] = False,
) -> None:
    """Search indexed content with semantic, keyword, or hybrid modes.

    Supports multiple search strategies:
    - [cyan]semantic[/cyan]: Meaning-based vector search using embeddings
    - [cyan]keyword[/cyan]: Traditional BM25 text matching
    - [cyan]hybrid[/cyan]: Combined semantic + keyword (default, best results)
    - [cyan]agentic[/cyan]: AI-powered search with reasoning

    [bold]Example:[/bold]
        $ gnosisllm-knowledge search "how to configure auth"
        $ gnosisllm-knowledge search "API reference" --mode semantic --limit 10
        $ gnosisllm-knowledge search --interactive
    """
    # Imported lazily so CLI startup stays fast for unrelated commands.
    from gnosisllm_knowledge.cli.commands.search import search_command

    # The --index option maps onto the implementation's "index_name" kwarg;
    # everything else forwards under its own name.
    options = {
        "display": display,
        "query": query,
        "mode": mode,
        "index_name": index,
        "limit": limit,
        "offset": offset,
        "account_id": account_id,
        "collection_ids": collection_ids,
        "source_ids": source_ids,
        "min_score": min_score,
        "explain": explain,
        "json_output": json_output,
        "interactive": interactive,
        "verbose": verbose,
    }
    asyncio.run(search_command(**options))
317
+
318
+
319
+ # ============================================================================
320
+ # INFO COMMAND
321
+ # ============================================================================
322
+
323
+
324
@app.command()
def info() -> None:
    """Display configuration and environment information.

    Shows current settings from environment variables and
    validates connectivity to required services.
    """
    config = CliConfig.from_env()

    display.header("GnosisLLM Knowledge", "Configuration and Environment Info")

    unset = "[dim]Not set[/dim]"
    # Each section renders as one table; order here is the display order.
    sections = [
        (
            "OpenSearch Configuration",
            [
                ("Host", f"{config.opensearch_host}:{config.opensearch_port}"),
                ("SSL", "Enabled" if config.opensearch_use_ssl else "Disabled"),
                ("Auth", "Configured" if config.opensearch_username else "None"),
                ("Model ID", config.opensearch_model_id or unset),
                ("Index", config.opensearch_index_name),
            ],
        ),
        (
            "Embedding Configuration",
            [
                ("OpenAI Key", "✓ Set" if config.openai_api_key else "✗ Not set"),
                ("Model", config.openai_embedding_model),
                ("Dimension", str(config.openai_embedding_dimension)),
            ],
        ),
        (
            "Agentic Search Configuration",
            [
                ("Flow Agent", config.opensearch_flow_agent_id or unset),
                ("Conversational Agent", config.opensearch_conversational_agent_id or unset),
                ("LLM Model", config.agentic_llm_model),
            ],
        ),
        (
            "Content Fetching",
            [
                ("Neoreader", config.neoreader_host),
            ],
        ),
    ]
    # Blank line between tables (not before the first one).
    for position, (title, rows) in enumerate(sections):
        if position:
            display.newline()
        display.table(title, rows)

    # Validation
    setup_errors = config.validate_for_setup()
    search_errors = config.validate_for_search()

    if setup_errors or search_errors:
        display.newline()
        display.warning("Configuration Issues:")
        for error in setup_errors + search_errors:
            display.error(f" {error}")
386
+
387
+
388
+ # ============================================================================
389
+ # AGENTIC SUBCOMMAND GROUP
390
+ # ============================================================================
391
+
392
# Sub-application grouping the AI-powered search commands; mounted on the
# main app under the "agentic" command name so invocations look like
# `gnosisllm-knowledge agentic <setup|chat|status>`.
agentic_app = typer.Typer(
    name="agentic",
    help="AI-powered agentic search commands.",
    no_args_is_help=True,
    rich_markup_mode="rich",
)
app.add_typer(agentic_app, name="agentic")
399
+
400
+
401
@agentic_app.command("setup")
def agentic_setup(
    agent_type: Annotated[
        str,
        typer.Option(
            "--type",
            "-t",
            help="Agent type to setup: flow, conversational, or all (default).",
        ),
    ] = "all",
    force: Annotated[
        bool,
        typer.Option("--force", "-f", help="Force recreate existing agents."),
    ] = False,
) -> None:
    """Setup agentic search agents in OpenSearch.

    Creates and deploys AI agents for intelligent search:
    - [cyan]flow[/cyan]: Fast RAG for single-turn queries
    - [cyan]conversational[/cyan]: Multi-turn with memory support

    [bold]Example:[/bold]
        $ gnosisllm-knowledge agentic setup
        $ gnosisllm-knowledge agentic setup --type flow
        $ gnosisllm-knowledge agentic setup --force
    """
    # Imported lazily so CLI startup stays fast for unrelated commands.
    from gnosisllm_knowledge.cli.commands.agentic import agentic_setup_command

    # Forward the parsed options to the async implementation.
    options = {"display": display, "agent_type": agent_type, "force": force}
    asyncio.run(agentic_setup_command(**options))
436
+
437
+
438
@agentic_app.command("chat")
def agentic_chat(
    index: Annotated[
        str,
        typer.Option("--index", "-i", help="Index to search."),
    ] = "knowledge",
    agent_type: Annotated[
        str,
        typer.Option(
            "--type",
            "-t",
            help="Agent type: flow or conversational (default).",
        ),
    ] = "conversational",
    account_id: Annotated[
        Optional[str],
        typer.Option("--account-id", "-a", help="Filter by account ID."),
    ] = None,
    collection_ids: Annotated[
        Optional[str],
        typer.Option("--collection-ids", "-c", help="Filter by collection IDs (comma-separated)."),
    ] = None,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-V", help="Show reasoning steps."),
    ] = False,
) -> None:
    """Interactive agentic chat session.

    Start a conversation with the AI-powered knowledge assistant.
    The agent remembers context for multi-turn dialogue.

    [bold]Example:[/bold]
        $ gnosisllm-knowledge agentic chat
        $ gnosisllm-knowledge agentic chat --type flow
        $ gnosisllm-knowledge agentic chat --verbose
    """
    # Imported lazily so CLI startup stays fast for unrelated commands.
    from gnosisllm_knowledge.cli.commands.agentic import agentic_chat_command

    # The --index option maps onto the implementation's "index_name" kwarg.
    options = {
        "display": display,
        "index_name": index,
        "agent_type": agent_type,
        "account_id": account_id,
        "collection_ids": collection_ids,
        "verbose": verbose,
    }
    asyncio.run(agentic_chat_command(**options))
487
+
488
+
489
@agentic_app.command("status")
def agentic_status() -> None:
    """Show agentic search configuration status.

    Displays configured agents and their health status.

    [bold]Example:[/bold]
        $ gnosisllm-knowledge agentic status
    """
    # Imported lazily so CLI startup stays fast for unrelated commands.
    from gnosisllm_knowledge.cli.commands.agentic import agentic_status_command

    # Build the coroutine, then drive it to completion on a fresh loop.
    status_coro = agentic_status_command(display=display)
    asyncio.run(status_coro)
501
+
502
+
503
def main() -> None:
    """CLI entry point."""
    # Invoking the Typer app parses sys.argv and dispatches to the
    # registered commands; referenced by the package's entry_points.txt.
    app()
506
+
507
+
508
# Allow running the module directly (python -m / script execution) in
# addition to the installed console-script entry point.
if __name__ == "__main__":
    main()
@@ -0,0 +1,7 @@
1
+ """CLI commands for gnosisllm-knowledge."""
2
+
3
+ from gnosisllm_knowledge.cli.commands.load import load_command
4
+ from gnosisllm_knowledge.cli.commands.search import search_command
5
+ from gnosisllm_knowledge.cli.commands.setup import setup_command
6
+
7
+ __all__ = ["setup_command", "load_command", "search_command"]