PyPI - gnosisllm-knowledge - Versions diffs - 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

gnosisllm_knowledge/__init__.py +91 -39
gnosisllm_knowledge/api/__init__.py +3 -2
gnosisllm_knowledge/api/knowledge.py +287 -7
gnosisllm_knowledge/api/memory.py +966 -0
gnosisllm_knowledge/backends/__init__.py +14 -5
gnosisllm_knowledge/backends/opensearch/agentic.py +341 -39
gnosisllm_knowledge/backends/opensearch/config.py +49 -28
gnosisllm_knowledge/backends/opensearch/indexer.py +1 -0
gnosisllm_knowledge/backends/opensearch/mappings.py +2 -1
gnosisllm_knowledge/backends/opensearch/memory/__init__.py +12 -0
gnosisllm_knowledge/backends/opensearch/memory/client.py +1380 -0
gnosisllm_knowledge/backends/opensearch/memory/config.py +127 -0
gnosisllm_knowledge/backends/opensearch/memory/setup.py +322 -0
gnosisllm_knowledge/backends/opensearch/searcher.py +235 -0
gnosisllm_knowledge/backends/opensearch/setup.py +308 -148
gnosisllm_knowledge/cli/app.py +378 -12
gnosisllm_knowledge/cli/commands/agentic.py +11 -0
gnosisllm_knowledge/cli/commands/memory.py +723 -0
gnosisllm_knowledge/cli/commands/setup.py +24 -22
gnosisllm_knowledge/cli/display/service.py +43 -0
gnosisllm_knowledge/cli/utils/config.py +58 -0
gnosisllm_knowledge/core/domain/__init__.py +41 -0
gnosisllm_knowledge/core/domain/document.py +5 -0
gnosisllm_knowledge/core/domain/memory.py +440 -0
gnosisllm_knowledge/core/domain/result.py +11 -3
gnosisllm_knowledge/core/domain/search.py +2 -0
gnosisllm_knowledge/core/events/types.py +76 -0
gnosisllm_knowledge/core/exceptions.py +134 -0
gnosisllm_knowledge/core/interfaces/__init__.py +17 -0
gnosisllm_knowledge/core/interfaces/memory.py +524 -0
gnosisllm_knowledge/core/interfaces/streaming.py +127 -0
gnosisllm_knowledge/core/streaming/__init__.py +36 -0
gnosisllm_knowledge/core/streaming/pipeline.py +228 -0
gnosisllm_knowledge/loaders/base.py +3 -4
gnosisllm_knowledge/loaders/sitemap.py +129 -1
gnosisllm_knowledge/loaders/sitemap_streaming.py +258 -0
gnosisllm_knowledge/services/indexing.py +67 -75
gnosisllm_knowledge/services/search.py +47 -11
gnosisllm_knowledge/services/streaming_pipeline.py +302 -0
{gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/METADATA +44 -1
gnosisllm_knowledge-0.3.0.dist-info/RECORD +77 -0
gnosisllm_knowledge-0.2.0.dist-info/RECORD +0 -64
{gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/WHEEL +0 -0
{gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/entry_points.txt +0 -0

gnosisllm_knowledge/core/domain/memory.py ADDED Viewed

@@ -0,0 +1,440 @@
+"""Memory domain models for Agentic Memory."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import Any, Literal
+class MemoryStrategy(str, Enum):
+    """Memory extraction strategies.
+    Mixes in ``str``, so every member is also a plain string equal to its value.
+    Attributes:
+        SEMANTIC: General facts and knowledge extraction.
+        USER_PREFERENCE: User preferences and choices.
+        SUMMARY: Conversation summaries.
+    """
+    # Each value is spelled exactly like its member name (upper-case).
+    SEMANTIC = "SEMANTIC"
+    USER_PREFERENCE = "USER_PREFERENCE"
+    SUMMARY = "SUMMARY"
+class MemoryType(str, Enum):
+    """Memory storage types.
+    Mixes in ``str``, so every member is also a plain string equal to its value.
+    Attributes:
+        WORKING: Raw conversation messages (short-term).
+        LONG_TERM: Extracted facts with embeddings.
+        SESSIONS: Session metadata.
+        HISTORY: Audit trail of operations.
+    """
+    # Values are lower-case; LONG_TERM is hyphenated ("long-term"), not underscored.
+    WORKING = "working"
+    LONG_TERM = "long-term"
+    SESSIONS = "sessions"
+    HISTORY = "history"
+class PayloadType(str, Enum):
+    """Memory payload types.
+    Mixes in ``str``, so every member is also a plain string equal to its value.
+    Attributes:
+        CONVERSATIONAL: Conversation messages.
+        DATA: Structured data (agent state, traces).
+    """
+    # Values are the lower-case form of the member names.
+    CONVERSATIONAL = "conversational"
+    DATA = "data"
+class EmbeddingModelType(str, Enum):
+    """Embedding model types supported by OpenSearch.
+    Mixes in ``str``, so every member is also a plain string equal to its value.
+    Attributes:
+        TEXT_EMBEDDING: Dense vector embeddings (default).
+        SPARSE_ENCODING: Sparse vector encoding.
+    """
+    # Each value is spelled exactly like its member name (upper-case).
+    TEXT_EMBEDDING = "TEXT_EMBEDDING"
+    SPARSE_ENCODING = "SPARSE_ENCODING"
+class HistoryAction(str, Enum):
+    """History audit trail action types.
+    Mixes in ``str``, so every member is also a plain string equal to its value.
+    Attributes:
+        ADD: Memory was added.
+        UPDATE: Memory was updated.
+        DELETE: Memory was deleted.
+    """
+    # Each value is spelled exactly like its member name (upper-case).
+    ADD = "ADD"
+    UPDATE = "UPDATE"
+    DELETE = "DELETE"
+@dataclass
+class StrategyConfig:
+    """Configuration for a memory extraction strategy.
+    Each strategy MUST be scoped to namespace fields.
+    When storing memory, only strategies whose namespace fields are
+    present in the request will run.
+    Attributes:
+        type: Strategy type (SEMANTIC, USER_PREFERENCE, SUMMARY).
+        namespace: Fields used to scope this strategy (REQUIRED).
+        llm_result_path: JSONPath to extract LLM response.
+        system_prompt: Optional custom system prompt.
+        llm_id: Optional strategy-specific LLM override.
+    """
+    type: MemoryStrategy
+    namespace: list[str]  # REQUIRED - no default
+    llm_result_path: str | None = None
+    system_prompt: str | None = None
+    llm_id: str | None = None
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to OpenSearch API format.
+        Returns:
+            A dict with the keys ``type`` (the enum member's string value),
+            ``namespace`` and ``configuration``. ``configuration`` is always
+            present and is an empty dict when no optional field is set.
+        """
+        config: dict[str, Any] = {}
+        # Truthiness checks: both None and empty strings are left out.
+        if self.llm_result_path:
+            config["llm_result_path"] = self.llm_result_path
+        if self.system_prompt:
+            config["system_prompt"] = self.system_prompt
+        if self.llm_id:
+            config["llm_id"] = self.llm_id
+        return {
+            "type": self.type.value,
+            # The attribute's own list object is returned, not a copy.
+            "namespace": self.namespace,
+            "configuration": config,
+        }
+@dataclass
+class IndexSettings:
+    """Index-level settings for memory container indexes.
+    Attributes:
+        number_of_shards: Number of shards for the index.
+        number_of_replicas: Number of replicas for the index.
+    """
+    # Both counts default to 1.
+    number_of_shards: int = 1
+    number_of_replicas: int = 1
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to OpenSearch index settings format.
+        Returns:
+            A dict with a single ``index`` key whose value holds both
+            counts converted to strings.
+        """
+        return {
+            "index": {
+                # str(...) is applied here, so the integer fields are emitted as text.
+                "number_of_shards": str(self.number_of_shards),
+                "number_of_replicas": str(self.number_of_replicas),
+            }
+        }
+@dataclass
+class ContainerIndexSettings:
+    """Settings for all memory container indexes.
+    Attributes:
+        session_index: Settings for session index.
+        short_term_memory_index: Settings for working memory index.
+        long_term_memory_index: Settings for long-term memory index.
+        long_term_memory_history_index: Settings for history index.
+    """
+    # Every entry is optional and defaults to None (not set).
+    session_index: IndexSettings | None = None
+    short_term_memory_index: IndexSettings | None = None
+    long_term_memory_index: IndexSettings | None = None
+    long_term_memory_history_index: IndexSettings | None = None
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to OpenSearch API format.
+        Returns:
+            A dict keyed by field name that contains only the entries that
+            are set, each serialized with ``IndexSettings.to_dict()``.
+            The dict is empty when no entry is set.
+        """
+        result: dict[str, Any] = {}
+        # Entries left as None are omitted from the output.
+        if self.session_index:
+            result["session_index"] = self.session_index.to_dict()
+        if self.short_term_memory_index:
+            result["short_term_memory_index"] = self.short_term_memory_index.to_dict()
+        if self.long_term_memory_index:
+            result["long_term_memory_index"] = self.long_term_memory_index.to_dict()
+        if self.long_term_memory_history_index:
+            result["long_term_memory_history_index"] = (
+                self.long_term_memory_history_index.to_dict()
+            )
+        return result
+@dataclass
+class ContainerConfig:
+    """Memory container configuration.
+    Only ``name`` is required; every other field has a default.
+    Attributes:
+        name: Container name.
+        description: Optional description.
+        strategies: List of extraction strategies.
+        embedding_model_id: OpenSearch embedding model ID.
+        embedding_model_type: Type of embedding model (TEXT_EMBEDDING or SPARSE_ENCODING).
+        llm_model_id: OpenSearch LLM model ID for inference.
+        llm_result_path: JSONPath to extract LLM response.
+        embedding_dimension: Embedding vector dimension.
+        index_prefix: Custom index prefix (optional).
+        use_system_index: Whether to use system indexes (default: True).
+        index_settings: Optional index-level settings (shards, replicas).
+    """
+    name: str
+    description: str | None = None
+    # default_factory gives every instance its own empty list.
+    strategies: list[StrategyConfig] = field(default_factory=list)
+    embedding_model_id: str | None = None
+    embedding_model_type: EmbeddingModelType = EmbeddingModelType.TEXT_EMBEDDING
+    llm_model_id: str | None = None
+    # NOTE(review): the default path looks like an OpenAI-style chat completion
+    # response shape - confirm it matches the configured LLM connector.
+    llm_result_path: str = "$.choices[0].message.content"
+    # NOTE(review): 1536 presumably matches the intended embedding model's
+    # output size - confirm before using a different model.
+    embedding_dimension: int = 1536
+    index_prefix: str | None = None
+    use_system_index: bool = True
+    index_settings: ContainerIndexSettings | None = None
+@dataclass
+class ContainerInfo:
+    """Memory container information.
+    ``id`` and ``name`` are required; every other field has a default.
+    Attributes:
+        id: Container ID.
+        name: Container name.
+        description: Container description.
+        strategies: Configured strategies.
+        embedding_model_id: Embedding model ID.
+        llm_model_id: LLM model ID.
+        created_at: Creation timestamp.
+        updated_at: Last update timestamp.
+    """
+    id: str
+    name: str
+    description: str | None = None
+    # Holds bare MemoryStrategy members here, whereas ContainerConfig.strategies
+    # holds full StrategyConfig objects.
+    strategies: list[MemoryStrategy] = field(default_factory=list)
+    embedding_model_id: str | None = None
+    llm_model_id: str | None = None
+    created_at: datetime | None = None
+    updated_at: datetime | None = None
+@dataclass
+class Message:
+    """A conversation message.
+    Attributes:
+        role: Message role (user, assistant, system).
+        content: Message content.
+        timestamp: Optional timestamp.
+    """
+    role: Literal["user", "assistant", "system"]
+    content: str
+    timestamp: datetime | None = None
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to OpenSearch API format.
+        Returns:
+            A dict with ``role`` and ``content``, where ``content`` is a
+            one-element list holding the text tagged with type ``"text"``.
+            ``timestamp`` is not part of the output.
+        """
+        return {
+            "role": self.role,
+            # The plain string is wrapped as a single text part.
+            "content": [{"text": self.content, "type": "text"}],
+        }
+@dataclass
+class Namespace:
+    """Memory namespace for partitioning and strategy scoping.
+    Completely configurable key-value pairs for memory isolation.
+    Common fields: user_id, session_id, agent_id, org_id.
+    When creating a container, strategies are scoped to namespace fields.
+    When adding memory with `infer=True`, OpenSearch automatically runs
+    strategies based on which namespace fields are present.
+    Attributes:
+        values: Namespace key-value pairs.
+    """
+    # default_factory gives every instance its own empty dict.
+    values: dict[str, str] = field(default_factory=dict)
+    def __getitem__(self, key: str) -> str | None:
+        """Get namespace value by key.
+        Returns None for a missing key instead of raising ``KeyError``.
+        """
+        return self.values.get(key)
+    def __setitem__(self, key: str, value: str) -> None:
+        """Set namespace value by key."""
+        self.values[key] = value
+    def to_dict(self) -> dict[str, str]:
+        """Get namespace as dictionary for API calls.
+        Returns a new dict, so changes to it do not affect ``values``.
+        """
+        return dict(self.values)
+@dataclass
+class StoreRequest:
+    """Request to store memory.
+    Every field has a default, so the request can be built with no arguments.
+    Attributes:
+        messages: Conversation messages (for conversational payload).
+        structured_data: Structured data (for data payload).
+        namespace: Namespace for partitioning and strategy scoping.
+        payload_type: Type of payload.
+        infer: Whether to apply LLM inference for fact extraction.
+        metadata: Optional custom metadata.
+        tags: Optional custom tags.
+    """
+    # Both payload fields default to None; this class does not itself check
+    # that the one matching payload_type has been supplied.
+    messages: list[Message] | None = None
+    structured_data: dict[str, Any] | None = None
+    # default_factory gives every request its own Namespace / dict instances.
+    namespace: Namespace = field(default_factory=Namespace)
+    payload_type: PayloadType = PayloadType.CONVERSATIONAL
+    infer: bool = True
+    metadata: dict[str, Any] = field(default_factory=dict)
+    tags: dict[str, str] = field(default_factory=dict)
+@dataclass
+class StoreResult:
+    """Result of a store operation.
+    Every field has a default, so an empty result can be built with no arguments.
+    Attributes:
+        session_id: Session ID (for conversational).
+        working_memory_id: Working memory document ID.
+        long_term_count: Number of facts extracted (if infer=True).
+        extraction_time_ms: Time taken for extraction.
+    """
+    session_id: str | None = None
+    working_memory_id: str | None = None
+    # The count defaults to 0, while the timing defaults to None.
+    long_term_count: int = 0
+    extraction_time_ms: int | None = None
+@dataclass
+class MemoryEntry:
+    """A memory entry from long-term storage.
+    ``id`` and ``content`` are required; every other field has a default.
+    Attributes:
+        id: Memory document ID.
+        content: The memory content (extracted fact).
+        strategy: Which strategy extracted this.
+        score: Similarity score (for search results).
+        namespace: Namespace values.
+        created_at: Creation timestamp.
+        metadata: Custom metadata.
+    """
+    id: str
+    content: str
+    strategy: MemoryStrategy | None = None
+    score: float = 0.0
+    # A plain dict of namespace values, not a Namespace instance.
+    namespace: dict[str, str] = field(default_factory=dict)
+    created_at: datetime | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+@dataclass
+class RecallResult:
+    """Result of a recall (search) operation.
+    Attributes:
+        items: List of memory entries.
+        total: Total number of matches.
+        query: The search query.
+        took_ms: Time taken in milliseconds.
+    """
+    # items, total and query have no defaults and must be supplied;
+    # total is stored separately from items rather than derived from it.
+    items: list[MemoryEntry]
+    total: int
+    query: str
+    took_ms: int = 0
+@dataclass
+class SessionInfo:
+    """Session information.
+    ``id`` and ``container_id`` are required; every other field has a default.
+    Attributes:
+        id: Session ID.
+        container_id: Parent container ID.
+        summary: Session summary text.
+        namespace: Session namespace.
+        started_at: Session start time.
+        ended_at: Session end time (if ended).
+        message_count: Number of messages in session.
+        messages: Session messages (if requested).
+        metadata: Custom session metadata.
+    """
+    id: str
+    container_id: str
+    summary: str | None = None
+    # A plain dict of namespace values, not a Namespace instance.
+    namespace: dict[str, str] = field(default_factory=dict)
+    started_at: datetime | None = None
+    ended_at: datetime | None = None
+    # Stored separately from messages, which defaults to None.
+    message_count: int = 0
+    messages: list[Message] | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+# NOTE(review): the docstring calls history READ-ONLY, but this is a plain
+# @dataclass (not frozen), so instances remain mutable in Python - confirm
+# that read-only is enforced elsewhere.
+@dataclass
+class HistoryEntry:
+    """Audit trail entry for memory operations. READ-ONLY.
+    History is READ-ONLY and cannot be updated or deleted.
+    Attributes:
+        id: History entry ID.
+        memory_id: ID of the affected memory.
+        container_id: Parent container ID.
+        action: Operation type (ADD, UPDATE, DELETE).
+        owner_id: User who performed the action.
+        before: State before change (for UPDATE/DELETE).
+        after: State after change.
+        namespace: Namespace at time of operation.
+        tags: Tags at time of operation.
+        created_at: Operation timestamp.
+    """
+    # The first four fields have no defaults and must be supplied.
+    id: str
+    memory_id: str
+    container_id: str
+    action: HistoryAction
+    owner_id: str | None = None
+    before: dict[str, Any] | None = None
+    after: dict[str, Any] | None = None
+    namespace: dict[str, str] = field(default_factory=dict)
+    tags: dict[str, str] = field(default_factory=dict)
+    created_at: datetime | None = None
+@dataclass
+class MemoryStats:
+    """Memory usage statistics.
+    ``container_id`` and ``container_name`` are required; all counters default to 0.
+    Attributes:
+        container_id: Container ID.
+        container_name: Container name.
+        working_memory_count: Messages in working memory.
+        long_term_memory_count: Facts in long-term memory.
+        session_count: Number of sessions.
+        strategies_breakdown: Count per strategy.
+        storage_size_bytes: Estimated storage size.
+        last_updated: Last update timestamp.
+    """
+    container_id: str
+    container_name: str
+    working_memory_count: int = 0
+    long_term_memory_count: int = 0
+    session_count: int = 0
+    # Keyed by MemoryStrategy members; every instance gets its own dict.
+    strategies_breakdown: dict[MemoryStrategy, int] = field(default_factory=dict)
+    storage_size_bytes: int = 0
+    last_updated: datetime | None = None

gnosisllm_knowledge/core/domain/result.py CHANGED Viewed

@@ -3,7 +3,10 @@
 from __future__ import annotations
 from dataclasses import dataclass, field
-from typing import Any
+from typing import TYPE_CHECKING, Any
+if TYPE_CHECKING:
+    from gnosisllm_knowledge.core.domain.document import Document
 @dataclass
@@ -13,7 +16,7 @@ class LoadResult:
     Attributes:
         source: The source that was loaded (URL, file path, etc.).
         source_type: Type of source (website, sitemap, file, etc.).
-        document_count: Number of documents loaded.
+        documents: List of loaded documents.
         success: Whether the operation succeeded.
         error_message: Error message if operation failed.
         duration_ms: Duration of the operation in milliseconds.
@@ -25,8 +28,8 @@ class LoadResult:
     source: str
     source_type: str
-    document_count: int
     success: bool
+    documents: list[Document] = field(default_factory=list)
     error_message: str | None = None
     duration_ms: float = 0.0
     metadata: dict[str, Any] = field(default_factory=dict)
@@ -34,6 +37,11 @@ class LoadResult:
     urls_failed: int = 0
     bytes_loaded: int = 0
+    @property
+    def document_count(self) -> int:
+        """Return the number of loaded documents.
+        Computed on every access as ``len(self.documents)``; it is not stored.
+        """
+        return len(self.documents)
     @property
     def success_rate(self) -> float:
         """Calculate the success rate for multi-URL loads."""

gnosisllm_knowledge/core/domain/search.py CHANGED Viewed

@@ -276,6 +276,7 @@ class AgenticSearchResult:
         total_tokens: Total tokens consumed.
         prompt_tokens: Tokens used in prompts.
         completion_tokens: Tokens used in completions.
+        generated_query: The DSL query generated by QueryPlanningTool (if applicable).
     """
     query: str
@@ -292,6 +293,7 @@ class AgenticSearchResult:
     total_tokens: int = 0
     prompt_tokens: int = 0
     completion_tokens: int = 0
+    generated_query: str | None = None  # DSL generated by QueryPlanningTool
     @property
     def has_answer(self) -> bool:

gnosisllm_knowledge/core/events/types.py CHANGED Viewed

@@ -14,6 +14,7 @@ class EventType(str, Enum):
     Events are organized by category:
     - Loading events: Document and content loading
     - Indexing events: Document indexing operations
+    - Streaming events: Streaming pipeline progress
     - Search events: Search and retrieval operations
     - Agentic events: AI-powered operations
     - Setup events: Backend setup operations
@@ -33,6 +34,11 @@ class EventType(str, Enum):
     LOAD_FAILED = "load_failed"
     SITEMAP_DISCOVERED = "sitemap_discovered"
+    # Streaming events
+    STREAMING_PROGRESS = "streaming_progress"
+    URL_BATCH_PROCESSED = "url_batch_processed"
+    STREAMING_COMPLETED = "streaming_completed"
     # Indexing events
     INDEX_STARTED = "index_started"
     DOCUMENT_INDEXED = "document_indexed"
@@ -224,3 +230,73 @@ class BatchCompletedEvent(Event):
             "failure_count": self.failure_count,
             "duration_ms": self.duration_ms,
         }
+@dataclass
+class StreamingProgressEvent(Event):
+    """Progress event for streaming operations.
+    Emitted periodically during streaming pipeline execution to
+    provide visibility into progress.
+    """
+    # All counters default to 0; phase defaults to "unknown" and memory_mb to None.
+    urls_discovered: int = 0
+    urls_processed: int = 0
+    documents_indexed: int = 0
+    documents_failed: int = 0
+    phase: str = "unknown"
+    memory_mb: float | None = None
+    def __post_init__(self) -> None:
+        """Set the event type and fill ``data`` from this event's fields.
+        ``event_type`` and ``data`` are presumably declared on ``Event``
+        (base class not visible here - confirm).
+        """
+        self.event_type = EventType.STREAMING_PROGRESS
+        # Plain assignment: whatever ``data`` held before is replaced.
+        self.data = {
+            "urls_discovered": self.urls_discovered,
+            "urls_processed": self.urls_processed,
+            "documents_indexed": self.documents_indexed,
+            "documents_failed": self.documents_failed,
+            "phase": self.phase,
+            "memory_mb": self.memory_mb,
+        }
+@dataclass
+class UrlBatchProcessedEvent(Event):
+    """Event emitted when a batch of URLs is processed."""
+    # All fields default to 0.
+    batch_index: int = 0
+    urls_in_batch: int = 0
+    documents_created: int = 0
+    total_urls_processed: int = 0
+    def __post_init__(self) -> None:
+        """Set the event type and fill ``data`` from this event's fields.
+        ``event_type`` and ``data`` are presumably declared on ``Event``
+        (base class not visible here - confirm).
+        """
+        self.event_type = EventType.URL_BATCH_PROCESSED
+        # Plain assignment: whatever ``data`` held before is replaced.
+        self.data = {
+            "batch_index": self.batch_index,
+            "urls_in_batch": self.urls_in_batch,
+            "documents_created": self.documents_created,
+            "total_urls_processed": self.total_urls_processed,
+        }
+@dataclass
+class StreamingCompletedEvent(Event):
+    """Event emitted when streaming pipeline completes."""
+    # All counters default to 0 and duration_ms to 0.0.
+    total_urls: int = 0
+    total_documents: int = 0
+    indexed_count: int = 0
+    failed_count: int = 0
+    duration_ms: float = 0.0
+    def __post_init__(self) -> None:
+        """Set the event type and fill ``data`` from this event's fields.
+        ``event_type`` and ``data`` are presumably declared on ``Event``
+        (base class not visible here - confirm).
+        """
+        self.event_type = EventType.STREAMING_COMPLETED
+        # Plain assignment: whatever ``data`` held before is replaced.
+        self.data = {
+            "total_urls": self.total_urls,
+            "total_documents": self.total_documents,
+            "indexed_count": self.indexed_count,
+            "failed_count": self.failed_count,
+            "duration_ms": self.duration_ms,
+        }

gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl