gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
- gnosisllm_knowledge/__init__.py +91 -39
- gnosisllm_knowledge/api/__init__.py +3 -2
- gnosisllm_knowledge/api/knowledge.py +502 -32
- gnosisllm_knowledge/api/memory.py +966 -0
- gnosisllm_knowledge/backends/__init__.py +14 -5
- gnosisllm_knowledge/backends/memory/indexer.py +27 -2
- gnosisllm_knowledge/backends/memory/searcher.py +111 -10
- gnosisllm_knowledge/backends/opensearch/agentic.py +355 -48
- gnosisllm_knowledge/backends/opensearch/config.py +49 -28
- gnosisllm_knowledge/backends/opensearch/indexer.py +49 -3
- gnosisllm_knowledge/backends/opensearch/mappings.py +14 -5
- gnosisllm_knowledge/backends/opensearch/memory/__init__.py +12 -0
- gnosisllm_knowledge/backends/opensearch/memory/client.py +1380 -0
- gnosisllm_knowledge/backends/opensearch/memory/config.py +127 -0
- gnosisllm_knowledge/backends/opensearch/memory/setup.py +322 -0
- gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
- gnosisllm_knowledge/backends/opensearch/searcher.py +238 -0
- gnosisllm_knowledge/backends/opensearch/setup.py +308 -148
- gnosisllm_knowledge/cli/app.py +436 -31
- gnosisllm_knowledge/cli/commands/agentic.py +26 -9
- gnosisllm_knowledge/cli/commands/load.py +169 -19
- gnosisllm_knowledge/cli/commands/memory.py +733 -0
- gnosisllm_knowledge/cli/commands/search.py +9 -10
- gnosisllm_knowledge/cli/commands/setup.py +49 -23
- gnosisllm_knowledge/cli/display/service.py +43 -0
- gnosisllm_knowledge/cli/utils/config.py +62 -4
- gnosisllm_knowledge/core/domain/__init__.py +54 -0
- gnosisllm_knowledge/core/domain/discovery.py +166 -0
- gnosisllm_knowledge/core/domain/document.py +19 -19
- gnosisllm_knowledge/core/domain/memory.py +440 -0
- gnosisllm_knowledge/core/domain/result.py +11 -3
- gnosisllm_knowledge/core/domain/search.py +12 -25
- gnosisllm_knowledge/core/domain/source.py +11 -12
- gnosisllm_knowledge/core/events/__init__.py +8 -0
- gnosisllm_knowledge/core/events/types.py +198 -5
- gnosisllm_knowledge/core/exceptions.py +227 -0
- gnosisllm_knowledge/core/interfaces/__init__.py +17 -0
- gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
- gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
- gnosisllm_knowledge/core/interfaces/memory.py +524 -0
- gnosisllm_knowledge/core/interfaces/searcher.py +10 -1
- gnosisllm_knowledge/core/interfaces/streaming.py +133 -0
- gnosisllm_knowledge/core/streaming/__init__.py +36 -0
- gnosisllm_knowledge/core/streaming/pipeline.py +228 -0
- gnosisllm_knowledge/fetchers/__init__.py +8 -0
- gnosisllm_knowledge/fetchers/config.py +27 -0
- gnosisllm_knowledge/fetchers/neoreader.py +31 -3
- gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
- gnosisllm_knowledge/loaders/__init__.py +5 -1
- gnosisllm_knowledge/loaders/base.py +3 -4
- gnosisllm_knowledge/loaders/discovery.py +338 -0
- gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
- gnosisllm_knowledge/loaders/factory.py +46 -0
- gnosisllm_knowledge/loaders/sitemap.py +129 -1
- gnosisllm_knowledge/loaders/sitemap_streaming.py +258 -0
- gnosisllm_knowledge/services/indexing.py +100 -93
- gnosisllm_knowledge/services/search.py +84 -31
- gnosisllm_knowledge/services/streaming_pipeline.py +334 -0
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/METADATA +73 -10
- gnosisllm_knowledge-0.4.0.dist-info/RECORD +81 -0
- gnosisllm_knowledge-0.2.0.dist-info/RECORD +0 -64
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/WHEEL +0 -0
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/entry_points.txt +0 -0
gnosisllm_knowledge/core/domain/document.py

@@ -26,17 +26,22 @@ class Document:
     This is the core domain object that flows through the knowledge pipeline.
     Documents are created by loaders, processed by chunkers, and stored by indexers.

+    Note:
+        This library is tenant-agnostic. Multi-tenancy is achieved through index
+        isolation (e.g., `knowledge-{account_id}`). Tenant information like account_id
+        should be passed in the metadata dictionary if needed for audit purposes.
+
     Attributes:
         content: The main text content of the document.
         source: Source identifier (URL, file path, etc.).
         doc_id: Unique identifier. Auto-generated from content hash if not provided.
         title: Optional document title.
         url: URL where the document was fetched from.
-        metadata: Arbitrary metadata dictionary.
+        metadata: Arbitrary metadata dictionary (can include tenant info for audit).

-
-        account_id: Account/tenant identifier.
+    Collection fields:
         collection_id: Collection the document belongs to.
+        collection_name: Collection name for display in aggregations.
         source_id: Source identifier within the collection.

     Chunking info:
@@ -70,9 +75,9 @@ class Document:
     url: str | None = None
     metadata: dict[str, Any] = field(default_factory=dict)

-    #
-    account_id: str | None = None
+    # Collection fields
     collection_id: str | None = None
+    collection_name: str | None = None  # For display in aggregations
     source_id: str | None = None

     # Chunking info
@@ -138,8 +143,8 @@ class Document:
             title=self.title,
             url=self.url,
             metadata=self.metadata.copy(),
-            account_id=self.account_id,
             collection_id=self.collection_id,
+            collection_name=self.collection_name,
             source_id=self.source_id,
             chunk_index=chunk_index,
             total_chunks=total_chunks,
@@ -152,21 +157,21 @@ class Document:
             created_at=self.created_at,
         )

-    def
+    def with_collection(
         self,
-
-
+        collection_id: str,
+        collection_name: str | None = None,
         source_id: str | None = None,
     ) -> Document:
-        """Create a new document with
+        """Create a new document with collection information.

         Args:
-            account_id: Account/tenant identifier.
             collection_id: Collection identifier.
+            collection_name: Collection name for display.
             source_id: Source identifier.

         Returns:
-            New Document instance with
+            New Document instance with collection information set.
         """
         return Document(
             content=self.content,
@@ -175,8 +180,8 @@ class Document:
             title=self.title,
             url=self.url,
             metadata=self.metadata.copy(),
-
-
+            collection_id=collection_id,
+            collection_name=collection_name or self.collection_name,
             source_id=source_id or self.source_id,
             chunk_index=self.chunk_index,
             total_chunks=self.total_chunks,
@@ -198,11 +203,6 @@ class Document:
         """Check if this document is a chunk of a larger document."""
         return self.chunk_index is not None and self.total_chunks is not None

-    @property
-    def is_multi_tenant(self) -> bool:
-        """Check if this document has tenant information."""
-        return self.account_id is not None
-

 @dataclass
 class TextChunk:
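To illustrate the change above: in 0.4.0 the tenant no longer lives on the Document; callers target a per-tenant index and, at most, record the tenant in metadata, while collection context is attached via the new with_collection(). A minimal sketch, assuming the dataclass keywords shown in the diff; the index name, account ID, and collection IDs are placeholders:

from gnosisllm_knowledge.core.domain.document import Document

# Tenant isolation now happens at the index level rather than on the Document:
# the indexer would write this document into a per-tenant index such as
# "knowledge-acme" (all IDs below are illustrative).
doc = Document(
    content="OpenSearch supports hybrid lexical and vector search.",
    source="https://example.com/docs/search",
    title="Search overview",
    metadata={"account_id": "acme"},  # optional, kept only for audit purposes
)

# with_collection() replaces the removed with_tenant(): it returns a copy
# carrying collection information instead of an account_id field.
doc = doc.with_collection(
    collection_id="col-123",
    collection_name="Product Docs",
    source_id="src-456",
)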
gnosisllm_knowledge/core/domain/memory.py

@@ -0,0 +1,440 @@
+"""Memory domain models for Agentic Memory."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import Any, Literal
+
+
+class MemoryStrategy(str, Enum):
+    """Memory extraction strategies.
+
+    Attributes:
+        SEMANTIC: General facts and knowledge extraction.
+        USER_PREFERENCE: User preferences and choices.
+        SUMMARY: Conversation summaries.
+    """
+
+    SEMANTIC = "SEMANTIC"
+    USER_PREFERENCE = "USER_PREFERENCE"
+    SUMMARY = "SUMMARY"
+
+
+class MemoryType(str, Enum):
+    """Memory storage types.
+
+    Attributes:
+        WORKING: Raw conversation messages (short-term).
+        LONG_TERM: Extracted facts with embeddings.
+        SESSIONS: Session metadata.
+        HISTORY: Audit trail of operations.
+    """
+
+    WORKING = "working"
+    LONG_TERM = "long-term"
+    SESSIONS = "sessions"
+    HISTORY = "history"
+
+
+class PayloadType(str, Enum):
+    """Memory payload types.
+
+    Attributes:
+        CONVERSATIONAL: Conversation messages.
+        DATA: Structured data (agent state, traces).
+    """
+
+    CONVERSATIONAL = "conversational"
+    DATA = "data"
+
+
+class EmbeddingModelType(str, Enum):
+    """Embedding model types supported by OpenSearch.
+
+    Attributes:
+        TEXT_EMBEDDING: Dense vector embeddings (default).
+        SPARSE_ENCODING: Sparse vector encoding.
+    """
+
+    TEXT_EMBEDDING = "TEXT_EMBEDDING"
+    SPARSE_ENCODING = "SPARSE_ENCODING"
+
+
+class HistoryAction(str, Enum):
+    """History audit trail action types.
+
+    Attributes:
+        ADD: Memory was added.
+        UPDATE: Memory was updated.
+        DELETE: Memory was deleted.
+    """
+
+    ADD = "ADD"
+    UPDATE = "UPDATE"
+    DELETE = "DELETE"
+
+
+@dataclass
+class StrategyConfig:
+    """Configuration for a memory extraction strategy.
+
+    Each strategy MUST be scoped to namespace fields.
+    When storing memory, only strategies whose namespace fields are
+    present in the request will run.
+
+    Attributes:
+        type: Strategy type (SEMANTIC, USER_PREFERENCE, SUMMARY).
+        namespace: Fields used to scope this strategy (REQUIRED).
+        llm_result_path: JSONPath to extract LLM response.
+        system_prompt: Optional custom system prompt.
+        llm_id: Optional strategy-specific LLM override.
+    """
+
+    type: MemoryStrategy
+    namespace: list[str]  # REQUIRED - no default
+    llm_result_path: str | None = None
+    system_prompt: str | None = None
+    llm_id: str | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to OpenSearch API format."""
+        config: dict[str, Any] = {}
+        if self.llm_result_path:
+            config["llm_result_path"] = self.llm_result_path
+        if self.system_prompt:
+            config["system_prompt"] = self.system_prompt
+        if self.llm_id:
+            config["llm_id"] = self.llm_id
+
+        return {
+            "type": self.type.value,
+            "namespace": self.namespace,
+            "configuration": config,
+        }
+
+
+@dataclass
+class IndexSettings:
+    """Index-level settings for memory container indexes.
+
+    Attributes:
+        number_of_shards: Number of shards for the index.
+        number_of_replicas: Number of replicas for the index.
+    """
+
+    number_of_shards: int = 1
+    number_of_replicas: int = 1
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to OpenSearch index settings format."""
+        return {
+            "index": {
+                "number_of_shards": str(self.number_of_shards),
+                "number_of_replicas": str(self.number_of_replicas),
+            }
+        }
+
+
+@dataclass
+class ContainerIndexSettings:
+    """Settings for all memory container indexes.
+
+    Attributes:
+        session_index: Settings for session index.
+        short_term_memory_index: Settings for working memory index.
+        long_term_memory_index: Settings for long-term memory index.
+        long_term_memory_history_index: Settings for history index.
+    """
+
+    session_index: IndexSettings | None = None
+    short_term_memory_index: IndexSettings | None = None
+    long_term_memory_index: IndexSettings | None = None
+    long_term_memory_history_index: IndexSettings | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to OpenSearch API format."""
+        result: dict[str, Any] = {}
+        if self.session_index:
+            result["session_index"] = self.session_index.to_dict()
+        if self.short_term_memory_index:
+            result["short_term_memory_index"] = self.short_term_memory_index.to_dict()
+        if self.long_term_memory_index:
+            result["long_term_memory_index"] = self.long_term_memory_index.to_dict()
+        if self.long_term_memory_history_index:
+            result["long_term_memory_history_index"] = (
+                self.long_term_memory_history_index.to_dict()
+            )
+        return result
+
+
+@dataclass
+class ContainerConfig:
+    """Memory container configuration.
+
+    Attributes:
+        name: Container name.
+        description: Optional description.
+        strategies: List of extraction strategies.
+        embedding_model_id: OpenSearch embedding model ID.
+        embedding_model_type: Type of embedding model (TEXT_EMBEDDING or SPARSE_ENCODING).
+        llm_model_id: OpenSearch LLM model ID for inference.
+        llm_result_path: JSONPath to extract LLM response.
+        embedding_dimension: Embedding vector dimension.
+        index_prefix: Custom index prefix (optional).
+        use_system_index: Whether to use system indexes (default: True).
+        index_settings: Optional index-level settings (shards, replicas).
+    """
+
+    name: str
+    description: str | None = None
+    strategies: list[StrategyConfig] = field(default_factory=list)
+    embedding_model_id: str | None = None
+    embedding_model_type: EmbeddingModelType = EmbeddingModelType.TEXT_EMBEDDING
+    llm_model_id: str | None = None
+    llm_result_path: str = "$.choices[0].message.content"
+    embedding_dimension: int = 1536
+    index_prefix: str | None = None
+    use_system_index: bool = True
+    index_settings: ContainerIndexSettings | None = None
+
+
+@dataclass
+class ContainerInfo:
+    """Memory container information.
+
+    Attributes:
+        id: Container ID.
+        name: Container name.
+        description: Container description.
+        strategies: Configured strategies.
+        embedding_model_id: Embedding model ID.
+        llm_model_id: LLM model ID.
+        created_at: Creation timestamp.
+        updated_at: Last update timestamp.
+    """
+
+    id: str
+    name: str
+    description: str | None = None
+    strategies: list[MemoryStrategy] = field(default_factory=list)
+    embedding_model_id: str | None = None
+    llm_model_id: str | None = None
+    created_at: datetime | None = None
+    updated_at: datetime | None = None
+
+
+@dataclass
+class Message:
+    """A conversation message.
+
+    Attributes:
+        role: Message role (user, assistant, system).
+        content: Message content.
+        timestamp: Optional timestamp.
+    """
+
+    role: Literal["user", "assistant", "system"]
+    content: str
+    timestamp: datetime | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to OpenSearch API format."""
+        return {
+            "role": self.role,
+            "content": [{"text": self.content, "type": "text"}],
+        }
+
+
+@dataclass
+class Namespace:
+    """Memory namespace for partitioning and strategy scoping.
+
+    Completely configurable key-value pairs for memory isolation.
+    Common fields: user_id, session_id, agent_id, org_id.
+
+    When creating a container, strategies are scoped to namespace fields.
+    When adding memory with `infer=True`, OpenSearch automatically runs
+    strategies based on which namespace fields are present.
+
+    Attributes:
+        values: Namespace key-value pairs.
+    """
+
+    values: dict[str, str] = field(default_factory=dict)
+
+    def __getitem__(self, key: str) -> str | None:
+        """Get namespace value by key."""
+        return self.values.get(key)
+
+    def __setitem__(self, key: str, value: str) -> None:
+        """Set namespace value by key."""
+        self.values[key] = value
+
+    def to_dict(self) -> dict[str, str]:
+        """Get namespace as dictionary for API calls."""
+        return dict(self.values)
+
+
+@dataclass
+class StoreRequest:
+    """Request to store memory.
+
+    Attributes:
+        messages: Conversation messages (for conversational payload).
+        structured_data: Structured data (for data payload).
+        namespace: Namespace for partitioning and strategy scoping.
+        payload_type: Type of payload.
+        infer: Whether to apply LLM inference for fact extraction.
+        metadata: Optional custom metadata.
+        tags: Optional custom tags.
+    """
+
+    messages: list[Message] | None = None
+    structured_data: dict[str, Any] | None = None
+    namespace: Namespace = field(default_factory=Namespace)
+    payload_type: PayloadType = PayloadType.CONVERSATIONAL
+    infer: bool = True
+    metadata: dict[str, Any] = field(default_factory=dict)
+    tags: dict[str, str] = field(default_factory=dict)
+
+
+@dataclass
+class StoreResult:
+    """Result of a store operation.
+
+    Attributes:
+        session_id: Session ID (for conversational).
+        working_memory_id: Working memory document ID.
+        long_term_count: Number of facts extracted (if infer=True).
+        extraction_time_ms: Time taken for extraction.
+    """
+
+    session_id: str | None = None
+    working_memory_id: str | None = None
+    long_term_count: int = 0
+    extraction_time_ms: int | None = None
+
+
+@dataclass
+class MemoryEntry:
+    """A memory entry from long-term storage.
+
+    Attributes:
+        id: Memory document ID.
+        content: The memory content (extracted fact).
+        strategy: Which strategy extracted this.
+        score: Similarity score (for search results).
+        namespace: Namespace values.
+        created_at: Creation timestamp.
+        metadata: Custom metadata.
+    """
+
+    id: str
+    content: str
+    strategy: MemoryStrategy | None = None
+    score: float = 0.0
+    namespace: dict[str, str] = field(default_factory=dict)
+    created_at: datetime | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class RecallResult:
+    """Result of a recall (search) operation.
+
+    Attributes:
+        items: List of memory entries.
+        total: Total number of matches.
+        query: The search query.
+        took_ms: Time taken in milliseconds.
+    """
+
+    items: list[MemoryEntry]
+    total: int
+    query: str
+    took_ms: int = 0
+
+
+@dataclass
+class SessionInfo:
+    """Session information.
+
+    Attributes:
+        id: Session ID.
+        container_id: Parent container ID.
+        summary: Session summary text.
+        namespace: Session namespace.
+        started_at: Session start time.
+        ended_at: Session end time (if ended).
+        message_count: Number of messages in session.
+        messages: Session messages (if requested).
+        metadata: Custom session metadata.
+    """
+
+    id: str
+    container_id: str
+    summary: str | None = None
+    namespace: dict[str, str] = field(default_factory=dict)
+    started_at: datetime | None = None
+    ended_at: datetime | None = None
+    message_count: int = 0
+    messages: list[Message] | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class HistoryEntry:
+    """Audit trail entry for memory operations. READ-ONLY.
+
+    History is READ-ONLY and cannot be updated or deleted.
+
+    Attributes:
+        id: History entry ID.
+        memory_id: ID of the affected memory.
+        container_id: Parent container ID.
+        action: Operation type (ADD, UPDATE, DELETE).
+        owner_id: User who performed the action.
+        before: State before change (for UPDATE/DELETE).
+        after: State after change.
+        namespace: Namespace at time of operation.
+        tags: Tags at time of operation.
+        created_at: Operation timestamp.
+    """
+
+    id: str
+    memory_id: str
+    container_id: str
+    action: HistoryAction
+    owner_id: str | None = None
+    before: dict[str, Any] | None = None
+    after: dict[str, Any] | None = None
+    namespace: dict[str, str] = field(default_factory=dict)
+    tags: dict[str, str] = field(default_factory=dict)
+    created_at: datetime | None = None
+
+
+@dataclass
+class MemoryStats:
+    """Memory usage statistics.
+
+    Attributes:
+        container_id: Container ID.
+        container_name: Container name.
+        working_memory_count: Messages in working memory.
+        long_term_memory_count: Facts in long-term memory.
+        session_count: Number of sessions.
+        strategies_breakdown: Count per strategy.
+        storage_size_bytes: Estimated storage size.
+        last_updated: Last update timestamp.
+    """
+
+    container_id: str
+    container_name: str
+    working_memory_count: int = 0
+    long_term_memory_count: int = 0
+    session_count: int = 0
+    strategies_breakdown: dict[MemoryStrategy, int] = field(default_factory=dict)
+    storage_size_bytes: int = 0
+    last_updated: datetime | None = None
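A minimal sketch of how these new domain models compose, using only the dataclasses shown above; the module path is assumed to be gnosisllm_knowledge.core.domain.memory, and the model IDs and namespace keys are placeholders:

from gnosisllm_knowledge.core.domain.memory import (
    ContainerConfig,
    Message,
    MemoryStrategy,
    Namespace,
    StoreRequest,
    StrategyConfig,
)

# A strategy must be scoped to namespace fields; this one only runs when a
# request's namespace contains "user_id".
semantic = StrategyConfig(type=MemoryStrategy.SEMANTIC, namespace=["user_id"])

container = ContainerConfig(
    name="assistant-memory",
    description="Long-term memory for a support assistant",
    strategies=[semantic],
    embedding_model_id="my-embedding-model-id",  # placeholder OpenSearch model IDs
    llm_model_id="my-llm-model-id",
)

request = StoreRequest(
    messages=[
        Message(role="user", content="I prefer answers that include code samples."),
        Message(role="assistant", content="Noted, I'll include code samples."),
    ],
    namespace=Namespace(values={"user_id": "u-42", "session_id": "s-1"}),
    infer=True,  # let OpenSearch extract long-term facts from the messages
)

print(semantic.to_dict())
# {'type': 'SEMANTIC', 'namespace': ['user_id'], 'configuration': {}}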
gnosisllm_knowledge/core/domain/result.py

@@ -3,7 +3,10 @@
 from __future__ import annotations

 from dataclasses import dataclass, field
-from typing import Any
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from gnosisllm_knowledge.core.domain.document import Document


 @dataclass
@@ -13,7 +16,7 @@ class LoadResult:
     Attributes:
         source: The source that was loaded (URL, file path, etc.).
         source_type: Type of source (website, sitemap, file, etc.).
-
+        documents: List of loaded documents.
         success: Whether the operation succeeded.
         error_message: Error message if operation failed.
         duration_ms: Duration of the operation in milliseconds.
@@ -25,8 +28,8 @@ class LoadResult:

     source: str
     source_type: str
-    document_count: int
     success: bool
+    documents: list[Document] = field(default_factory=list)
     error_message: str | None = None
     duration_ms: float = 0.0
     metadata: dict[str, Any] = field(default_factory=dict)
@@ -34,6 +37,11 @@
     urls_failed: int = 0
     bytes_loaded: int = 0

+    @property
+    def document_count(self) -> int:
+        """Return the number of loaded documents."""
+        return len(self.documents)
+
     @property
     def success_rate(self) -> float:
         """Calculate the success rate for multi-URL loads."""
gnosisllm_knowledge/core/domain/search.py

@@ -39,6 +39,11 @@ class AgentType(str, Enum):
 class SearchQuery:
     """Search query with filters and options.

+    Note:
+        This library is tenant-agnostic. Multi-tenancy is achieved through index
+        isolation (e.g., `knowledge-{account_id}`). Callers should ensure they're
+        searching the correct tenant-specific index.
+
     Attributes:
         text: The search query text.
         mode: Search mode to use.
@@ -49,7 +54,6 @@ class SearchQuery:
     Filters:
         collection_ids: Filter by collection IDs.
         source_ids: Filter by source IDs.
-        account_id: Multi-tenant account filter.
         metadata_filters: Custom metadata filters.

     Advanced options:
@@ -69,7 +73,6 @@ class SearchQuery:
     # Filters
     collection_ids: list[str] | None = None
     source_ids: list[str] | None = None
-    account_id: str | None = None
     metadata_filters: dict[str, Any] = field(default_factory=dict)

     # Advanced options
@@ -89,26 +92,6 @@ class SearchQuery:
             min_score=self.min_score,
             collection_ids=self.collection_ids,
             source_ids=self.source_ids,
-            account_id=self.account_id,
-            metadata_filters=self.metadata_filters.copy(),
-            field_boosts=self.field_boosts.copy() if self.field_boosts else None,
-            include_highlights=self.include_highlights,
-            include_fields=self.include_fields,
-            exclude_fields=self.exclude_fields,
-            explain=self.explain,
-        )
-
-    def with_tenant(self, account_id: str) -> SearchQuery:
-        """Create a copy with tenant information."""
-        return SearchQuery(
-            text=self.text,
-            mode=self.mode,
-            limit=self.limit,
-            offset=self.offset,
-            min_score=self.min_score,
-            collection_ids=self.collection_ids,
-            source_ids=self.source_ids,
-            account_id=account_id,
             metadata_filters=self.metadata_filters.copy(),
             field_boosts=self.field_boosts.copy() if self.field_boosts else None,
             include_highlights=self.include_highlights,
@@ -216,13 +199,17 @@ class ReasoningStep:
 class AgenticSearchQuery:
     """Query for agentic search with conversation support.

+    Note:
+        This library is tenant-agnostic. Multi-tenancy is achieved through index
+        isolation (e.g., `knowledge-{account_id}`). Callers should ensure they're
+        searching the correct tenant-specific index.
+
     Attributes:
         text: The search query text.
         agent_type: Type of agent to use.
         conversation_id: ID for continuing a conversation.
         collection_ids: Filter by collection IDs.
         source_ids: Filter by source IDs.
-        account_id: Multi-tenant account filter.
         limit: Maximum number of source documents to retrieve.
         include_reasoning: Whether to include reasoning steps.
         metadata_filters: Custom metadata filters.
@@ -235,7 +222,6 @@ class AgenticSearchQuery:
     conversation_id: str | None = None
     collection_ids: list[str] | None = None
     source_ids: list[str] | None = None
-    account_id: str | None = None
     limit: int = 10
     include_reasoning: bool = True
     metadata_filters: dict[str, Any] = field(default_factory=dict)
@@ -250,7 +236,6 @@ class AgenticSearchQuery:
             limit=self.limit,
             collection_ids=self.collection_ids,
             source_ids=self.source_ids,
-            account_id=self.account_id,
             metadata_filters=self.metadata_filters.copy(),
         )

@@ -276,6 +261,7 @@ class AgenticSearchResult:
         total_tokens: Total tokens consumed.
         prompt_tokens: Tokens used in prompts.
         completion_tokens: Tokens used in completions.
+        generated_query: The DSL query generated by QueryPlanningTool (if applicable).
     """

     query: str
@@ -292,6 +278,7 @@ class AgenticSearchResult:
     total_tokens: int = 0
     prompt_tokens: int = 0
    completion_tokens: int = 0
+    generated_query: str | None = None  # DSL generated by QueryPlanningTool

     @property
     def has_answer(self) -> bool:
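With account_id and with_tenant() removed, a 0.4.0 caller expresses tenancy only through the index it searches. A minimal sketch, assuming the module path gnosisllm_knowledge.core.domain.search; the tenant and collection IDs are placeholders:

from gnosisllm_knowledge.core.domain.search import SearchQuery

# The tenant is chosen by which index the caller searches (e.g. "knowledge-acme"),
# not by a field on the query, so the query itself carries no account_id.
query = SearchQuery(
    text="how do I configure hybrid search?",
    collection_ids=["col-123"],  # optional collection filter
    limit=5,
)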
|