gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. gnosisllm_knowledge/__init__.py +91 -39
  2. gnosisllm_knowledge/api/__init__.py +3 -2
  3. gnosisllm_knowledge/api/knowledge.py +502 -32
  4. gnosisllm_knowledge/api/memory.py +966 -0
  5. gnosisllm_knowledge/backends/__init__.py +14 -5
  6. gnosisllm_knowledge/backends/memory/indexer.py +27 -2
  7. gnosisllm_knowledge/backends/memory/searcher.py +111 -10
  8. gnosisllm_knowledge/backends/opensearch/agentic.py +355 -48
  9. gnosisllm_knowledge/backends/opensearch/config.py +49 -28
  10. gnosisllm_knowledge/backends/opensearch/indexer.py +49 -3
  11. gnosisllm_knowledge/backends/opensearch/mappings.py +14 -5
  12. gnosisllm_knowledge/backends/opensearch/memory/__init__.py +12 -0
  13. gnosisllm_knowledge/backends/opensearch/memory/client.py +1380 -0
  14. gnosisllm_knowledge/backends/opensearch/memory/config.py +127 -0
  15. gnosisllm_knowledge/backends/opensearch/memory/setup.py +322 -0
  16. gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
  17. gnosisllm_knowledge/backends/opensearch/searcher.py +238 -0
  18. gnosisllm_knowledge/backends/opensearch/setup.py +308 -148
  19. gnosisllm_knowledge/cli/app.py +436 -31
  20. gnosisllm_knowledge/cli/commands/agentic.py +26 -9
  21. gnosisllm_knowledge/cli/commands/load.py +169 -19
  22. gnosisllm_knowledge/cli/commands/memory.py +733 -0
  23. gnosisllm_knowledge/cli/commands/search.py +9 -10
  24. gnosisllm_knowledge/cli/commands/setup.py +49 -23
  25. gnosisllm_knowledge/cli/display/service.py +43 -0
  26. gnosisllm_knowledge/cli/utils/config.py +62 -4
  27. gnosisllm_knowledge/core/domain/__init__.py +54 -0
  28. gnosisllm_knowledge/core/domain/discovery.py +166 -0
  29. gnosisllm_knowledge/core/domain/document.py +19 -19
  30. gnosisllm_knowledge/core/domain/memory.py +440 -0
  31. gnosisllm_knowledge/core/domain/result.py +11 -3
  32. gnosisllm_knowledge/core/domain/search.py +12 -25
  33. gnosisllm_knowledge/core/domain/source.py +11 -12
  34. gnosisllm_knowledge/core/events/__init__.py +8 -0
  35. gnosisllm_knowledge/core/events/types.py +198 -5
  36. gnosisllm_knowledge/core/exceptions.py +227 -0
  37. gnosisllm_knowledge/core/interfaces/__init__.py +17 -0
  38. gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
  39. gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
  40. gnosisllm_knowledge/core/interfaces/memory.py +524 -0
  41. gnosisllm_knowledge/core/interfaces/searcher.py +10 -1
  42. gnosisllm_knowledge/core/interfaces/streaming.py +133 -0
  43. gnosisllm_knowledge/core/streaming/__init__.py +36 -0
  44. gnosisllm_knowledge/core/streaming/pipeline.py +228 -0
  45. gnosisllm_knowledge/fetchers/__init__.py +8 -0
  46. gnosisllm_knowledge/fetchers/config.py +27 -0
  47. gnosisllm_knowledge/fetchers/neoreader.py +31 -3
  48. gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
  49. gnosisllm_knowledge/loaders/__init__.py +5 -1
  50. gnosisllm_knowledge/loaders/base.py +3 -4
  51. gnosisllm_knowledge/loaders/discovery.py +338 -0
  52. gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
  53. gnosisllm_knowledge/loaders/factory.py +46 -0
  54. gnosisllm_knowledge/loaders/sitemap.py +129 -1
  55. gnosisllm_knowledge/loaders/sitemap_streaming.py +258 -0
  56. gnosisllm_knowledge/services/indexing.py +100 -93
  57. gnosisllm_knowledge/services/search.py +84 -31
  58. gnosisllm_knowledge/services/streaming_pipeline.py +334 -0
  59. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/METADATA +73 -10
  60. gnosisllm_knowledge-0.4.0.dist-info/RECORD +81 -0
  61. gnosisllm_knowledge-0.2.0.dist-info/RECORD +0 -64
  62. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/WHEEL +0 -0
  63. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/entry_points.txt +0 -0
@@ -26,17 +26,22 @@ class Document:
26
26
  This is the core domain object that flows through the knowledge pipeline.
27
27
  Documents are created by loaders, processed by chunkers, and stored by indexers.
28
28
 
29
+ Note:
30
+ This library is tenant-agnostic. Multi-tenancy is achieved through index
31
+ isolation (e.g., `knowledge-{account_id}`). Tenant information like account_id
32
+ should be passed in the metadata dictionary if needed for audit purposes.
33
+
29
34
  Attributes:
30
35
  content: The main text content of the document.
31
36
  source: Source identifier (URL, file path, etc.).
32
37
  doc_id: Unique identifier. Auto-generated from content hash if not provided.
33
38
  title: Optional document title.
34
39
  url: URL where the document was fetched from.
35
- metadata: Arbitrary metadata dictionary.
40
+ metadata: Arbitrary metadata dictionary (can include tenant info for audit).
36
41
 
37
- Multi-tenancy fields:
38
- account_id: Account/tenant identifier.
42
+ Collection fields:
39
43
  collection_id: Collection the document belongs to.
44
+ collection_name: Collection name for display in aggregations.
40
45
  source_id: Source identifier within the collection.
41
46
 
42
47
  Chunking info:
@@ -70,9 +75,9 @@ class Document:
70
75
  url: str | None = None
71
76
  metadata: dict[str, Any] = field(default_factory=dict)
72
77
 
73
- # Multi-tenancy fields
74
- account_id: str | None = None
78
+ # Collection fields
75
79
  collection_id: str | None = None
80
+ collection_name: str | None = None # For display in aggregations
76
81
  source_id: str | None = None
77
82
 
78
83
  # Chunking info
@@ -138,8 +143,8 @@ class Document:
138
143
  title=self.title,
139
144
  url=self.url,
140
145
  metadata=self.metadata.copy(),
141
- account_id=self.account_id,
142
146
  collection_id=self.collection_id,
147
+ collection_name=self.collection_name,
143
148
  source_id=self.source_id,
144
149
  chunk_index=chunk_index,
145
150
  total_chunks=total_chunks,
@@ -152,21 +157,21 @@ class Document:
152
157
  created_at=self.created_at,
153
158
  )
154
159
 
155
- def with_tenant(
160
+ def with_collection(
156
161
  self,
157
- account_id: str,
158
- collection_id: str | None = None,
162
+ collection_id: str,
163
+ collection_name: str | None = None,
159
164
  source_id: str | None = None,
160
165
  ) -> Document:
161
- """Create a new document with tenant information.
166
+ """Create a new document with collection information.
162
167
 
163
168
  Args:
164
- account_id: Account/tenant identifier.
165
169
  collection_id: Collection identifier.
170
+ collection_name: Collection name for display.
166
171
  source_id: Source identifier.
167
172
 
168
173
  Returns:
169
- New Document instance with tenant information set.
174
+ New Document instance with collection information set.
170
175
  """
171
176
  return Document(
172
177
  content=self.content,
@@ -175,8 +180,8 @@ class Document:
175
180
  title=self.title,
176
181
  url=self.url,
177
182
  metadata=self.metadata.copy(),
178
- account_id=account_id,
179
- collection_id=collection_id or self.collection_id,
183
+ collection_id=collection_id,
184
+ collection_name=collection_name or self.collection_name,
180
185
  source_id=source_id or self.source_id,
181
186
  chunk_index=self.chunk_index,
182
187
  total_chunks=self.total_chunks,
@@ -198,11 +203,6 @@ class Document:
198
203
  """Check if this document is a chunk of a larger document."""
199
204
  return self.chunk_index is not None and self.total_chunks is not None
200
205
 
201
- @property
202
- def is_multi_tenant(self) -> bool:
203
- """Check if this document has tenant information."""
204
- return self.account_id is not None
205
-
206
206
 
207
207
  @dataclass
208
208
  class TextChunk:
@@ -0,0 +1,440 @@
1
+ """Memory domain models for Agentic Memory."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime
7
+ from enum import Enum
8
+ from typing import Any, Literal
9
+
10
+
11
+ class MemoryStrategy(str, Enum):
12
+ """Memory extraction strategies.
13
+
14
+ Attributes:
15
+ SEMANTIC: General facts and knowledge extraction.
16
+ USER_PREFERENCE: User preferences and choices.
17
+ SUMMARY: Conversation summaries.
18
+ """
19
+
20
+ SEMANTIC = "SEMANTIC"
21
+ USER_PREFERENCE = "USER_PREFERENCE"
22
+ SUMMARY = "SUMMARY"
23
+
24
+
25
+ class MemoryType(str, Enum):
26
+ """Memory storage types.
27
+
28
+ Attributes:
29
+ WORKING: Raw conversation messages (short-term).
30
+ LONG_TERM: Extracted facts with embeddings.
31
+ SESSIONS: Session metadata.
32
+ HISTORY: Audit trail of operations.
33
+ """
34
+
35
+ WORKING = "working"
36
+ LONG_TERM = "long-term"
37
+ SESSIONS = "sessions"
38
+ HISTORY = "history"
39
+
40
+
41
+ class PayloadType(str, Enum):
42
+ """Memory payload types.
43
+
44
+ Attributes:
45
+ CONVERSATIONAL: Conversation messages.
46
+ DATA: Structured data (agent state, traces).
47
+ """
48
+
49
+ CONVERSATIONAL = "conversational"
50
+ DATA = "data"
51
+
52
+
53
+ class EmbeddingModelType(str, Enum):
54
+ """Embedding model types supported by OpenSearch.
55
+
56
+ Attributes:
57
+ TEXT_EMBEDDING: Dense vector embeddings (default).
58
+ SPARSE_ENCODING: Sparse vector encoding.
59
+ """
60
+
61
+ TEXT_EMBEDDING = "TEXT_EMBEDDING"
62
+ SPARSE_ENCODING = "SPARSE_ENCODING"
63
+
64
+
65
+ class HistoryAction(str, Enum):
66
+ """History audit trail action types.
67
+
68
+ Attributes:
69
+ ADD: Memory was added.
70
+ UPDATE: Memory was updated.
71
+ DELETE: Memory was deleted.
72
+ """
73
+
74
+ ADD = "ADD"
75
+ UPDATE = "UPDATE"
76
+ DELETE = "DELETE"
77
+
78
+
79
+ @dataclass
80
+ class StrategyConfig:
81
+ """Configuration for a memory extraction strategy.
82
+
83
+ Each strategy MUST be scoped to namespace fields.
84
+ When storing memory, only strategies whose namespace fields are
85
+ present in the request will run.
86
+
87
+ Attributes:
88
+ type: Strategy type (SEMANTIC, USER_PREFERENCE, SUMMARY).
89
+ namespace: Fields used to scope this strategy (REQUIRED).
90
+ llm_result_path: JSONPath to extract LLM response.
91
+ system_prompt: Optional custom system prompt.
92
+ llm_id: Optional strategy-specific LLM override.
93
+ """
94
+
95
+ type: MemoryStrategy
96
+ namespace: list[str] # REQUIRED - no default
97
+ llm_result_path: str | None = None
98
+ system_prompt: str | None = None
99
+ llm_id: str | None = None
100
+
101
+ def to_dict(self) -> dict[str, Any]:
102
+ """Convert to OpenSearch API format."""
103
+ config: dict[str, Any] = {}
104
+ if self.llm_result_path:
105
+ config["llm_result_path"] = self.llm_result_path
106
+ if self.system_prompt:
107
+ config["system_prompt"] = self.system_prompt
108
+ if self.llm_id:
109
+ config["llm_id"] = self.llm_id
110
+
111
+ return {
112
+ "type": self.type.value,
113
+ "namespace": self.namespace,
114
+ "configuration": config,
115
+ }
116
+
117
+
118
+ @dataclass
119
+ class IndexSettings:
120
+ """Index-level settings for memory container indexes.
121
+
122
+ Attributes:
123
+ number_of_shards: Number of shards for the index.
124
+ number_of_replicas: Number of replicas for the index.
125
+ """
126
+
127
+ number_of_shards: int = 1
128
+ number_of_replicas: int = 1
129
+
130
+ def to_dict(self) -> dict[str, Any]:
131
+ """Convert to OpenSearch index settings format."""
132
+ return {
133
+ "index": {
134
+ "number_of_shards": str(self.number_of_shards),
135
+ "number_of_replicas": str(self.number_of_replicas),
136
+ }
137
+ }
138
+
139
+
140
+ @dataclass
141
+ class ContainerIndexSettings:
142
+ """Settings for all memory container indexes.
143
+
144
+ Attributes:
145
+ session_index: Settings for session index.
146
+ short_term_memory_index: Settings for working memory index.
147
+ long_term_memory_index: Settings for long-term memory index.
148
+ long_term_memory_history_index: Settings for history index.
149
+ """
150
+
151
+ session_index: IndexSettings | None = None
152
+ short_term_memory_index: IndexSettings | None = None
153
+ long_term_memory_index: IndexSettings | None = None
154
+ long_term_memory_history_index: IndexSettings | None = None
155
+
156
+ def to_dict(self) -> dict[str, Any]:
157
+ """Convert to OpenSearch API format."""
158
+ result: dict[str, Any] = {}
159
+ if self.session_index:
160
+ result["session_index"] = self.session_index.to_dict()
161
+ if self.short_term_memory_index:
162
+ result["short_term_memory_index"] = self.short_term_memory_index.to_dict()
163
+ if self.long_term_memory_index:
164
+ result["long_term_memory_index"] = self.long_term_memory_index.to_dict()
165
+ if self.long_term_memory_history_index:
166
+ result["long_term_memory_history_index"] = (
167
+ self.long_term_memory_history_index.to_dict()
168
+ )
169
+ return result
170
+
171
+
172
+ @dataclass
173
+ class ContainerConfig:
174
+ """Memory container configuration.
175
+
176
+ Attributes:
177
+ name: Container name.
178
+ description: Optional description.
179
+ strategies: List of extraction strategies.
180
+ embedding_model_id: OpenSearch embedding model ID.
181
+ embedding_model_type: Type of embedding model (TEXT_EMBEDDING or SPARSE_ENCODING).
182
+ llm_model_id: OpenSearch LLM model ID for inference.
183
+ llm_result_path: JSONPath to extract LLM response.
184
+ embedding_dimension: Embedding vector dimension.
185
+ index_prefix: Custom index prefix (optional).
186
+ use_system_index: Whether to use system indexes (default: True).
187
+ index_settings: Optional index-level settings (shards, replicas).
188
+ """
189
+
190
+ name: str
191
+ description: str | None = None
192
+ strategies: list[StrategyConfig] = field(default_factory=list)
193
+ embedding_model_id: str | None = None
194
+ embedding_model_type: EmbeddingModelType = EmbeddingModelType.TEXT_EMBEDDING
195
+ llm_model_id: str | None = None
196
+ llm_result_path: str = "$.choices[0].message.content"
197
+ embedding_dimension: int = 1536
198
+ index_prefix: str | None = None
199
+ use_system_index: bool = True
200
+ index_settings: ContainerIndexSettings | None = None
201
+
202
+
203
+ @dataclass
204
+ class ContainerInfo:
205
+ """Memory container information.
206
+
207
+ Attributes:
208
+ id: Container ID.
209
+ name: Container name.
210
+ description: Container description.
211
+ strategies: Configured strategies.
212
+ embedding_model_id: Embedding model ID.
213
+ llm_model_id: LLM model ID.
214
+ created_at: Creation timestamp.
215
+ updated_at: Last update timestamp.
216
+ """
217
+
218
+ id: str
219
+ name: str
220
+ description: str | None = None
221
+ strategies: list[MemoryStrategy] = field(default_factory=list)
222
+ embedding_model_id: str | None = None
223
+ llm_model_id: str | None = None
224
+ created_at: datetime | None = None
225
+ updated_at: datetime | None = None
226
+
227
+
228
+ @dataclass
229
+ class Message:
230
+ """A conversation message.
231
+
232
+ Attributes:
233
+ role: Message role (user, assistant, system).
234
+ content: Message content.
235
+ timestamp: Optional timestamp.
236
+ """
237
+
238
+ role: Literal["user", "assistant", "system"]
239
+ content: str
240
+ timestamp: datetime | None = None
241
+
242
+ def to_dict(self) -> dict[str, Any]:
243
+ """Convert to OpenSearch API format."""
244
+ return {
245
+ "role": self.role,
246
+ "content": [{"text": self.content, "type": "text"}],
247
+ }
248
+
249
+
250
+ @dataclass
251
+ class Namespace:
252
+ """Memory namespace for partitioning and strategy scoping.
253
+
254
+ Completely configurable key-value pairs for memory isolation.
255
+ Common fields: user_id, session_id, agent_id, org_id.
256
+
257
+ When creating a container, strategies are scoped to namespace fields.
258
+ When adding memory with `infer=True`, OpenSearch automatically runs
259
+ strategies based on which namespace fields are present.
260
+
261
+ Attributes:
262
+ values: Namespace key-value pairs.
263
+ """
264
+
265
+ values: dict[str, str] = field(default_factory=dict)
266
+
267
+ def __getitem__(self, key: str) -> str | None:
268
+ """Get namespace value by key."""
269
+ return self.values.get(key)
270
+
271
+ def __setitem__(self, key: str, value: str) -> None:
272
+ """Set namespace value by key."""
273
+ self.values[key] = value
274
+
275
+ def to_dict(self) -> dict[str, str]:
276
+ """Get namespace as dictionary for API calls."""
277
+ return dict(self.values)
278
+
279
+
280
+ @dataclass
281
+ class StoreRequest:
282
+ """Request to store memory.
283
+
284
+ Attributes:
285
+ messages: Conversation messages (for conversational payload).
286
+ structured_data: Structured data (for data payload).
287
+ namespace: Namespace for partitioning and strategy scoping.
288
+ payload_type: Type of payload.
289
+ infer: Whether to apply LLM inference for fact extraction.
290
+ metadata: Optional custom metadata.
291
+ tags: Optional custom tags.
292
+ """
293
+
294
+ messages: list[Message] | None = None
295
+ structured_data: dict[str, Any] | None = None
296
+ namespace: Namespace = field(default_factory=Namespace)
297
+ payload_type: PayloadType = PayloadType.CONVERSATIONAL
298
+ infer: bool = True
299
+ metadata: dict[str, Any] = field(default_factory=dict)
300
+ tags: dict[str, str] = field(default_factory=dict)
301
+
302
+
303
+ @dataclass
304
+ class StoreResult:
305
+ """Result of a store operation.
306
+
307
+ Attributes:
308
+ session_id: Session ID (for conversational).
309
+ working_memory_id: Working memory document ID.
310
+ long_term_count: Number of facts extracted (if infer=True).
311
+ extraction_time_ms: Time taken for extraction.
312
+ """
313
+
314
+ session_id: str | None = None
315
+ working_memory_id: str | None = None
316
+ long_term_count: int = 0
317
+ extraction_time_ms: int | None = None
318
+
319
+
320
+ @dataclass
321
+ class MemoryEntry:
322
+ """A memory entry from long-term storage.
323
+
324
+ Attributes:
325
+ id: Memory document ID.
326
+ content: The memory content (extracted fact).
327
+ strategy: Which strategy extracted this.
328
+ score: Similarity score (for search results).
329
+ namespace: Namespace values.
330
+ created_at: Creation timestamp.
331
+ metadata: Custom metadata.
332
+ """
333
+
334
+ id: str
335
+ content: str
336
+ strategy: MemoryStrategy | None = None
337
+ score: float = 0.0
338
+ namespace: dict[str, str] = field(default_factory=dict)
339
+ created_at: datetime | None = None
340
+ metadata: dict[str, Any] = field(default_factory=dict)
341
+
342
+
343
+ @dataclass
344
+ class RecallResult:
345
+ """Result of a recall (search) operation.
346
+
347
+ Attributes:
348
+ items: List of memory entries.
349
+ total: Total number of matches.
350
+ query: The search query.
351
+ took_ms: Time taken in milliseconds.
352
+ """
353
+
354
+ items: list[MemoryEntry]
355
+ total: int
356
+ query: str
357
+ took_ms: int = 0
358
+
359
+
360
+ @dataclass
361
+ class SessionInfo:
362
+ """Session information.
363
+
364
+ Attributes:
365
+ id: Session ID.
366
+ container_id: Parent container ID.
367
+ summary: Session summary text.
368
+ namespace: Session namespace.
369
+ started_at: Session start time.
370
+ ended_at: Session end time (if ended).
371
+ message_count: Number of messages in session.
372
+ messages: Session messages (if requested).
373
+ metadata: Custom session metadata.
374
+ """
375
+
376
+ id: str
377
+ container_id: str
378
+ summary: str | None = None
379
+ namespace: dict[str, str] = field(default_factory=dict)
380
+ started_at: datetime | None = None
381
+ ended_at: datetime | None = None
382
+ message_count: int = 0
383
+ messages: list[Message] | None = None
384
+ metadata: dict[str, Any] = field(default_factory=dict)
385
+
386
+
387
+ @dataclass
388
+ class HistoryEntry:
389
+ """Audit trail entry for memory operations. READ-ONLY.
390
+
391
+ History is READ-ONLY and cannot be updated or deleted.
392
+
393
+ Attributes:
394
+ id: History entry ID.
395
+ memory_id: ID of the affected memory.
396
+ container_id: Parent container ID.
397
+ action: Operation type (ADD, UPDATE, DELETE).
398
+ owner_id: User who performed the action.
399
+ before: State before change (for UPDATE/DELETE).
400
+ after: State after change.
401
+ namespace: Namespace at time of operation.
402
+ tags: Tags at time of operation.
403
+ created_at: Operation timestamp.
404
+ """
405
+
406
+ id: str
407
+ memory_id: str
408
+ container_id: str
409
+ action: HistoryAction
410
+ owner_id: str | None = None
411
+ before: dict[str, Any] | None = None
412
+ after: dict[str, Any] | None = None
413
+ namespace: dict[str, str] = field(default_factory=dict)
414
+ tags: dict[str, str] = field(default_factory=dict)
415
+ created_at: datetime | None = None
416
+
417
+
418
+ @dataclass
419
+ class MemoryStats:
420
+ """Memory usage statistics.
421
+
422
+ Attributes:
423
+ container_id: Container ID.
424
+ container_name: Container name.
425
+ working_memory_count: Messages in working memory.
426
+ long_term_memory_count: Facts in long-term memory.
427
+ session_count: Number of sessions.
428
+ strategies_breakdown: Count per strategy.
429
+ storage_size_bytes: Estimated storage size.
430
+ last_updated: Last update timestamp.
431
+ """
432
+
433
+ container_id: str
434
+ container_name: str
435
+ working_memory_count: int = 0
436
+ long_term_memory_count: int = 0
437
+ session_count: int = 0
438
+ strategies_breakdown: dict[MemoryStrategy, int] = field(default_factory=dict)
439
+ storage_size_bytes: int = 0
440
+ last_updated: datetime | None = None
@@ -3,7 +3,10 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from dataclasses import dataclass, field
6
- from typing import Any
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ if TYPE_CHECKING:
9
+ from gnosisllm_knowledge.core.domain.document import Document
7
10
 
8
11
 
9
12
  @dataclass
@@ -13,7 +16,7 @@ class LoadResult:
13
16
  Attributes:
14
17
  source: The source that was loaded (URL, file path, etc.).
15
18
  source_type: Type of source (website, sitemap, file, etc.).
16
- document_count: Number of documents loaded.
19
+ documents: List of loaded documents.
17
20
  success: Whether the operation succeeded.
18
21
  error_message: Error message if operation failed.
19
22
  duration_ms: Duration of the operation in milliseconds.
@@ -25,8 +28,8 @@ class LoadResult:
25
28
 
26
29
  source: str
27
30
  source_type: str
28
- document_count: int
29
31
  success: bool
32
+ documents: list[Document] = field(default_factory=list)
30
33
  error_message: str | None = None
31
34
  duration_ms: float = 0.0
32
35
  metadata: dict[str, Any] = field(default_factory=dict)
@@ -34,6 +37,11 @@ class LoadResult:
34
37
  urls_failed: int = 0
35
38
  bytes_loaded: int = 0
36
39
 
40
+ @property
41
+ def document_count(self) -> int:
42
+ """Return the number of loaded documents."""
43
+ return len(self.documents)
44
+
37
45
  @property
38
46
  def success_rate(self) -> float:
39
47
  """Calculate the success rate for multi-URL loads."""
@@ -39,6 +39,11 @@ class AgentType(str, Enum):
39
39
  class SearchQuery:
40
40
  """Search query with filters and options.
41
41
 
42
+ Note:
43
+ This library is tenant-agnostic. Multi-tenancy is achieved through index
44
+ isolation (e.g., `knowledge-{account_id}`). Callers should ensure they're
45
+ searching the correct tenant-specific index.
46
+
42
47
  Attributes:
43
48
  text: The search query text.
44
49
  mode: Search mode to use.
@@ -49,7 +54,6 @@ class SearchQuery:
49
54
  Filters:
50
55
  collection_ids: Filter by collection IDs.
51
56
  source_ids: Filter by source IDs.
52
- account_id: Multi-tenant account filter.
53
57
  metadata_filters: Custom metadata filters.
54
58
 
55
59
  Advanced options:
@@ -69,7 +73,6 @@ class SearchQuery:
69
73
  # Filters
70
74
  collection_ids: list[str] | None = None
71
75
  source_ids: list[str] | None = None
72
- account_id: str | None = None
73
76
  metadata_filters: dict[str, Any] = field(default_factory=dict)
74
77
 
75
78
  # Advanced options
@@ -89,26 +92,6 @@ class SearchQuery:
89
92
  min_score=self.min_score,
90
93
  collection_ids=self.collection_ids,
91
94
  source_ids=self.source_ids,
92
- account_id=self.account_id,
93
- metadata_filters=self.metadata_filters.copy(),
94
- field_boosts=self.field_boosts.copy() if self.field_boosts else None,
95
- include_highlights=self.include_highlights,
96
- include_fields=self.include_fields,
97
- exclude_fields=self.exclude_fields,
98
- explain=self.explain,
99
- )
100
-
101
- def with_tenant(self, account_id: str) -> SearchQuery:
102
- """Create a copy with tenant information."""
103
- return SearchQuery(
104
- text=self.text,
105
- mode=self.mode,
106
- limit=self.limit,
107
- offset=self.offset,
108
- min_score=self.min_score,
109
- collection_ids=self.collection_ids,
110
- source_ids=self.source_ids,
111
- account_id=account_id,
112
95
  metadata_filters=self.metadata_filters.copy(),
113
96
  field_boosts=self.field_boosts.copy() if self.field_boosts else None,
114
97
  include_highlights=self.include_highlights,
@@ -216,13 +199,17 @@ class ReasoningStep:
216
199
  class AgenticSearchQuery:
217
200
  """Query for agentic search with conversation support.
218
201
 
202
+ Note:
203
+ This library is tenant-agnostic. Multi-tenancy is achieved through index
204
+ isolation (e.g., `knowledge-{account_id}`). Callers should ensure they're
205
+ searching the correct tenant-specific index.
206
+
219
207
  Attributes:
220
208
  text: The search query text.
221
209
  agent_type: Type of agent to use.
222
210
  conversation_id: ID for continuing a conversation.
223
211
  collection_ids: Filter by collection IDs.
224
212
  source_ids: Filter by source IDs.
225
- account_id: Multi-tenant account filter.
226
213
  limit: Maximum number of source documents to retrieve.
227
214
  include_reasoning: Whether to include reasoning steps.
228
215
  metadata_filters: Custom metadata filters.
@@ -235,7 +222,6 @@ class AgenticSearchQuery:
235
222
  conversation_id: str | None = None
236
223
  collection_ids: list[str] | None = None
237
224
  source_ids: list[str] | None = None
238
- account_id: str | None = None
239
225
  limit: int = 10
240
226
  include_reasoning: bool = True
241
227
  metadata_filters: dict[str, Any] = field(default_factory=dict)
@@ -250,7 +236,6 @@ class AgenticSearchQuery:
250
236
  limit=self.limit,
251
237
  collection_ids=self.collection_ids,
252
238
  source_ids=self.source_ids,
253
- account_id=self.account_id,
254
239
  metadata_filters=self.metadata_filters.copy(),
255
240
  )
256
241
 
@@ -276,6 +261,7 @@ class AgenticSearchResult:
276
261
  total_tokens: Total tokens consumed.
277
262
  prompt_tokens: Tokens used in prompts.
278
263
  completion_tokens: Tokens used in completions.
264
+ generated_query: The DSL query generated by QueryPlanningTool (if applicable).
279
265
  """
280
266
 
281
267
  query: str
@@ -292,6 +278,7 @@ class AgenticSearchResult:
292
278
  total_tokens: int = 0
293
279
  prompt_tokens: int = 0
294
280
  completion_tokens: int = 0
281
+ generated_query: str | None = None # DSL generated by QueryPlanningTool
295
282
 
296
283
  @property
297
284
  def has_answer(self) -> bool: