gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gnosisllm_knowledge/__init__.py +91 -39
- gnosisllm_knowledge/api/__init__.py +3 -2
- gnosisllm_knowledge/api/knowledge.py +287 -7
- gnosisllm_knowledge/api/memory.py +966 -0
- gnosisllm_knowledge/backends/__init__.py +14 -5
- gnosisllm_knowledge/backends/opensearch/agentic.py +341 -39
- gnosisllm_knowledge/backends/opensearch/config.py +49 -28
- gnosisllm_knowledge/backends/opensearch/indexer.py +1 -0
- gnosisllm_knowledge/backends/opensearch/mappings.py +2 -1
- gnosisllm_knowledge/backends/opensearch/memory/__init__.py +12 -0
- gnosisllm_knowledge/backends/opensearch/memory/client.py +1380 -0
- gnosisllm_knowledge/backends/opensearch/memory/config.py +127 -0
- gnosisllm_knowledge/backends/opensearch/memory/setup.py +322 -0
- gnosisllm_knowledge/backends/opensearch/searcher.py +235 -0
- gnosisllm_knowledge/backends/opensearch/setup.py +308 -148
- gnosisllm_knowledge/cli/app.py +378 -12
- gnosisllm_knowledge/cli/commands/agentic.py +11 -0
- gnosisllm_knowledge/cli/commands/memory.py +723 -0
- gnosisllm_knowledge/cli/commands/setup.py +24 -22
- gnosisllm_knowledge/cli/display/service.py +43 -0
- gnosisllm_knowledge/cli/utils/config.py +58 -0
- gnosisllm_knowledge/core/domain/__init__.py +41 -0
- gnosisllm_knowledge/core/domain/document.py +5 -0
- gnosisllm_knowledge/core/domain/memory.py +440 -0
- gnosisllm_knowledge/core/domain/result.py +11 -3
- gnosisllm_knowledge/core/domain/search.py +2 -0
- gnosisllm_knowledge/core/events/types.py +76 -0
- gnosisllm_knowledge/core/exceptions.py +134 -0
- gnosisllm_knowledge/core/interfaces/__init__.py +17 -0
- gnosisllm_knowledge/core/interfaces/memory.py +524 -0
- gnosisllm_knowledge/core/interfaces/streaming.py +127 -0
- gnosisllm_knowledge/core/streaming/__init__.py +36 -0
- gnosisllm_knowledge/core/streaming/pipeline.py +228 -0
- gnosisllm_knowledge/loaders/base.py +3 -4
- gnosisllm_knowledge/loaders/sitemap.py +129 -1
- gnosisllm_knowledge/loaders/sitemap_streaming.py +258 -0
- gnosisllm_knowledge/services/indexing.py +67 -75
- gnosisllm_knowledge/services/search.py +47 -11
- gnosisllm_knowledge/services/streaming_pipeline.py +302 -0
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/METADATA +44 -1
- gnosisllm_knowledge-0.3.0.dist-info/RECORD +77 -0
- gnosisllm_knowledge-0.2.0.dist-info/RECORD +0 -64
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/WHEEL +0 -0
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/entry_points.txt +0 -0
gnosisllm_knowledge/__init__.py
CHANGED
|
@@ -38,7 +38,7 @@ Features:
|
|
|
38
38
|
- SOLID principles throughout
|
|
39
39
|
"""
|
|
40
40
|
|
|
41
|
-
from gnosisllm_knowledge.api import Knowledge
|
|
41
|
+
from gnosisllm_knowledge.api import Knowledge, Memory
|
|
42
42
|
from gnosisllm_knowledge.backends import (
|
|
43
43
|
AgenticSearchFallback,
|
|
44
44
|
MemoryIndexer,
|
|
@@ -51,6 +51,20 @@ from gnosisllm_knowledge.backends import (
|
|
|
51
51
|
)
|
|
52
52
|
from gnosisllm_knowledge.chunking import FixedSizeChunker, SentenceChunker
|
|
53
53
|
from gnosisllm_knowledge.core.domain.document import Document, DocumentStatus, TextChunk
|
|
54
|
+
from gnosisllm_knowledge.core.domain.memory import (
|
|
55
|
+
ContainerConfig,
|
|
56
|
+
ContainerInfo,
|
|
57
|
+
HistoryEntry,
|
|
58
|
+
MemoryEntry,
|
|
59
|
+
MemoryStats,
|
|
60
|
+
MemoryStrategy,
|
|
61
|
+
MemoryType,
|
|
62
|
+
Message,
|
|
63
|
+
Namespace,
|
|
64
|
+
RecallResult,
|
|
65
|
+
SessionInfo,
|
|
66
|
+
StrategyConfig,
|
|
67
|
+
)
|
|
54
68
|
from gnosisllm_knowledge.core.domain.result import (
|
|
55
69
|
BatchResult,
|
|
56
70
|
IndexResult,
|
|
@@ -58,9 +72,9 @@ from gnosisllm_knowledge.core.domain.result import (
|
|
|
58
72
|
ValidationResult,
|
|
59
73
|
)
|
|
60
74
|
from gnosisllm_knowledge.core.domain.search import (
|
|
61
|
-
AgentType,
|
|
62
75
|
AgenticSearchQuery,
|
|
63
76
|
AgenticSearchResult,
|
|
77
|
+
AgentType,
|
|
64
78
|
ReasoningStep,
|
|
65
79
|
SearchMode,
|
|
66
80
|
SearchQuery,
|
|
@@ -72,15 +86,27 @@ from gnosisllm_knowledge.core.exceptions import (
|
|
|
72
86
|
AgenticSearchError,
|
|
73
87
|
ConfigurationError,
|
|
74
88
|
ConnectionError,
|
|
89
|
+
ContainerExistsError,
|
|
90
|
+
ContainerNotFoundError,
|
|
75
91
|
IndexError,
|
|
92
|
+
InferenceError,
|
|
93
|
+
InferenceTimeoutError,
|
|
76
94
|
KnowledgeError,
|
|
77
95
|
LoadError,
|
|
96
|
+
MemoryConfigurationError,
|
|
97
|
+
MemoryError,
|
|
78
98
|
SearchError,
|
|
99
|
+
SessionNotFoundError,
|
|
100
|
+
)
|
|
101
|
+
from gnosisllm_knowledge.core.streaming import (
|
|
102
|
+
BatchCollector,
|
|
103
|
+
BoundedQueue,
|
|
104
|
+
PipelineConfig,
|
|
79
105
|
)
|
|
80
106
|
from gnosisllm_knowledge.fetchers import (
|
|
81
107
|
HTTPContentFetcher,
|
|
82
|
-
NeoreaderContentFetcher,
|
|
83
108
|
NeoreaderConfig,
|
|
109
|
+
NeoreaderContentFetcher,
|
|
84
110
|
)
|
|
85
111
|
from gnosisllm_knowledge.loaders import (
|
|
86
112
|
LoaderFactory,
|
|
@@ -95,58 +121,84 @@ from gnosisllm_knowledge.services import (
|
|
|
95
121
|
__version__ = "0.2.0"
|
|
96
122
|
|
|
97
123
|
__all__ = [
|
|
98
|
-
# Main API
|
|
99
|
-
"Knowledge",
|
|
100
|
-
# Domain Models
|
|
101
|
-
"Document",
|
|
102
|
-
"DocumentStatus",
|
|
103
|
-
"TextChunk",
|
|
104
|
-
"SearchQuery",
|
|
105
|
-
"SearchResult",
|
|
106
|
-
"SearchResultItem",
|
|
107
|
-
"SearchMode",
|
|
108
124
|
"AgentType",
|
|
125
|
+
"AgenticSearchError",
|
|
126
|
+
"AgenticSearchFallback",
|
|
109
127
|
"AgenticSearchQuery",
|
|
110
128
|
"AgenticSearchResult",
|
|
111
|
-
"
|
|
112
|
-
"LoadResult",
|
|
113
|
-
"IndexResult",
|
|
129
|
+
"BatchCollector",
|
|
114
130
|
"BatchResult",
|
|
115
|
-
"
|
|
131
|
+
"BoundedQueue",
|
|
132
|
+
"ConfigurationError",
|
|
133
|
+
"ConnectionError",
|
|
134
|
+
"ContainerConfig",
|
|
135
|
+
"ContainerExistsError",
|
|
136
|
+
"ContainerInfo",
|
|
137
|
+
"ContainerNotFoundError",
|
|
138
|
+
# Domain Models
|
|
139
|
+
"Document",
|
|
140
|
+
"DocumentStatus",
|
|
116
141
|
# Events
|
|
117
142
|
"Event",
|
|
118
|
-
"EventType",
|
|
119
143
|
"EventEmitter",
|
|
144
|
+
"EventType",
|
|
145
|
+
"FixedSizeChunker",
|
|
146
|
+
# Fetchers
|
|
147
|
+
"HTTPContentFetcher",
|
|
148
|
+
"HistoryEntry",
|
|
149
|
+
"IndexError",
|
|
150
|
+
"IndexResult",
|
|
151
|
+
"InferenceError",
|
|
152
|
+
"InferenceTimeoutError",
|
|
153
|
+
# Main API
|
|
154
|
+
"Knowledge",
|
|
120
155
|
# Exceptions
|
|
121
156
|
"KnowledgeError",
|
|
122
|
-
|
|
123
|
-
"
|
|
157
|
+
# Services
|
|
158
|
+
"KnowledgeIndexingService",
|
|
159
|
+
"KnowledgeSearchService",
|
|
124
160
|
"LoadError",
|
|
125
|
-
"
|
|
126
|
-
"SearchError",
|
|
127
|
-
"AgenticSearchError",
|
|
161
|
+
"LoadResult",
|
|
128
162
|
# Loaders
|
|
129
163
|
"LoaderFactory",
|
|
130
|
-
"
|
|
131
|
-
"
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
"
|
|
164
|
+
"Memory",
|
|
165
|
+
"MemoryConfigurationError",
|
|
166
|
+
"MemoryEntry",
|
|
167
|
+
# Memory Exceptions
|
|
168
|
+
"MemoryError",
|
|
169
|
+
# Memory Backend (for testing)
|
|
170
|
+
"MemoryIndexer",
|
|
171
|
+
"MemorySearcher",
|
|
172
|
+
"MemoryStats",
|
|
173
|
+
# Memory Domain Models
|
|
174
|
+
"MemoryStrategy",
|
|
175
|
+
"MemoryType",
|
|
176
|
+
"Message",
|
|
177
|
+
"Namespace",
|
|
135
178
|
"NeoreaderConfig",
|
|
136
|
-
|
|
137
|
-
"
|
|
138
|
-
"FixedSizeChunker",
|
|
179
|
+
"NeoreaderContentFetcher",
|
|
180
|
+
"OpenSearchAgenticSearcher",
|
|
139
181
|
# OpenSearch Backend
|
|
140
182
|
"OpenSearchConfig",
|
|
141
183
|
"OpenSearchIndexer",
|
|
142
184
|
"OpenSearchKnowledgeSearcher",
|
|
143
185
|
"OpenSearchSetupAdapter",
|
|
144
|
-
|
|
145
|
-
"
|
|
146
|
-
|
|
147
|
-
"
|
|
148
|
-
"
|
|
149
|
-
|
|
150
|
-
"
|
|
151
|
-
"
|
|
186
|
+
# Streaming Pipeline
|
|
187
|
+
"PipelineConfig",
|
|
188
|
+
"ReasoningStep",
|
|
189
|
+
"RecallResult",
|
|
190
|
+
"SearchError",
|
|
191
|
+
"SearchMode",
|
|
192
|
+
"SearchQuery",
|
|
193
|
+
"SearchResult",
|
|
194
|
+
"SearchResultItem",
|
|
195
|
+
# Chunkers
|
|
196
|
+
"SentenceChunker",
|
|
197
|
+
"SessionInfo",
|
|
198
|
+
"SessionNotFoundError",
|
|
199
|
+
"SitemapLoader",
|
|
200
|
+
"StrategyConfig",
|
|
201
|
+
"TextChunk",
|
|
202
|
+
"ValidationResult",
|
|
203
|
+
"WebsiteLoader",
|
|
152
204
|
]
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
"""High-level API for knowledge operations."""
|
|
1
|
+
"""High-level API for knowledge and memory operations."""
|
|
2
2
|
|
|
3
3
|
from gnosisllm_knowledge.api.knowledge import Knowledge
|
|
4
|
+
from gnosisllm_knowledge.api.memory import Memory
|
|
4
5
|
|
|
5
|
-
__all__ = ["Knowledge"]
|
|
6
|
+
__all__ = ["Knowledge", "Memory"]
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
+
from collections.abc import Callable
|
|
6
7
|
from typing import TYPE_CHECKING, Any
|
|
7
8
|
|
|
8
9
|
from gnosisllm_knowledge.backends.opensearch import (
|
|
@@ -11,15 +12,24 @@ from gnosisllm_knowledge.backends.opensearch import (
|
|
|
11
12
|
OpenSearchKnowledgeSearcher,
|
|
12
13
|
OpenSearchSetupAdapter,
|
|
13
14
|
)
|
|
15
|
+
from gnosisllm_knowledge.backends.opensearch.agentic import OpenSearchAgenticSearcher
|
|
14
16
|
from gnosisllm_knowledge.chunking import SentenceChunker
|
|
15
17
|
from gnosisllm_knowledge.core.domain.result import IndexResult
|
|
16
|
-
from gnosisllm_knowledge.core.domain.search import
|
|
18
|
+
from gnosisllm_knowledge.core.domain.search import (
|
|
19
|
+
AgentType,
|
|
20
|
+
AgenticSearchQuery,
|
|
21
|
+
AgenticSearchResult,
|
|
22
|
+
SearchMode,
|
|
23
|
+
SearchResult,
|
|
24
|
+
)
|
|
17
25
|
from gnosisllm_knowledge.core.events.emitter import EventEmitter
|
|
18
26
|
from gnosisllm_knowledge.core.interfaces.setup import DiagnosticReport, HealthReport
|
|
27
|
+
from gnosisllm_knowledge.core.streaming.pipeline import PipelineConfig
|
|
19
28
|
from gnosisllm_knowledge.fetchers import NeoreaderContentFetcher
|
|
20
29
|
from gnosisllm_knowledge.fetchers.config import NeoreaderConfig
|
|
21
30
|
from gnosisllm_knowledge.loaders import LoaderFactory
|
|
22
31
|
from gnosisllm_knowledge.services import KnowledgeIndexingService, KnowledgeSearchService
|
|
32
|
+
from gnosisllm_knowledge.services.streaming_pipeline import StreamingIndexingPipeline
|
|
23
33
|
|
|
24
34
|
if TYPE_CHECKING:
|
|
25
35
|
from opensearchpy import AsyncOpenSearch
|
|
@@ -159,11 +169,12 @@ class Knowledge:
|
|
|
159
169
|
**kwargs,
|
|
160
170
|
)
|
|
161
171
|
|
|
162
|
-
# Create client
|
|
172
|
+
# Create client with proper timeout settings
|
|
163
173
|
client_kwargs: dict[str, Any] = {
|
|
164
174
|
"hosts": [{"host": config.host, "port": config.port}],
|
|
165
175
|
"use_ssl": config.use_ssl,
|
|
166
176
|
"verify_certs": config.verify_certs,
|
|
177
|
+
"timeout": max(config.read_timeout, config.agentic_timeout_seconds),
|
|
167
178
|
}
|
|
168
179
|
|
|
169
180
|
if config.username and config.password:
|
|
@@ -181,11 +192,16 @@ class Knowledge:
|
|
|
181
192
|
# Create fetcher
|
|
182
193
|
fetcher = None
|
|
183
194
|
if neoreader_url:
|
|
184
|
-
neoreader_config = NeoreaderConfig(
|
|
195
|
+
neoreader_config = NeoreaderConfig(host=neoreader_url)
|
|
185
196
|
fetcher = NeoreaderContentFetcher(neoreader_config)
|
|
186
197
|
|
|
187
|
-
# Create
|
|
188
|
-
|
|
198
|
+
# Create chunker
|
|
199
|
+
chunker = SentenceChunker()
|
|
200
|
+
|
|
201
|
+
# Create loader factory (fetcher is optional, defaults will be used if None)
|
|
202
|
+
loader_factory = None
|
|
203
|
+
if fetcher:
|
|
204
|
+
loader_factory = LoaderFactory(fetcher=fetcher, chunker=chunker)
|
|
189
205
|
|
|
190
206
|
return cls(
|
|
191
207
|
indexer=indexer,
|
|
@@ -335,9 +351,9 @@ class Knowledge:
|
|
|
335
351
|
|
|
336
352
|
# Auto-detect or use explicit source type
|
|
337
353
|
if source_type:
|
|
338
|
-
loader = self._loader_factory.create(source_type
|
|
354
|
+
loader = self._loader_factory.create(source_type)
|
|
339
355
|
else:
|
|
340
|
-
loader = self._loader_factory.create_for_source(source
|
|
356
|
+
loader = self._loader_factory.create_for_source(source)
|
|
341
357
|
|
|
342
358
|
# Create service for this load operation
|
|
343
359
|
service = KnowledgeIndexingService(
|
|
@@ -356,6 +372,95 @@ class Knowledge:
|
|
|
356
372
|
**options,
|
|
357
373
|
)
|
|
358
374
|
|
|
375
|
+
async def load_streaming(
|
|
376
|
+
self,
|
|
377
|
+
source: str,
|
|
378
|
+
*,
|
|
379
|
+
index_name: str | None = None,
|
|
380
|
+
account_id: str | None = None,
|
|
381
|
+
collection_id: str | None = None,
|
|
382
|
+
collection_name: str | None = None,
|
|
383
|
+
source_id: str | None = None,
|
|
384
|
+
url_batch_size: int = 50,
|
|
385
|
+
fetch_concurrency: int = 10,
|
|
386
|
+
index_batch_size: int = 100,
|
|
387
|
+
on_progress: Callable[[int, int], None] | None = None,
|
|
388
|
+
**options: Any,
|
|
389
|
+
) -> IndexResult:
|
|
390
|
+
"""Load and index content using streaming pipeline with bounded memory.
|
|
391
|
+
|
|
392
|
+
This method is optimized for large sitemaps (10,000+ URLs) that would
|
|
393
|
+
otherwise exhaust memory. It processes URLs in batches, indexing
|
|
394
|
+
documents immediately rather than loading all content first.
|
|
395
|
+
|
|
396
|
+
Memory usage is bounded and independent of sitemap size:
|
|
397
|
+
- URL storage: O(url_batch_size)
|
|
398
|
+
- Document storage: O(index_batch_size)
|
|
399
|
+
- In-flight fetches: O(fetch_concurrency * avg_page_size)
|
|
400
|
+
|
|
401
|
+
Args:
|
|
402
|
+
source: Sitemap URL.
|
|
403
|
+
index_name: Target index (uses default if not provided).
|
|
404
|
+
account_id: Account ID for multi-tenancy.
|
|
405
|
+
collection_id: Collection ID.
|
|
406
|
+
collection_name: Collection name for display.
|
|
407
|
+
source_id: Source ID (auto-generated if not provided).
|
|
408
|
+
url_batch_size: URLs to discover per batch (default 50).
|
|
409
|
+
fetch_concurrency: Parallel URL fetches (default 10).
|
|
410
|
+
index_batch_size: Documents per index batch (default 100).
|
|
411
|
+
on_progress: Optional progress callback (urls_processed, docs_indexed).
|
|
412
|
+
**options: Additional loading options (max_urls, patterns, etc.).
|
|
413
|
+
|
|
414
|
+
Returns:
|
|
415
|
+
Index result with counts.
|
|
416
|
+
|
|
417
|
+
Example:
|
|
418
|
+
```python
|
|
419
|
+
# Efficiently load 100k+ URL sitemap
|
|
420
|
+
result = await knowledge.load_streaming(
|
|
421
|
+
"https://large-site.com/sitemap.xml",
|
|
422
|
+
url_batch_size=100,
|
|
423
|
+
fetch_concurrency=20,
|
|
424
|
+
max_urls=50000,
|
|
425
|
+
)
|
|
426
|
+
print(f"Indexed {result.indexed_count} documents")
|
|
427
|
+
```
|
|
428
|
+
"""
|
|
429
|
+
if self._loader_factory is None:
|
|
430
|
+
raise ValueError("Loader factory not configured")
|
|
431
|
+
|
|
432
|
+
index = index_name or self._default_index
|
|
433
|
+
if not index:
|
|
434
|
+
raise ValueError("No index specified and no default index configured")
|
|
435
|
+
|
|
436
|
+
# Create sitemap loader specifically for streaming
|
|
437
|
+
loader = self._loader_factory.create("sitemap")
|
|
438
|
+
|
|
439
|
+
# Configure pipeline
|
|
440
|
+
config = PipelineConfig(
|
|
441
|
+
url_batch_size=url_batch_size,
|
|
442
|
+
fetch_concurrency=fetch_concurrency,
|
|
443
|
+
index_batch_size=index_batch_size,
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
# Create streaming pipeline
|
|
447
|
+
pipeline = StreamingIndexingPipeline(
|
|
448
|
+
loader=loader,
|
|
449
|
+
indexer=self._indexer,
|
|
450
|
+
config=config,
|
|
451
|
+
events=self._events,
|
|
452
|
+
)
|
|
453
|
+
|
|
454
|
+
return await pipeline.execute(
|
|
455
|
+
source=source,
|
|
456
|
+
index_name=index,
|
|
457
|
+
account_id=account_id,
|
|
458
|
+
collection_id=collection_id,
|
|
459
|
+
collection_name=collection_name,
|
|
460
|
+
source_id=source_id,
|
|
461
|
+
**options,
|
|
462
|
+
)
|
|
463
|
+
|
|
359
464
|
# === Search Methods ===
|
|
360
465
|
|
|
361
466
|
async def search(
|
|
@@ -542,6 +647,181 @@ class Knowledge:
|
|
|
542
647
|
collection_id=collection_id,
|
|
543
648
|
)
|
|
544
649
|
|
|
650
|
+
# === Collection and Stats Methods ===
|
|
651
|
+
|
|
652
|
+
async def get_collections(self) -> list[dict[str, Any]]:
|
|
653
|
+
"""Get all collections with document counts.
|
|
654
|
+
|
|
655
|
+
Aggregates unique collection_ids from indexed documents.
|
|
656
|
+
|
|
657
|
+
Returns:
|
|
658
|
+
List of collection dictionaries with id, name, and document_count.
|
|
659
|
+
"""
|
|
660
|
+
return await self.search_service.get_collections()
|
|
661
|
+
|
|
662
|
+
async def get_stats(self) -> dict[str, Any]:
|
|
663
|
+
"""Get index statistics.
|
|
664
|
+
|
|
665
|
+
Returns:
|
|
666
|
+
Dictionary with document_count, index_name, and other stats.
|
|
667
|
+
"""
|
|
668
|
+
return await self.search_service.get_stats()
|
|
669
|
+
|
|
670
|
+
async def list_documents(
|
|
671
|
+
self,
|
|
672
|
+
*,
|
|
673
|
+
source_id: str | None = None,
|
|
674
|
+
collection_id: str | None = None,
|
|
675
|
+
limit: int = 50,
|
|
676
|
+
offset: int = 0,
|
|
677
|
+
) -> dict[str, Any]:
|
|
678
|
+
"""List documents with optional filters.
|
|
679
|
+
|
|
680
|
+
Args:
|
|
681
|
+
source_id: Optional source ID filter.
|
|
682
|
+
collection_id: Optional collection ID filter.
|
|
683
|
+
limit: Maximum documents to return (max 100).
|
|
684
|
+
offset: Number of documents to skip.
|
|
685
|
+
|
|
686
|
+
Returns:
|
|
687
|
+
Dictionary with documents, total, limit, offset.
|
|
688
|
+
"""
|
|
689
|
+
index = self._default_index
|
|
690
|
+
if not index:
|
|
691
|
+
raise ValueError("No default index configured")
|
|
692
|
+
|
|
693
|
+
# Clamp limit to reasonable bounds
|
|
694
|
+
limit = min(max(1, limit), 100)
|
|
695
|
+
offset = max(0, offset)
|
|
696
|
+
|
|
697
|
+
return await self._searcher.list_documents(
|
|
698
|
+
index_name=index,
|
|
699
|
+
source_id=source_id,
|
|
700
|
+
collection_id=collection_id,
|
|
701
|
+
limit=limit,
|
|
702
|
+
offset=offset,
|
|
703
|
+
)
|
|
704
|
+
|
|
705
|
+
# === Agentic Search Status ===
|
|
706
|
+
|
|
707
|
+
@property
|
|
708
|
+
def is_agentic_configured(self) -> bool:
|
|
709
|
+
"""Check if agentic search is configured.
|
|
710
|
+
|
|
711
|
+
Returns:
|
|
712
|
+
True if at least one agent type is configured.
|
|
713
|
+
"""
|
|
714
|
+
if not hasattr(self, '_searcher') or not hasattr(self._searcher, '_config'):
|
|
715
|
+
return False
|
|
716
|
+
config = self._searcher._config
|
|
717
|
+
return bool(config.flow_agent_id or config.conversational_agent_id)
|
|
718
|
+
|
|
719
|
+
async def get_agentic_status(self) -> dict[str, Any]:
|
|
720
|
+
"""Get status of agentic search configuration.
|
|
721
|
+
|
|
722
|
+
Returns:
|
|
723
|
+
Dictionary with agent availability status:
|
|
724
|
+
- available: True if any agent is configured
|
|
725
|
+
- flow_agent: True if flow agent is configured
|
|
726
|
+
- conversational_agent: True if conversational agent is configured
|
|
727
|
+
"""
|
|
728
|
+
if not hasattr(self, '_searcher') or not hasattr(self._searcher, '_config'):
|
|
729
|
+
return {
|
|
730
|
+
"available": False,
|
|
731
|
+
"flow_agent": False,
|
|
732
|
+
"conversational_agent": False,
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
config = self._searcher._config
|
|
736
|
+
return {
|
|
737
|
+
"available": bool(config.flow_agent_id or config.conversational_agent_id),
|
|
738
|
+
"flow_agent": bool(config.flow_agent_id),
|
|
739
|
+
"conversational_agent": bool(config.conversational_agent_id),
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
async def agentic_search(
|
|
743
|
+
self,
|
|
744
|
+
query: str,
|
|
745
|
+
*,
|
|
746
|
+
agent_type: AgentType = AgentType.FLOW,
|
|
747
|
+
index_name: str | None = None,
|
|
748
|
+
collection_ids: list[str] | None = None,
|
|
749
|
+
source_ids: list[str] | None = None,
|
|
750
|
+
conversation_id: str | None = None,
|
|
751
|
+
include_reasoning: bool = True,
|
|
752
|
+
limit: int = 10,
|
|
753
|
+
**options: Any,
|
|
754
|
+
) -> AgenticSearchResult:
|
|
755
|
+
"""Execute agentic search with AI-powered reasoning.
|
|
756
|
+
|
|
757
|
+
Uses OpenSearch ML agents to understand queries, retrieve relevant
|
|
758
|
+
documents, and generate natural language answers.
|
|
759
|
+
|
|
760
|
+
Args:
|
|
761
|
+
query: Search query text.
|
|
762
|
+
agent_type: Type of agent (FLOW for fast RAG, CONVERSATIONAL for multi-turn).
|
|
763
|
+
index_name: Index to search (uses default if not provided).
|
|
764
|
+
collection_ids: Filter by collection IDs.
|
|
765
|
+
source_ids: Filter by source IDs.
|
|
766
|
+
conversation_id: Conversation ID for multi-turn (conversational agent).
|
|
767
|
+
include_reasoning: Include reasoning steps in response.
|
|
768
|
+
limit: Maximum source documents to retrieve.
|
|
769
|
+
**options: Additional agent options.
|
|
770
|
+
|
|
771
|
+
Returns:
|
|
772
|
+
AgenticSearchResult with answer, reasoning steps, and sources.
|
|
773
|
+
|
|
774
|
+
Raises:
|
|
775
|
+
AgenticSearchError: If agent execution fails.
|
|
776
|
+
ValueError: If agentic search is not configured.
|
|
777
|
+
|
|
778
|
+
Example:
|
|
779
|
+
```python
|
|
780
|
+
result = await knowledge.agentic_search(
|
|
781
|
+
"How does authentication work?",
|
|
782
|
+
agent_type=AgentType.FLOW,
|
|
783
|
+
)
|
|
784
|
+
print(result.answer)
|
|
785
|
+
for source in result.items:
|
|
786
|
+
print(f"- {source.title}")
|
|
787
|
+
```
|
|
788
|
+
"""
|
|
789
|
+
# Check if agentic search is configured
|
|
790
|
+
if not self.is_agentic_configured:
|
|
791
|
+
raise ValueError(
|
|
792
|
+
"Agentic search is not configured. "
|
|
793
|
+
"Run 'gnosisllm-knowledge agentic setup' and set agent IDs in environment."
|
|
794
|
+
)
|
|
795
|
+
|
|
796
|
+
# Get client and config from the searcher
|
|
797
|
+
if not hasattr(self._searcher, '_client') or not hasattr(self._searcher, '_config'):
|
|
798
|
+
raise ValueError("Searcher does not have OpenSearch client/config")
|
|
799
|
+
|
|
800
|
+
client = self._searcher._client
|
|
801
|
+
config = self._searcher._config
|
|
802
|
+
|
|
803
|
+
# Create agentic searcher
|
|
804
|
+
agentic_searcher = OpenSearchAgenticSearcher(client, config)
|
|
805
|
+
|
|
806
|
+
# Build agentic query
|
|
807
|
+
agentic_query = AgenticSearchQuery(
|
|
808
|
+
text=query,
|
|
809
|
+
agent_type=agent_type,
|
|
810
|
+
collection_ids=collection_ids,
|
|
811
|
+
source_ids=source_ids,
|
|
812
|
+
conversation_id=conversation_id,
|
|
813
|
+
include_reasoning=include_reasoning,
|
|
814
|
+
limit=limit,
|
|
815
|
+
)
|
|
816
|
+
|
|
817
|
+
# Determine index name
|
|
818
|
+
index = index_name or self._default_index
|
|
819
|
+
if not index:
|
|
820
|
+
raise ValueError("No index specified and no default index configured")
|
|
821
|
+
|
|
822
|
+
# Execute agentic search
|
|
823
|
+
return await agentic_searcher.agentic_search(agentic_query, index, **options)
|
|
824
|
+
|
|
545
825
|
async def close(self) -> None:
|
|
546
826
|
"""Close connections and clean up resources."""
|
|
547
827
|
# Subclasses or future implementations can override this
|