gnosisllm-knowledge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. gnosisllm_knowledge/__init__.py +152 -0
  2. gnosisllm_knowledge/api/__init__.py +5 -0
  3. gnosisllm_knowledge/api/knowledge.py +548 -0
  4. gnosisllm_knowledge/backends/__init__.py +26 -0
  5. gnosisllm_knowledge/backends/memory/__init__.py +9 -0
  6. gnosisllm_knowledge/backends/memory/indexer.py +384 -0
  7. gnosisllm_knowledge/backends/memory/searcher.py +516 -0
  8. gnosisllm_knowledge/backends/opensearch/__init__.py +19 -0
  9. gnosisllm_knowledge/backends/opensearch/agentic.py +738 -0
  10. gnosisllm_knowledge/backends/opensearch/config.py +195 -0
  11. gnosisllm_knowledge/backends/opensearch/indexer.py +499 -0
  12. gnosisllm_knowledge/backends/opensearch/mappings.py +255 -0
  13. gnosisllm_knowledge/backends/opensearch/queries.py +445 -0
  14. gnosisllm_knowledge/backends/opensearch/searcher.py +383 -0
  15. gnosisllm_knowledge/backends/opensearch/setup.py +1390 -0
  16. gnosisllm_knowledge/chunking/__init__.py +9 -0
  17. gnosisllm_knowledge/chunking/fixed.py +138 -0
  18. gnosisllm_knowledge/chunking/sentence.py +239 -0
  19. gnosisllm_knowledge/cli/__init__.py +18 -0
  20. gnosisllm_knowledge/cli/app.py +509 -0
  21. gnosisllm_knowledge/cli/commands/__init__.py +7 -0
  22. gnosisllm_knowledge/cli/commands/agentic.py +529 -0
  23. gnosisllm_knowledge/cli/commands/load.py +369 -0
  24. gnosisllm_knowledge/cli/commands/search.py +440 -0
  25. gnosisllm_knowledge/cli/commands/setup.py +228 -0
  26. gnosisllm_knowledge/cli/display/__init__.py +5 -0
  27. gnosisllm_knowledge/cli/display/service.py +555 -0
  28. gnosisllm_knowledge/cli/utils/__init__.py +5 -0
  29. gnosisllm_knowledge/cli/utils/config.py +207 -0
  30. gnosisllm_knowledge/core/__init__.py +87 -0
  31. gnosisllm_knowledge/core/domain/__init__.py +43 -0
  32. gnosisllm_knowledge/core/domain/document.py +240 -0
  33. gnosisllm_knowledge/core/domain/result.py +176 -0
  34. gnosisllm_knowledge/core/domain/search.py +327 -0
  35. gnosisllm_knowledge/core/domain/source.py +139 -0
  36. gnosisllm_knowledge/core/events/__init__.py +23 -0
  37. gnosisllm_knowledge/core/events/emitter.py +216 -0
  38. gnosisllm_knowledge/core/events/types.py +226 -0
  39. gnosisllm_knowledge/core/exceptions.py +407 -0
  40. gnosisllm_knowledge/core/interfaces/__init__.py +20 -0
  41. gnosisllm_knowledge/core/interfaces/agentic.py +136 -0
  42. gnosisllm_knowledge/core/interfaces/chunker.py +64 -0
  43. gnosisllm_knowledge/core/interfaces/fetcher.py +112 -0
  44. gnosisllm_knowledge/core/interfaces/indexer.py +244 -0
  45. gnosisllm_knowledge/core/interfaces/loader.py +102 -0
  46. gnosisllm_knowledge/core/interfaces/searcher.py +178 -0
  47. gnosisllm_knowledge/core/interfaces/setup.py +164 -0
  48. gnosisllm_knowledge/fetchers/__init__.py +12 -0
  49. gnosisllm_knowledge/fetchers/config.py +77 -0
  50. gnosisllm_knowledge/fetchers/http.py +167 -0
  51. gnosisllm_knowledge/fetchers/neoreader.py +204 -0
  52. gnosisllm_knowledge/loaders/__init__.py +13 -0
  53. gnosisllm_knowledge/loaders/base.py +399 -0
  54. gnosisllm_knowledge/loaders/factory.py +202 -0
  55. gnosisllm_knowledge/loaders/sitemap.py +285 -0
  56. gnosisllm_knowledge/loaders/website.py +57 -0
  57. gnosisllm_knowledge/py.typed +0 -0
  58. gnosisllm_knowledge/services/__init__.py +9 -0
  59. gnosisllm_knowledge/services/indexing.py +387 -0
  60. gnosisllm_knowledge/services/search.py +349 -0
  61. gnosisllm_knowledge-0.2.0.dist-info/METADATA +382 -0
  62. gnosisllm_knowledge-0.2.0.dist-info/RECORD +64 -0
  63. gnosisllm_knowledge-0.2.0.dist-info/WHEEL +4 -0
  64. gnosisllm_knowledge-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,152 @@
1
+ """GnosisLLM Knowledge - Enterprise-grade knowledge loading, indexing, and search.
2
+
3
+ This library provides a comprehensive solution for building knowledge-powered
4
+ applications with semantic search capabilities.
5
+
6
+ Quick Start:
7
+ ```python
8
+ from gnosisllm_knowledge import Knowledge
9
+
10
+ # Create instance with OpenSearch backend
11
+ knowledge = Knowledge.from_opensearch(
12
+ host="localhost",
13
+ port=9200,
14
+ )
15
+
16
+ # Setup backend (creates indices)
17
+ await knowledge.setup()
18
+
19
+ # Load and index a sitemap
20
+ await knowledge.load(
21
+ "https://docs.example.com/sitemap.xml",
22
+ collection_id="docs",
23
+ )
24
+
25
+ # Search
26
+ results = await knowledge.search("how to configure")
27
+ for item in results.items:
28
+ print(f"{item.title}: {item.score}")
29
+ ```
30
+
31
+ Features:
32
+ - Semantic, keyword, and hybrid search
33
+ - Multiple content loaders (website, sitemap, files)
34
+ - Intelligent text chunking
35
+ - OpenSearch backend with k-NN vectors
36
+ - Multi-tenancy support
37
+ - Event-driven architecture
38
+ - SOLID principles throughout
39
+ """
40
+
41
+ from gnosisllm_knowledge.api import Knowledge
42
+ from gnosisllm_knowledge.backends import (
43
+ AgenticSearchFallback,
44
+ MemoryIndexer,
45
+ MemorySearcher,
46
+ OpenSearchAgenticSearcher,
47
+ OpenSearchConfig,
48
+ OpenSearchIndexer,
49
+ OpenSearchKnowledgeSearcher,
50
+ OpenSearchSetupAdapter,
51
+ )
52
+ from gnosisllm_knowledge.chunking import FixedSizeChunker, SentenceChunker
53
+ from gnosisllm_knowledge.core.domain.document import Document, DocumentStatus, TextChunk
54
+ from gnosisllm_knowledge.core.domain.result import (
55
+ BatchResult,
56
+ IndexResult,
57
+ LoadResult,
58
+ ValidationResult,
59
+ )
60
+ from gnosisllm_knowledge.core.domain.search import (
61
+ AgentType,
62
+ AgenticSearchQuery,
63
+ AgenticSearchResult,
64
+ ReasoningStep,
65
+ SearchMode,
66
+ SearchQuery,
67
+ SearchResult,
68
+ SearchResultItem,
69
+ )
70
+ from gnosisllm_knowledge.core.events import Event, EventEmitter, EventType
71
+ from gnosisllm_knowledge.core.exceptions import (
72
+ AgenticSearchError,
73
+ ConfigurationError,
74
+ ConnectionError,
75
+ IndexError,
76
+ KnowledgeError,
77
+ LoadError,
78
+ SearchError,
79
+ )
80
+ from gnosisllm_knowledge.fetchers import (
81
+ HTTPContentFetcher,
82
+ NeoreaderContentFetcher,
83
+ NeoreaderConfig,
84
+ )
85
+ from gnosisllm_knowledge.loaders import (
86
+ LoaderFactory,
87
+ SitemapLoader,
88
+ WebsiteLoader,
89
+ )
90
+ from gnosisllm_knowledge.services import (
91
+ KnowledgeIndexingService,
92
+ KnowledgeSearchService,
93
+ )
94
+
95
+ __version__ = "0.2.0"
96
+
97
+ __all__ = [
98
+ # Main API
99
+ "Knowledge",
100
+ # Domain Models
101
+ "Document",
102
+ "DocumentStatus",
103
+ "TextChunk",
104
+ "SearchQuery",
105
+ "SearchResult",
106
+ "SearchResultItem",
107
+ "SearchMode",
108
+ "AgentType",
109
+ "AgenticSearchQuery",
110
+ "AgenticSearchResult",
111
+ "ReasoningStep",
112
+ "LoadResult",
113
+ "IndexResult",
114
+ "BatchResult",
115
+ "ValidationResult",
116
+ # Events
117
+ "Event",
118
+ "EventType",
119
+ "EventEmitter",
120
+ # Exceptions
121
+ "KnowledgeError",
122
+ "ConfigurationError",
123
+ "ConnectionError",
124
+ "LoadError",
125
+ "IndexError",
126
+ "SearchError",
127
+ "AgenticSearchError",
128
+ # Loaders
129
+ "LoaderFactory",
130
+ "WebsiteLoader",
131
+ "SitemapLoader",
132
+ # Fetchers
133
+ "HTTPContentFetcher",
134
+ "NeoreaderContentFetcher",
135
+ "NeoreaderConfig",
136
+ # Chunkers
137
+ "SentenceChunker",
138
+ "FixedSizeChunker",
139
+ # OpenSearch Backend
140
+ "OpenSearchConfig",
141
+ "OpenSearchIndexer",
142
+ "OpenSearchKnowledgeSearcher",
143
+ "OpenSearchSetupAdapter",
144
+ "OpenSearchAgenticSearcher",
145
+ "AgenticSearchFallback",
146
+ # Memory Backend (for testing)
147
+ "MemoryIndexer",
148
+ "MemorySearcher",
149
+ # Services
150
+ "KnowledgeIndexingService",
151
+ "KnowledgeSearchService",
152
+ ]
@@ -0,0 +1,5 @@
1
+ """High-level API for knowledge operations."""
2
+
3
+ from gnosisllm_knowledge.api.knowledge import Knowledge
4
+
5
+ __all__ = ["Knowledge"]
@@ -0,0 +1,548 @@
1
+ """High-level Knowledge API facade."""
2
+
3
from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Any

from gnosisllm_knowledge.backends.opensearch import (
    OpenSearchConfig,
    OpenSearchIndexer,
    OpenSearchKnowledgeSearcher,
    OpenSearchSetupAdapter,
)
from gnosisllm_knowledge.chunking import SentenceChunker
from gnosisllm_knowledge.core.domain.result import IndexResult
from gnosisllm_knowledge.core.domain.search import SearchMode, SearchResult
from gnosisllm_knowledge.core.events.emitter import EventEmitter
from gnosisllm_knowledge.core.interfaces.setup import DiagnosticReport, HealthReport
from gnosisllm_knowledge.fetchers import NeoreaderContentFetcher
from gnosisllm_knowledge.fetchers.config import NeoreaderConfig
from gnosisllm_knowledge.loaders import LoaderFactory
from gnosisllm_knowledge.services import KnowledgeIndexingService, KnowledgeSearchService

if TYPE_CHECKING:
    from collections.abc import Callable

    from opensearchpy import AsyncOpenSearch

    from gnosisllm_knowledge.core.interfaces.chunker import ITextChunker
    from gnosisllm_knowledge.core.interfaces.fetcher import IContentFetcher
    from gnosisllm_knowledge.core.interfaces.indexer import IDocumentIndexer
    from gnosisllm_knowledge.core.interfaces.searcher import IKnowledgeSearcher
    from gnosisllm_knowledge.core.interfaces.setup import ISetupAdapter
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
class Knowledge:
    """High-level facade for knowledge operations.

    Provides a simple, unified interface for loading, indexing, and
    searching knowledge documents. Instances can be used as async context
    managers so that ``close()`` is always called.

    Example:
        ```python
        # Quick start with OpenSearch
        async with Knowledge.from_opensearch(
            host="localhost",
            port=9200,
        ) as knowledge:
            # Setup the backend
            await knowledge.setup()

            # Load and index a sitemap
            await knowledge.load(
                "https://docs.example.com/sitemap.xml",
                collection_id="docs",
            )

            # Search
            results = await knowledge.search("how to configure")
            for item in results.items:
                print(f"{item.title}: {item.score}")
        ```
    """

    def __init__(
        self,
        *,
        indexer: IDocumentIndexer,
        searcher: IKnowledgeSearcher,
        setup: ISetupAdapter | None = None,
        fetcher: IContentFetcher | None = None,
        chunker: ITextChunker | None = None,
        loader_factory: LoaderFactory | None = None,
        default_index: str | None = None,
        events: EventEmitter | None = None,
    ) -> None:
        """Initialize Knowledge with components.

        Args:
            indexer: Document indexer.
            searcher: Knowledge searcher.
            setup: Optional setup adapter.
            fetcher: Optional content fetcher.
            chunker: Optional text chunker (a ``SentenceChunker`` is created
                when omitted).
            loader_factory: Optional loader factory.
            default_index: Default index name.
            events: Optional event emitter (a new ``EventEmitter`` is created
                when omitted).

        Note:
            Embeddings are generated automatically by OpenSearch ingest pipeline.
            No Python-side embedding function is needed.
        """
        self._indexer = indexer
        self._searcher = searcher
        self._setup = setup
        self._fetcher = fetcher
        # Compare against None explicitly so a caller-supplied component that
        # happens to evaluate as falsy is not silently replaced by a default.
        self._chunker = chunker if chunker is not None else SentenceChunker()
        self._loader_factory = loader_factory
        self._default_index = default_index
        self._events = events if events is not None else EventEmitter()

        # Initialize services lazily
        self._indexing_service: KnowledgeIndexingService | None = None
        self._search_service: KnowledgeSearchService | None = None

        # Client created by a factory classmethod; released by close().
        # Components passed directly to __init__ stay owned by the caller.
        self._owned_client: AsyncOpenSearch | None = None

    async def __aenter__(self) -> Knowledge:
        """Enter the async context; returns this instance unchanged."""
        return self

    async def __aexit__(self, *exc_info: object) -> None:
        """Leave the async context, releasing resources via ``close()``."""
        await self.close()

    @classmethod
    def from_opensearch(
        cls,
        host: str = "localhost",
        port: int = 9200,
        *,
        username: str | None = None,
        password: str | None = None,
        use_ssl: bool = False,
        verify_certs: bool = True,
        neoreader_url: str | None = None,
        config: OpenSearchConfig | None = None,
        **kwargs: Any,
    ) -> Knowledge:
        """Create Knowledge instance with OpenSearch backend.

        Args:
            host: OpenSearch host.
            port: OpenSearch port.
            username: Optional username.
            password: Optional password.
            use_ssl: Use SSL connection.
            verify_certs: Verify SSL certificates.
            neoreader_url: Optional Neoreader URL for content fetching.
            config: Optional OpenSearchConfig. When given, it is used as-is and
                the connection parameters above and ``**kwargs`` are ignored.
            **kwargs: Additional config options forwarded to
                ``OpenSearchConfig`` (only when ``config`` is not given).

        Returns:
            Configured Knowledge instance. The OpenSearch client created here
            is closed by ``close()``.

        Raises:
            ImportError: If ``opensearch-py`` is not installed.

        Note:
            Embeddings are generated automatically by OpenSearch ingest pipeline.
            Run 'gnosisllm-knowledge setup' to configure the ML model.
        """
        # Import OpenSearch client lazily so the dependency stays optional.
        try:
            from opensearchpy import AsyncOpenSearch
        except ImportError as e:
            raise ImportError(
                "opensearch-py is required for OpenSearch backend. "
                "Install with: pip install gnosisllm-knowledge[opensearch]"
            ) from e

        # Build config
        if config is None:
            config = OpenSearchConfig(
                host=host,
                port=port,
                username=username,
                password=password,
                use_ssl=use_ssl,
                verify_certs=verify_certs,
                **kwargs,
            )

        # Create client
        client_kwargs: dict[str, Any] = {
            "hosts": [{"host": config.host, "port": config.port}],
            "use_ssl": config.use_ssl,
            "verify_certs": config.verify_certs,
        }

        # Authentication is only configured when both credentials are set.
        if config.username and config.password:
            client_kwargs["http_auth"] = (config.username, config.password)

        client = AsyncOpenSearch(**client_kwargs)

        # Create components
        # Embeddings are generated automatically by OpenSearch ingest pipeline.
        # No Python-side embedding function needed.
        indexer = OpenSearchIndexer(client, config)
        searcher = OpenSearchKnowledgeSearcher(client, config)
        setup = OpenSearchSetupAdapter(client, config)

        # Create fetcher
        fetcher = None
        if neoreader_url:
            neoreader_config = NeoreaderConfig(base_url=neoreader_url)
            fetcher = NeoreaderContentFetcher(neoreader_config)

        # Create loader factory
        loader_factory = LoaderFactory(default_fetcher=fetcher)

        instance = cls(
            indexer=indexer,
            searcher=searcher,
            setup=setup,
            fetcher=fetcher,
            loader_factory=loader_factory,
            default_index=config.knowledge_index_name,
        )
        # Remember the client we created so close() can release it.
        instance._owned_client = client
        return instance

    @classmethod
    def from_env(cls) -> Knowledge:
        """Create Knowledge instance from environment variables.

        Both ``OpenSearchConfig`` and ``NeoreaderConfig`` are built through
        their ``from_env()`` constructors; an empty Neoreader base URL means
        no content fetcher is configured.

        Returns:
            Configured Knowledge instance.
        """
        config = OpenSearchConfig.from_env()
        neoreader_config = NeoreaderConfig.from_env()

        return cls.from_opensearch(
            config=config,
            neoreader_url=neoreader_config.base_url or None,
        )

    @property
    def events(self) -> EventEmitter:
        """Get the event emitter."""
        return self._events

    @property
    def indexing(self) -> KnowledgeIndexingService:
        """Get the indexing service (created on first access).

        Raises:
            ValueError: If no loader factory is configured.
        """
        if self._indexing_service is None:
            if self._loader_factory is None:
                raise ValueError("Loader factory not configured")

            # Get a default loader. The fetcher is passed explicitly, exactly
            # as load() does, so both code paths build loaders the same way.
            loader = self._loader_factory.create("sitemap", self._fetcher)

            self._indexing_service = KnowledgeIndexingService(
                loader=loader,
                chunker=self._chunker,
                indexer=self._indexer,
                events=self._events,
            )

        return self._indexing_service

    @property
    def search_service(self) -> KnowledgeSearchService:
        """Get the search service (created on first access)."""
        if self._search_service is None:
            self._search_service = KnowledgeSearchService(
                searcher=self._searcher,
                default_index=self._default_index,
                events=self._events,
            )

        return self._search_service

    # === Setup Methods ===

    async def setup(self, **options: Any) -> bool:
        """Set up the backend (create indices, pipelines, etc.).

        Args:
            **options: Setup options forwarded to the setup adapter.

        Returns:
            True if setup succeeded; False if it failed or if no setup
            adapter is configured (a warning is logged in that case).
        """
        if not self._setup:
            logger.warning("No setup adapter configured")
            return False

        result = await self._setup.setup(**options)
        return result.success

    async def health_check(self) -> bool:
        """Quick health check.

        Returns:
            True if backend is healthy; False when no setup adapter is
            configured.
        """
        if not self._setup:
            return False
        return await self._setup.health_check()

    async def deep_health_check(self) -> HealthReport:
        """Comprehensive health check.

        Returns:
            Detailed health report.

        Raises:
            ValueError: If no setup adapter is configured.
        """
        if not self._setup:
            raise ValueError("No setup adapter configured")
        return await self._setup.deep_health_check()

    async def diagnose(self) -> DiagnosticReport:
        """Run diagnostics.

        Returns:
            Diagnostic report with recommendations.

        Raises:
            ValueError: If no setup adapter is configured.
        """
        if not self._setup:
            raise ValueError("No setup adapter configured")
        return await self._setup.diagnose()

    # === Loading Methods ===

    async def load(
        self,
        source: str,
        *,
        index_name: str | None = None,
        account_id: str | None = None,
        collection_id: str | None = None,
        source_id: str | None = None,
        source_type: str | None = None,
        on_progress: Callable[[int, int], None] | None = None,
        **options: Any,
    ) -> IndexResult:
        """Load and index content from a source.

        Automatically detects source type (sitemap, website, etc.) unless
        ``source_type`` is given. A fresh indexing service is built for each
        call so that the loader matches the source.

        Args:
            source: Source URL or path.
            index_name: Target index (uses default if not provided).
            account_id: Account ID for multi-tenancy.
            collection_id: Collection ID.
            source_id: Source ID (auto-generated if not provided).
            source_type: Explicit source type (auto-detected if not provided).
            on_progress: Optional progress callback (current, total).
                NOTE(review): this method accepts the callback but does not
                forward it to the indexing service, so it is never invoked
                from here - confirm the intended wiring.
            **options: Additional loading options.

        Returns:
            Index result with counts.

        Raises:
            ValueError: If no loader factory is configured, or if neither
                ``index_name`` nor a default index is available.
        """
        if self._loader_factory is None:
            raise ValueError("Loader factory not configured")

        index = index_name or self._default_index
        if not index:
            raise ValueError("No index specified and no default index configured")

        # Auto-detect or use explicit source type
        if source_type:
            loader = self._loader_factory.create(source_type, self._fetcher)
        else:
            loader = self._loader_factory.create_for_source(source, self._fetcher)

        # Create service for this load operation
        service = KnowledgeIndexingService(
            loader=loader,
            chunker=self._chunker,
            indexer=self._indexer,
            events=self._events,
        )

        return await service.load_and_index(
            source=source,
            index_name=index,
            account_id=account_id,
            collection_id=collection_id,
            source_id=source_id,
            **options,
        )

    # === Search Methods ===

    async def search(
        self,
        query: str,
        *,
        index_name: str | None = None,
        mode: SearchMode = SearchMode.HYBRID,
        limit: int = 10,
        offset: int = 0,
        account_id: str | None = None,
        collection_ids: list[str] | None = None,
        source_ids: list[str] | None = None,
        min_score: float | None = None,
        **options: Any,
    ) -> SearchResult:
        """Search for knowledge documents.

        Args:
            query: Search query text.
            index_name: Index to search (uses default if not provided).
            mode: Search mode (semantic, keyword, hybrid).
            limit: Maximum results.
            offset: Result offset for pagination.
            account_id: Account ID for multi-tenancy.
            collection_ids: Filter by collection IDs.
            source_ids: Filter by source IDs.
            min_score: Minimum score threshold.
            **options: Additional search options.

        Returns:
            Search results.
        """
        return await self.search_service.search(
            query=query,
            index_name=index_name,
            mode=mode,
            limit=limit,
            offset=offset,
            account_id=account_id,
            collection_ids=collection_ids,
            source_ids=source_ids,
            min_score=min_score,
            **options,
        )

    async def semantic_search(
        self,
        query: str,
        *,
        limit: int = 10,
        **options: Any,
    ) -> SearchResult:
        """Execute semantic (vector) search.

        Args:
            query: Search query.
            limit: Maximum results.
            **options: Additional options.

        Returns:
            Search results.
        """
        return await self.search_service.semantic_search(
            query=query,
            limit=limit,
            **options,
        )

    async def keyword_search(
        self,
        query: str,
        *,
        limit: int = 10,
        **options: Any,
    ) -> SearchResult:
        """Execute keyword (BM25) search.

        Args:
            query: Search query.
            limit: Maximum results.
            **options: Additional options.

        Returns:
            Search results.
        """
        return await self.search_service.keyword_search(
            query=query,
            limit=limit,
            **options,
        )

    async def find_similar(
        self,
        doc_id: str,
        *,
        limit: int = 10,
        **options: Any,
    ) -> SearchResult:
        """Find documents similar to a given document.

        Args:
            doc_id: Document ID.
            limit: Maximum results.
            **options: Additional options.

        Returns:
            Search results.
        """
        return await self.search_service.find_similar(
            doc_id=doc_id,
            limit=limit,
            **options,
        )

    # === Management Methods ===

    async def delete_source(
        self,
        source_id: str,
        *,
        index_name: str | None = None,
        account_id: str | None = None,
    ) -> int:
        """Delete all documents from a source.

        Args:
            source_id: Source ID to delete.
            index_name: Index name (uses default if not provided).
            account_id: Account ID for multi-tenancy.

        Returns:
            Count of deleted documents.

        Raises:
            ValueError: If no index can be resolved, or if the indexing
                service cannot be built because no loader factory is
                configured.
        """
        index = index_name or self._default_index
        if not index:
            raise ValueError("No index specified")

        return await self.indexing.delete_source(source_id, index, account_id)

    async def delete_collection(
        self,
        collection_id: str,
        *,
        index_name: str | None = None,
        account_id: str | None = None,
    ) -> int:
        """Delete all documents from a collection.

        Args:
            collection_id: Collection ID to delete.
            index_name: Index name (uses default if not provided).
            account_id: Account ID for multi-tenancy.

        Returns:
            Count of deleted documents.

        Raises:
            ValueError: If no index can be resolved, or if the indexing
                service cannot be built because no loader factory is
                configured.
        """
        index = index_name or self._default_index
        if not index:
            raise ValueError("No index specified")

        return await self.indexing.delete_collection(collection_id, index, account_id)

    async def count(
        self,
        *,
        index_name: str | None = None,
        account_id: str | None = None,
        collection_id: str | None = None,
    ) -> int:
        """Count documents.

        Args:
            index_name: Index to count.
            account_id: Filter by account.
            collection_id: Filter by collection.

        Returns:
            Document count.
        """
        return await self.search_service.count(
            index_name=index_name,
            account_id=account_id,
            collection_id=collection_id,
        )

    async def close(self) -> None:
        """Close connections and clean up resources.

        Closes the OpenSearch client created by ``from_opensearch()`` /
        ``from_env()``. Components handed directly to ``__init__`` are left
        untouched because their lifetime belongs to the caller. Calling this
        method more than once is safe.
        """
        # getattr keeps this safe for subclasses whose __init__ does not
        # chain to this class's initializer.
        client = getattr(self, "_owned_client", None)
        if client is None:
            return

        # Drop the reference first so a repeated close() is a no-op even if
        # closing the client raises.
        self._owned_client = None
        await client.close()