gnosisllm-knowledge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. gnosisllm_knowledge/__init__.py +152 -0
  2. gnosisllm_knowledge/api/__init__.py +5 -0
  3. gnosisllm_knowledge/api/knowledge.py +548 -0
  4. gnosisllm_knowledge/backends/__init__.py +26 -0
  5. gnosisllm_knowledge/backends/memory/__init__.py +9 -0
  6. gnosisllm_knowledge/backends/memory/indexer.py +384 -0
  7. gnosisllm_knowledge/backends/memory/searcher.py +516 -0
  8. gnosisllm_knowledge/backends/opensearch/__init__.py +19 -0
  9. gnosisllm_knowledge/backends/opensearch/agentic.py +738 -0
  10. gnosisllm_knowledge/backends/opensearch/config.py +195 -0
  11. gnosisllm_knowledge/backends/opensearch/indexer.py +499 -0
  12. gnosisllm_knowledge/backends/opensearch/mappings.py +255 -0
  13. gnosisllm_knowledge/backends/opensearch/queries.py +445 -0
  14. gnosisllm_knowledge/backends/opensearch/searcher.py +383 -0
  15. gnosisllm_knowledge/backends/opensearch/setup.py +1390 -0
  16. gnosisllm_knowledge/chunking/__init__.py +9 -0
  17. gnosisllm_knowledge/chunking/fixed.py +138 -0
  18. gnosisllm_knowledge/chunking/sentence.py +239 -0
  19. gnosisllm_knowledge/cli/__init__.py +18 -0
  20. gnosisllm_knowledge/cli/app.py +509 -0
  21. gnosisllm_knowledge/cli/commands/__init__.py +7 -0
  22. gnosisllm_knowledge/cli/commands/agentic.py +529 -0
  23. gnosisllm_knowledge/cli/commands/load.py +369 -0
  24. gnosisllm_knowledge/cli/commands/search.py +440 -0
  25. gnosisllm_knowledge/cli/commands/setup.py +228 -0
  26. gnosisllm_knowledge/cli/display/__init__.py +5 -0
  27. gnosisllm_knowledge/cli/display/service.py +555 -0
  28. gnosisllm_knowledge/cli/utils/__init__.py +5 -0
  29. gnosisllm_knowledge/cli/utils/config.py +207 -0
  30. gnosisllm_knowledge/core/__init__.py +87 -0
  31. gnosisllm_knowledge/core/domain/__init__.py +43 -0
  32. gnosisllm_knowledge/core/domain/document.py +240 -0
  33. gnosisllm_knowledge/core/domain/result.py +176 -0
  34. gnosisllm_knowledge/core/domain/search.py +327 -0
  35. gnosisllm_knowledge/core/domain/source.py +139 -0
  36. gnosisllm_knowledge/core/events/__init__.py +23 -0
  37. gnosisllm_knowledge/core/events/emitter.py +216 -0
  38. gnosisllm_knowledge/core/events/types.py +226 -0
  39. gnosisllm_knowledge/core/exceptions.py +407 -0
  40. gnosisllm_knowledge/core/interfaces/__init__.py +20 -0
  41. gnosisllm_knowledge/core/interfaces/agentic.py +136 -0
  42. gnosisllm_knowledge/core/interfaces/chunker.py +64 -0
  43. gnosisllm_knowledge/core/interfaces/fetcher.py +112 -0
  44. gnosisllm_knowledge/core/interfaces/indexer.py +244 -0
  45. gnosisllm_knowledge/core/interfaces/loader.py +102 -0
  46. gnosisllm_knowledge/core/interfaces/searcher.py +178 -0
  47. gnosisllm_knowledge/core/interfaces/setup.py +164 -0
  48. gnosisllm_knowledge/fetchers/__init__.py +12 -0
  49. gnosisllm_knowledge/fetchers/config.py +77 -0
  50. gnosisllm_knowledge/fetchers/http.py +167 -0
  51. gnosisllm_knowledge/fetchers/neoreader.py +204 -0
  52. gnosisllm_knowledge/loaders/__init__.py +13 -0
  53. gnosisllm_knowledge/loaders/base.py +399 -0
  54. gnosisllm_knowledge/loaders/factory.py +202 -0
  55. gnosisllm_knowledge/loaders/sitemap.py +285 -0
  56. gnosisllm_knowledge/loaders/website.py +57 -0
  57. gnosisllm_knowledge/py.typed +0 -0
  58. gnosisllm_knowledge/services/__init__.py +9 -0
  59. gnosisllm_knowledge/services/indexing.py +387 -0
  60. gnosisllm_knowledge/services/search.py +349 -0
  61. gnosisllm_knowledge-0.2.0.dist-info/METADATA +382 -0
  62. gnosisllm_knowledge-0.2.0.dist-info/RECORD +64 -0
  63. gnosisllm_knowledge-0.2.0.dist-info/WHEEL +4 -0
  64. gnosisllm_knowledge-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,349 @@
1
+ """Knowledge search service."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ from gnosisllm_knowledge.core.domain.search import (
9
+ SearchMode,
10
+ SearchQuery,
11
+ SearchResult,
12
+ )
13
+ from gnosisllm_knowledge.core.events.emitter import EventEmitter
14
+ from gnosisllm_knowledge.core.events.types import EventType
15
+ from gnosisllm_knowledge.core.exceptions import SearchError
16
+
17
+ if TYPE_CHECKING:
18
+ from gnosisllm_knowledge.core.interfaces.searcher import IKnowledgeSearcher
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class KnowledgeSearchService:
    """Service for searching knowledge documents.

    Provides a high-level interface for semantic, keyword, and hybrid search
    on top of an ``IKnowledgeSearcher`` implementation. A
    ``EventType.SEARCH_COMPLETED`` event is emitted after every successful
    call to :meth:`search` (and therefore after the ``*_search`` wrappers).

    Example:
        ```python
        service = KnowledgeSearchService(
            searcher=OpenSearchKnowledgeSearcher(client, config, get_embedding),
        )

        # Hybrid search
        results = await service.search(
            query="how to configure authentication",
            mode=SearchMode.HYBRID,
            collection_ids=["docs"],
        )
        ```
    """

    def __init__(
        self,
        searcher: IKnowledgeSearcher,
        default_index: str | None = None,
        events: EventEmitter | None = None,
    ) -> None:
        """Initialize the search service.

        Args:
            searcher: Knowledge searcher implementation.
            default_index: Default index name for searches.
            events: Optional event emitter for tracking. A new
                ``EventEmitter`` is created when omitted.
        """
        self._searcher = searcher
        self._default_index = default_index
        # Compare against None explicitly: a caller-supplied emitter that
        # happens to evaluate falsy must not be silently swapped for a new
        # one, otherwise handlers registered on it would never fire.
        self._events = events if events is not None else EventEmitter()

    @property
    def events(self) -> EventEmitter:
        """Get the event emitter."""
        return self._events

    def _resolve_index(self, index_name: str | None) -> str:
        """Return the index to operate on, falling back to the default.

        Args:
            index_name: Explicit index name, or ``None``/empty to use the
                default index given at construction time.

        Returns:
            The resolved, non-empty index name.

        Raises:
            SearchError: If neither an explicit nor a default index is set.
        """
        index = index_name or self._default_index
        if not index:
            raise SearchError(message="No index specified and no default index configured")
        return index

    async def search(
        self,
        query: str,
        *,
        index_name: str | None = None,
        mode: SearchMode = SearchMode.HYBRID,
        limit: int = 10,
        offset: int = 0,
        account_id: str | None = None,
        collection_ids: list[str] | None = None,
        source_ids: list[str] | None = None,
        min_score: float | None = None,
        **options: Any,
    ) -> SearchResult:
        """Search for knowledge documents.

        Args:
            query: Search query text.
            index_name: Index to search (uses default if not provided).
            mode: Search mode (semantic, keyword, hybrid).
            limit: Maximum results.
            offset: Result offset for pagination.
            account_id: Account ID for multi-tenancy.
            collection_ids: Filter by collection IDs.
            source_ids: Filter by source IDs.
            min_score: Minimum score threshold.
            **options: Additional search options, forwarded unchanged to the
                searcher.

        Returns:
            Search results.

        Raises:
            SearchError: If no index can be resolved, or if the search (or
                the completion event emission) fails. A ``SearchError``
                raised by the searcher itself is propagated unchanged.
        """
        index = self._resolve_index(index_name)

        search_query = SearchQuery(
            text=query,
            mode=mode,
            limit=limit,
            offset=offset,
            account_id=account_id,
            collection_ids=collection_ids,
            source_ids=source_ids,
            min_score=min_score,
        )

        try:
            result = await self._searcher.search(search_query, index, **options)

            # Emit search event
            await self._events.emit_async(
                EventType.SEARCH_COMPLETED,
                {
                    "query": query,
                    "mode": mode.value,
                    "results_count": len(result.items),
                    "total_hits": result.total_hits,
                    "duration_ms": result.duration_ms,
                },
            )

            return result

        except SearchError as e:
            # Already a domain error: re-raise as-is so the message is not
            # doubled ("Search failed: Search failed: ...") and the details
            # attached by the backend are preserved.
            logger.error("Search failed: %s", e)
            raise
        except Exception as e:
            # logger.exception keeps the traceback of the unexpected error.
            logger.exception("Search failed: %s", e)
            raise SearchError(
                message=f"Search failed: {e}",
                # Truncate so very long queries do not bloat error payloads.
                details={"query": query[:100]},
                cause=e,
            ) from e

    async def semantic_search(
        self,
        query: str,
        *,
        index_name: str | None = None,
        limit: int = 10,
        account_id: str | None = None,
        collection_ids: list[str] | None = None,
        **options: Any,
    ) -> SearchResult:
        """Execute semantic (vector) search.

        Thin wrapper around :meth:`search` with ``mode=SearchMode.SEMANTIC``.

        Args:
            query: Search query text.
            index_name: Index to search.
            limit: Maximum results.
            account_id: Account ID for multi-tenancy.
            collection_ids: Filter by collection IDs.
            **options: Additional options, passed through to :meth:`search`.

        Returns:
            Search results.

        Raises:
            SearchError: If no index can be resolved or the search fails.
        """
        return await self.search(
            query=query,
            index_name=index_name,
            mode=SearchMode.SEMANTIC,
            limit=limit,
            account_id=account_id,
            collection_ids=collection_ids,
            **options,
        )

    async def keyword_search(
        self,
        query: str,
        *,
        index_name: str | None = None,
        limit: int = 10,
        account_id: str | None = None,
        collection_ids: list[str] | None = None,
        **options: Any,
    ) -> SearchResult:
        """Execute keyword (BM25) search.

        Thin wrapper around :meth:`search` with ``mode=SearchMode.KEYWORD``.

        Args:
            query: Search query text.
            index_name: Index to search.
            limit: Maximum results.
            account_id: Account ID for multi-tenancy.
            collection_ids: Filter by collection IDs.
            **options: Additional options, passed through to :meth:`search`.

        Returns:
            Search results.

        Raises:
            SearchError: If no index can be resolved or the search fails.
        """
        return await self.search(
            query=query,
            index_name=index_name,
            mode=SearchMode.KEYWORD,
            limit=limit,
            account_id=account_id,
            collection_ids=collection_ids,
            **options,
        )

    async def hybrid_search(
        self,
        query: str,
        *,
        index_name: str | None = None,
        limit: int = 10,
        account_id: str | None = None,
        collection_ids: list[str] | None = None,
        semantic_weight: float = 0.7,
        keyword_weight: float = 0.3,
        **options: Any,
    ) -> SearchResult:
        """Execute hybrid search (semantic + keyword).

        Thin wrapper around :meth:`search` with ``mode=SearchMode.HYBRID``;
        the two weights travel to the searcher as extra keyword options.

        Args:
            query: Search query text.
            index_name: Index to search.
            limit: Maximum results.
            account_id: Account ID for multi-tenancy.
            collection_ids: Filter by collection IDs.
            semantic_weight: Weight for semantic score.
            keyword_weight: Weight for keyword score.
            **options: Additional options, passed through to :meth:`search`.

        Returns:
            Search results.

        Raises:
            SearchError: If no index can be resolved or the search fails.
        """
        return await self.search(
            query=query,
            index_name=index_name,
            mode=SearchMode.HYBRID,
            limit=limit,
            account_id=account_id,
            collection_ids=collection_ids,
            semantic_weight=semantic_weight,
            keyword_weight=keyword_weight,
            **options,
        )

    async def find_similar(
        self,
        doc_id: str,
        *,
        index_name: str | None = None,
        limit: int = 10,
        **options: Any,
    ) -> SearchResult:
        """Find documents similar to a given document.

        Args:
            doc_id: Document ID to find similar documents for.
            index_name: Index to search.
            limit: Maximum results.
            **options: Accepted for signature compatibility; currently NOT
                forwarded to the searcher.

        Returns:
            Search results.

        Raises:
            SearchError: If no index can be resolved. Errors raised by the
                searcher propagate unwrapped.
        """
        index = self._resolve_index(index_name)

        # NOTE(review): **options is dropped here; confirm whether
        # IKnowledgeSearcher.get_similar_documents accepts extra options
        # before forwarding them.
        return await self._searcher.get_similar_documents(doc_id, index, limit)

    async def multi_search(
        self,
        queries: list[str],
        *,
        index_name: str | None = None,
        mode: SearchMode = SearchMode.HYBRID,
        limit: int = 10,
        account_id: str | None = None,
        **options: Any,
    ) -> list[SearchResult]:
        """Execute multiple searches in a single searcher call.

        Each query text becomes a ``SearchQuery`` sharing the same ``mode``,
        ``limit`` and ``account_id``; the batch is handed to the searcher's
        ``multi_search``.

        Args:
            queries: List of query texts.
            index_name: Index to search.
            mode: Search mode.
            limit: Maximum results per query.
            account_id: Account ID for multi-tenancy.
            **options: Accepted for signature compatibility; currently NOT
                forwarded to the searcher.

        Returns:
            List of search results; empty when ``queries`` is empty.

        Raises:
            SearchError: If no index can be resolved. Errors raised by the
                searcher propagate unwrapped.
        """
        index = self._resolve_index(index_name)

        # Nothing to search: avoid sending an empty batch to the backend.
        if not queries:
            return []

        search_queries = [
            SearchQuery(
                text=query,
                mode=mode,
                limit=limit,
                account_id=account_id,
            )
            for query in queries
        ]

        # NOTE(review): **options is dropped here; confirm the searcher's
        # multi_search signature before forwarding them.
        return await self._searcher.multi_search(search_queries, index)

    async def get_embedding(self, text: str) -> list[float]:
        """Get embedding vector for text.

        Delegates directly to the underlying searcher.

        Args:
            text: Text to embed.

        Returns:
            Embedding vector.
        """
        return await self._searcher.get_embedding(text)

    async def count(
        self,
        index_name: str | None = None,
        account_id: str | None = None,
        collection_id: str | None = None,
    ) -> int:
        """Count documents in index.

        Runs a zero-result search and returns the reported ``total_hits``.

        Args:
            index_name: Index to count.
            account_id: Filter by account.
            collection_id: Filter by collection.

        Returns:
            Document count.

        Raises:
            SearchError: If no index can be resolved. Errors raised by the
                searcher propagate unwrapped.
        """
        index = self._resolve_index(index_name)

        # Build count query: empty text and limit=0 so no documents are
        # fetched, only the total is used.
        # NOTE(review): this relies on the searcher treating an empty query
        # text as "match everything" under SearchQuery's default mode;
        # confirm against each backend.
        query = SearchQuery(
            text="",
            limit=0,
            account_id=account_id,
            collection_ids=[collection_id] if collection_id else None,
        )

        result = await self._searcher.search(query, index)
        return result.total_hits