gnosisllm-knowledge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. gnosisllm_knowledge/__init__.py +152 -0
  2. gnosisllm_knowledge/api/__init__.py +5 -0
  3. gnosisllm_knowledge/api/knowledge.py +548 -0
  4. gnosisllm_knowledge/backends/__init__.py +26 -0
  5. gnosisllm_knowledge/backends/memory/__init__.py +9 -0
  6. gnosisllm_knowledge/backends/memory/indexer.py +384 -0
  7. gnosisllm_knowledge/backends/memory/searcher.py +516 -0
  8. gnosisllm_knowledge/backends/opensearch/__init__.py +19 -0
  9. gnosisllm_knowledge/backends/opensearch/agentic.py +738 -0
  10. gnosisllm_knowledge/backends/opensearch/config.py +195 -0
  11. gnosisllm_knowledge/backends/opensearch/indexer.py +499 -0
  12. gnosisllm_knowledge/backends/opensearch/mappings.py +255 -0
  13. gnosisllm_knowledge/backends/opensearch/queries.py +445 -0
  14. gnosisllm_knowledge/backends/opensearch/searcher.py +383 -0
  15. gnosisllm_knowledge/backends/opensearch/setup.py +1390 -0
  16. gnosisllm_knowledge/chunking/__init__.py +9 -0
  17. gnosisllm_knowledge/chunking/fixed.py +138 -0
  18. gnosisllm_knowledge/chunking/sentence.py +239 -0
  19. gnosisllm_knowledge/cli/__init__.py +18 -0
  20. gnosisllm_knowledge/cli/app.py +509 -0
  21. gnosisllm_knowledge/cli/commands/__init__.py +7 -0
  22. gnosisllm_knowledge/cli/commands/agentic.py +529 -0
  23. gnosisllm_knowledge/cli/commands/load.py +369 -0
  24. gnosisllm_knowledge/cli/commands/search.py +440 -0
  25. gnosisllm_knowledge/cli/commands/setup.py +228 -0
  26. gnosisllm_knowledge/cli/display/__init__.py +5 -0
  27. gnosisllm_knowledge/cli/display/service.py +555 -0
  28. gnosisllm_knowledge/cli/utils/__init__.py +5 -0
  29. gnosisllm_knowledge/cli/utils/config.py +207 -0
  30. gnosisllm_knowledge/core/__init__.py +87 -0
  31. gnosisllm_knowledge/core/domain/__init__.py +43 -0
  32. gnosisllm_knowledge/core/domain/document.py +240 -0
  33. gnosisllm_knowledge/core/domain/result.py +176 -0
  34. gnosisllm_knowledge/core/domain/search.py +327 -0
  35. gnosisllm_knowledge/core/domain/source.py +139 -0
  36. gnosisllm_knowledge/core/events/__init__.py +23 -0
  37. gnosisllm_knowledge/core/events/emitter.py +216 -0
  38. gnosisllm_knowledge/core/events/types.py +226 -0
  39. gnosisllm_knowledge/core/exceptions.py +407 -0
  40. gnosisllm_knowledge/core/interfaces/__init__.py +20 -0
  41. gnosisllm_knowledge/core/interfaces/agentic.py +136 -0
  42. gnosisllm_knowledge/core/interfaces/chunker.py +64 -0
  43. gnosisllm_knowledge/core/interfaces/fetcher.py +112 -0
  44. gnosisllm_knowledge/core/interfaces/indexer.py +244 -0
  45. gnosisllm_knowledge/core/interfaces/loader.py +102 -0
  46. gnosisllm_knowledge/core/interfaces/searcher.py +178 -0
  47. gnosisllm_knowledge/core/interfaces/setup.py +164 -0
  48. gnosisllm_knowledge/fetchers/__init__.py +12 -0
  49. gnosisllm_knowledge/fetchers/config.py +77 -0
  50. gnosisllm_knowledge/fetchers/http.py +167 -0
  51. gnosisllm_knowledge/fetchers/neoreader.py +204 -0
  52. gnosisllm_knowledge/loaders/__init__.py +13 -0
  53. gnosisllm_knowledge/loaders/base.py +399 -0
  54. gnosisllm_knowledge/loaders/factory.py +202 -0
  55. gnosisllm_knowledge/loaders/sitemap.py +285 -0
  56. gnosisllm_knowledge/loaders/website.py +57 -0
  57. gnosisllm_knowledge/py.typed +0 -0
  58. gnosisllm_knowledge/services/__init__.py +9 -0
  59. gnosisllm_knowledge/services/indexing.py +387 -0
  60. gnosisllm_knowledge/services/search.py +349 -0
  61. gnosisllm_knowledge-0.2.0.dist-info/METADATA +382 -0
  62. gnosisllm_knowledge-0.2.0.dist-info/RECORD +64 -0
  63. gnosisllm_knowledge-0.2.0.dist-info/WHEEL +4 -0
  64. gnosisllm_knowledge-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,407 @@
1
+ """Exception hierarchy for gnosisllm-knowledge."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+
8
class KnowledgeError(Exception):
    """Base exception for gnosisllm-knowledge.

    All library exceptions inherit from this class.

    Attributes:
        message: Human-readable error message.
        code: Machine-readable error code.
        details: Additional error details. Always a dict owned by this
            instance (never ``None``).
        cause: Original exception that caused this error.
    """

    def __init__(
        self,
        message: str,
        *,
        code: str | None = None,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """Initialize the exception.

        Args:
            message: Human-readable error message.
            code: Machine-readable error code.
            details: Additional error details. The mapping is copied, so
                later additions to ``self.details`` do not leak into the
                caller's dict.
            cause: Original exception that caused this error.
        """
        super().__init__(message)
        self.message = message
        self.code = code
        # Shallow-copy: subclasses write extra keys into ``self.details``,
        # which must not mutate the dict the caller passed in.
        self.details: dict[str, Any] = dict(details) if details else {}
        self.cause = cause

    def __str__(self) -> str:
        """Return ``message``, followed by ``[code]`` and the cause when set."""
        parts = [self.message]
        if self.code:
            parts.append(f"[{self.code}]")
        # Explicit None test: an exception object may define __len__/__bool__
        # and evaluate as falsy even though a cause was supplied.
        if self.cause is not None:
            parts.append(f"(caused by: {self.cause})")
        return " ".join(parts)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization.

        Returns:
            A dict with the keys ``error`` (class name), ``message``,
            ``code`` and ``details``.
        """
        return {
            "error": self.__class__.__name__,
            "message": self.message,
            "code": self.code,
            "details": self.details,
        }
59
+
60
+
61
class ConfigurationError(KnowledgeError):
    """Signal that required configuration is absent or not valid."""

    def __init__(self, message: str, *, config_key: str | None = None, **kwargs: Any) -> None:
        """Create the error.

        Args:
            message: Human-readable error message.
            config_key: Name of the offending configuration key, if known.
            **kwargs: Forwarded unchanged to ``KnowledgeError.__init__``.
        """
        super().__init__(message, **kwargs)
        self.config_key = config_key
        if not config_key:
            return
        # Only a non-empty key is mirrored into the serializable details.
        self.details["config_key"] = config_key
78
+
79
+
80
class ConnectionError(KnowledgeError):
    """Signal that a connection to a backend service could not be made.

    Note: within this module the name shadows Python's builtin
    ``ConnectionError``; this class derives from ``KnowledgeError``.
    """

    def __init__(
        self,
        message: str,
        *,
        host: str | None = None,
        port: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Create the error.

        Args:
            message: Human-readable error message.
            host: Host that could not be reached, if known.
            port: Port that could not be reached, if known.
            **kwargs: Forwarded unchanged to ``KnowledgeError.__init__``.
        """
        super().__init__(message, **kwargs)
        self.host, self.port = host, port
        # Truthy endpoint fields are mirrored into details, host first.
        for key, val in (("host", host), ("port", port)):
            if val:
                self.details[key] = val
101
+
102
+
103
class AuthenticationError(KnowledgeError):
    """Signal that authenticating against a service failed.

    Adds no fields or behavior beyond ``KnowledgeError``.
    """
110
+
111
+
112
class AuthorizationError(KnowledgeError):
    """Signal that the caller lacks permission for the requested operation."""

    def __init__(
        self,
        message: str,
        *,
        required_permission: str | None = None,
        resource: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Create the error.

        Args:
            message: Human-readable error message.
            required_permission: Permission that was needed, if known.
            resource: Resource the operation targeted, if known.
            **kwargs: Forwarded unchanged to ``KnowledgeError.__init__``.
        """
        super().__init__(message, **kwargs)
        self.required_permission = required_permission
        self.resource = resource
        # Mirror only the truthy fields into details, keeping this key order.
        supplied = {
            "required_permission": required_permission,
            "resource": resource,
        }
        self.details.update({k: v for k, v in supplied.items() if v})
133
+
134
+
135
class LoadError(KnowledgeError):
    """Signal that loading content failed (fetch error, parse error, etc.)."""

    def __init__(self, message: str, *, source: str, **kwargs: Any) -> None:
        """Create the error.

        The final message is prefixed with the source that failed to load.

        Args:
            message: Description of what went wrong.
            source: Identifier of the content source; required.
            **kwargs: Forwarded unchanged to ``KnowledgeError.__init__``.
        """
        full_message = f"Failed to load '{source}': {message}"
        super().__init__(full_message, **kwargs)
        self.details["source"] = source
        self.source = source
151
+
152
+
153
class FetchError(LoadError):
    """Signal that fetching content from a URL failed.

    A narrower form of ``LoadError`` intended for HTTP/network failures.
    """

    def __init__(
        self,
        message: str,
        *,
        source: str,
        status_code: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Create the error.

        Args:
            message: Description of what went wrong.
            source: Identifier of the content source; required.
            status_code: Status code associated with the failure, if any.
            **kwargs: Forwarded unchanged to ``LoadError.__init__``.
        """
        super().__init__(message, source=source, **kwargs)
        self.status_code = status_code
        if not status_code:
            return
        # A truthy status code is also exposed through details.
        self.details["status_code"] = status_code
171
+
172
+
173
class ValidationError(KnowledgeError):
    """Signal that document content did not pass validation rules."""

    def __init__(
        self,
        message: str,
        *,
        field: str | None = None,
        value: Any = None,
        errors: list[str] | None = None,
        **kwargs: Any,
    ) -> None:
        """Create the error.

        Args:
            message: Human-readable error message.
            field: Name of the field that failed validation, if known.
            value: Offending value; stored on the instance but not copied
                into ``details``.
            errors: Individual validation messages, if any.
            **kwargs: Forwarded unchanged to ``KnowledgeError.__init__``.
        """
        super().__init__(message, **kwargs)
        self.field = field
        self.value = value
        self.errors = errors if errors else []
        # Only truthy field/errors are mirrored into details, in this order.
        for key, val in (("field", field), ("errors", errors)):
            if val:
                self.details[key] = val
196
+
197
+
198
class IndexError(KnowledgeError):
    """Signal that indexing documents failed.

    Note: within this module the name shadows Python's builtin
    ``IndexError``; this class derives from ``KnowledgeError``.
    """

    def __init__(
        self,
        message: str,
        *,
        index_name: str | None = None,
        doc_count: int = 0,
        failed_count: int = 0,
        **kwargs: Any,
    ) -> None:
        """Create the error.

        Args:
            message: Human-readable error message.
            index_name: Name of the target index, if known.
            doc_count: Number of documents involved.
            failed_count: Number of documents that failed.
            **kwargs: Forwarded unchanged to ``KnowledgeError.__init__``.
        """
        super().__init__(message, **kwargs)
        self.index_name = index_name
        self.doc_count = doc_count
        self.failed_count = failed_count
        # The index name is optional in details; the counts are always recorded.
        optional = {"index_name": index_name} if index_name else {}
        self.details.update(
            {**optional, "doc_count": doc_count, "failed_count": failed_count}
        )
221
+
222
+
223
class SearchError(KnowledgeError):
    """Signal that a search operation failed."""

    def __init__(
        self,
        message: str,
        *,
        query: str | None = None,
        index_name: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Create the error.

        Args:
            message: Human-readable error message.
            query: Query text that was being executed, if known.
            index_name: Name of the searched index, if known.
            **kwargs: Forwarded unchanged to ``KnowledgeError.__init__``.
        """
        super().__init__(message, **kwargs)
        self.query, self.index_name = query, index_name
        # Truthy values are mirrored into details: query first, then index.
        for key, val in (("query", query), ("index_name", index_name)):
            if val:
                self.details[key] = val
244
+
245
+
246
class AgenticSearchError(SearchError):
    """Signal that an AI agent-powered search failed.

    Covers agent execution failures, LLM errors, and timeouts.
    """

    def __init__(
        self,
        message: str,
        *,
        agent_id: str | None = None,
        agent_type: str | None = None,
        conversation_id: str | None = None,
        iteration: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Create the error.

        Args:
            message: Human-readable error message.
            agent_id: Identifier of the agent involved, if known.
            agent_type: Type of the agent involved, if known.
            conversation_id: Conversation the search belonged to, if any.
            iteration: Iteration number at which the failure occurred;
                recorded in details whenever it is not ``None``.
            **kwargs: Forwarded unchanged to ``SearchError.__init__``.
        """
        super().__init__(message, **kwargs)
        self.agent_id = agent_id
        self.agent_type = agent_type
        self.conversation_id = conversation_id
        self.iteration = iteration

        # String identifiers are recorded only when truthy.
        text_fields = (
            ("agent_id", agent_id),
            ("agent_type", agent_type),
            ("conversation_id", conversation_id),
        )
        for key, val in text_fields:
            if val:
                self.details[key] = val
        # Iteration 0 is meaningful, so only None is skipped.
        if iteration is not None:
            self.details["iteration"] = iteration
276
+
277
+
278
class EmbeddingError(KnowledgeError):
    """Signal that generating embeddings failed."""

    def __init__(
        self,
        message: str,
        *,
        model: str | None = None,
        text_length: int = 0,
        **kwargs: Any,
    ) -> None:
        """Create the error.

        Args:
            message: Human-readable error message.
            model: Name of the embedding model, if known.
            text_length: Length of the text being embedded.
            **kwargs: Forwarded unchanged to ``KnowledgeError.__init__``.
        """
        super().__init__(message, **kwargs)
        self.model, self.text_length = model, text_length
        # The model is optional in details; the text length is always recorded.
        optional = {"model": model} if model else {}
        self.details.update({**optional, "text_length": text_length})
298
+
299
+
300
class SetupError(KnowledgeError):
    """Signal that backend setup failed."""

    def __init__(self, message: str, *, step: str, **kwargs: Any) -> None:
        """Create the error.

        The final message is prefixed with the step that failed.

        Args:
            message: Description of what went wrong.
            step: Name of the setup step that failed; required.
            **kwargs: Forwarded unchanged to ``KnowledgeError.__init__``.
        """
        full_message = f"Setup failed at '{step}': {message}"
        super().__init__(full_message, **kwargs)
        self.details["step"] = step
        self.step = step
316
+
317
+
318
class TimeoutError(KnowledgeError):
    """Operation timed out.

    Raised when an operation exceeds its timeout.

    Note: within this module the name shadows Python's builtin
    ``TimeoutError``; this class derives from ``KnowledgeError``.
    """

    def __init__(
        self,
        message: str = "Operation timed out",
        *,
        timeout: float | None = None,
        operation: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize the exception.

        Args:
            message: Human-readable error message.
            timeout: The timeout value that was exceeded, if known. Recorded
                in ``details`` whenever it is not ``None`` (including ``0``).
            operation: Name of the operation that timed out, if known.
            **kwargs: Forwarded unchanged to ``KnowledgeError.__init__``.
        """
        super().__init__(message, **kwargs)
        self.timeout = timeout
        self.operation = operation
        # A zero timeout is a legitimate value; only None means "not given".
        if timeout is not None:
            self.details["timeout"] = timeout
        if operation:
            self.details["operation"] = operation
339
+
340
+
341
class CircuitBreakerOpenError(KnowledgeError):
    """Circuit breaker is open.

    Raised when a circuit breaker is open and rejecting requests.
    """

    def __init__(
        self,
        message: str = "Circuit breaker is open",
        *,
        recovery_time: float | None = None,
        component: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize the exception.

        Args:
            message: Human-readable error message.
            recovery_time: Recovery time associated with the open breaker,
                if known. Recorded in ``details`` whenever it is not
                ``None`` (including ``0``).
            component: Name of the protected component, if known.
            **kwargs: Forwarded unchanged to ``KnowledgeError.__init__``.
        """
        super().__init__(message, **kwargs)
        self.recovery_time = recovery_time
        self.component = component
        # 0.0 is a valid recovery time; only None means "not given".
        if recovery_time is not None:
            self.details["recovery_time"] = recovery_time
        if component:
            self.details["component"] = component
362
+
363
+
364
class RateLimitError(KnowledgeError):
    """Rate limit exceeded.

    Raised when API rate limits are exceeded.
    """

    def __init__(
        self,
        message: str = "Rate limit exceeded",
        *,
        retry_after: float | None = None,
        limit: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize the exception.

        Args:
            message: Human-readable error message.
            retry_after: Delay before retrying, if known. Recorded in
                ``details`` whenever it is not ``None`` (including ``0``).
            limit: The rate limit that was exceeded, if known. Recorded in
                ``details`` whenever it is not ``None`` (including ``0``).
            **kwargs: Forwarded unchanged to ``KnowledgeError.__init__``.
        """
        super().__init__(message, **kwargs)
        self.retry_after = retry_after
        self.limit = limit
        # Zero is meaningful for both fields, so test against None rather
        # than truthiness.
        if retry_after is not None:
            self.details["retry_after"] = retry_after
        if limit is not None:
            self.details["limit"] = limit
385
+
386
+
387
class DocumentNotFoundError(KnowledgeError):
    """Signal that a requested document could not be found."""

    def __init__(
        self,
        message: str = "Document not found",
        *,
        doc_id: str | None = None,
        index_name: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Create the error.

        Args:
            message: Human-readable error message.
            doc_id: Identifier of the missing document, if known.
            index_name: Name of the index that was consulted, if known.
            **kwargs: Forwarded unchanged to ``KnowledgeError.__init__``.
        """
        super().__init__(message, **kwargs)
        self.doc_id, self.index_name = doc_id, index_name
        # Truthy values are mirrored into details: doc id first, then index.
        for key, val in (("doc_id", doc_id), ("index_name", index_name)):
            if val:
                self.details[key] = val
@@ -0,0 +1,20 @@
1
+ """Interface definitions (protocols) for dependency injection."""
2
+
3
+ from gnosisllm_knowledge.core.interfaces.agentic import IAgenticSearcher
4
+ from gnosisllm_knowledge.core.interfaces.chunker import ITextChunker
5
+ from gnosisllm_knowledge.core.interfaces.fetcher import FetchResult, IContentFetcher
6
+ from gnosisllm_knowledge.core.interfaces.indexer import IDocumentIndexer
7
+ from gnosisllm_knowledge.core.interfaces.loader import IContentLoader
8
+ from gnosisllm_knowledge.core.interfaces.searcher import IKnowledgeSearcher
9
+ from gnosisllm_knowledge.core.interfaces.setup import ISetupAdapter
10
+
11
# Public names re-exported by this package; each one is imported from its
# interface submodule above.
__all__ = [
    "IContentLoader",
    "IContentFetcher",
    "FetchResult",
    "ITextChunker",
    "IDocumentIndexer",
    "IKnowledgeSearcher",
    "IAgenticSearcher",
    "ISetupAdapter",
]
@@ -0,0 +1,136 @@
1
+ """Agentic searcher protocol - Interface for AI-powered search operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
6
+
7
+ if TYPE_CHECKING:
8
+ from gnosisllm_knowledge.core.domain.search import (
9
+ AgenticSearchQuery,
10
+ AgenticSearchResult,
11
+ )
12
+
13
+
14
@runtime_checkable
class IAgenticSearcher(Protocol):
    """Structural interface for AI-agent-driven search.

    An implementation is expected to:
    - interpret natural language queries,
    - build a suitable search strategy automatically,
    - produce context-aware answers from the retrieved documents,
    - support multi-turn conversations with memory.

    The intent is search that goes beyond traditional retrieval by
    understanding user intent and generating comprehensive answers.
    """

    @property
    def is_configured(self) -> bool:
        """Whether agentic search is properly configured.

        Returns:
            True if all required agents and models are configured.
        """
        ...

    @property
    def flow_agent_available(self) -> bool:
        """Whether the flow agent can be used.

        Returns:
            True if the flow agent is available.
        """
        ...

    @property
    def conversational_agent_available(self) -> bool:
        """Whether the conversational agent can be used.

        Returns:
            True if the conversational agent is available.
        """
        ...

    async def agentic_search(
        self, query: AgenticSearchQuery, index_name: str, **options: Any
    ) -> AgenticSearchResult:
        """Run an agent-orchestrated search.

        The agent is expected to:
        1. Analyze the query to understand user intent.
        2. Search for relevant documents.
        3. Generate a comprehensive answer with citations.
        4. Optionally maintain conversation memory.

        Args:
            query: Agentic search query with agent type and context.
            index_name: Name of the index to search.
            **options: Additional agent options.

        Returns:
            AgenticSearchResult with answer, reasoning, and sources.
        """
        ...

    async def get_conversation(self, conversation_id: str) -> list[dict[str, Any]]:
        """Fetch the history of a multi-turn conversation.

        Args:
            conversation_id: Conversation identifier.

        Returns:
            Conversation messages, each with role, content, and metadata.
        """
        ...

    async def clear_conversation(self, conversation_id: str) -> bool:
        """Erase the history of a conversation.

        Args:
            conversation_id: Conversation to clear.

        Returns:
            True if cleared successfully, False if not found.
        """
        ...

    async def list_conversations(
        self, account_id: str | None = None, limit: int = 100
    ) -> list[dict[str, Any]]:
        """Enumerate active conversations.

        Args:
            account_id: Restrict results to this account (multi-tenant).
            limit: Maximum number of conversations to return.

        Returns:
            List of conversation metadata dicts.
        """
        ...

    async def get_agent_status(self, agent_id: str) -> dict[str, Any] | None:
        """Look up the status of an agent.

        Args:
            agent_id: Agent identifier.

        Returns:
            Agent status info, or None if the agent is not found.
        """
        ...
@@ -0,0 +1,64 @@
1
+ """Text chunker protocol - Single Responsibility Principle."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
6
+
7
+ if TYPE_CHECKING:
8
+ from gnosisllm_knowledge.core.domain.document import TextChunk
9
+
10
+
11
@runtime_checkable
class ITextChunker(Protocol):
    """Structural interface for splitting text into smaller pieces.

    A chunker is expected to:
    - split large text into embedding-friendly chunks,
    - preserve semantic boundaries (sentences, paragraphs),
    - handle overlap between chunks,
    - maintain position information.

    Implementations should do text chunking only, leaving fetching and
    indexing to other components (Single Responsibility Principle).
    """

    @property
    def name(self) -> str:
        """Name that identifies this chunker."""
        ...

    @property
    def chunk_size(self) -> int:
        """Target chunk size, in characters."""
        ...

    @property
    def chunk_overlap(self) -> int:
        """Overlap between consecutive chunks, in characters."""
        ...

    def chunk(self, text: str, **options: Any) -> list[TextChunk]:
        """Break ``text`` into chunks suitable for embedding.

        Args:
            text: The text to chunk.
            **options: Chunker-specific options, such as:
                - chunk_size: Override default chunk size
                - chunk_overlap: Override default overlap
                - preserve_sentences: Keep sentences intact

        Returns:
            List of TextChunk objects with content and position info.
        """
        ...

    def estimate_chunks(self, text: str) -> int:
        """Estimate how many chunks ``text`` would produce.

        Args:
            text: The text to estimate.

        Returns:
            Estimated number of chunks.
        """
        ...