gnosisllm-knowledge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. gnosisllm_knowledge/__init__.py +152 -0
  2. gnosisllm_knowledge/api/__init__.py +5 -0
  3. gnosisllm_knowledge/api/knowledge.py +548 -0
  4. gnosisllm_knowledge/backends/__init__.py +26 -0
  5. gnosisllm_knowledge/backends/memory/__init__.py +9 -0
  6. gnosisllm_knowledge/backends/memory/indexer.py +384 -0
  7. gnosisllm_knowledge/backends/memory/searcher.py +516 -0
  8. gnosisllm_knowledge/backends/opensearch/__init__.py +19 -0
  9. gnosisllm_knowledge/backends/opensearch/agentic.py +738 -0
  10. gnosisllm_knowledge/backends/opensearch/config.py +195 -0
  11. gnosisllm_knowledge/backends/opensearch/indexer.py +499 -0
  12. gnosisllm_knowledge/backends/opensearch/mappings.py +255 -0
  13. gnosisllm_knowledge/backends/opensearch/queries.py +445 -0
  14. gnosisllm_knowledge/backends/opensearch/searcher.py +383 -0
  15. gnosisllm_knowledge/backends/opensearch/setup.py +1390 -0
  16. gnosisllm_knowledge/chunking/__init__.py +9 -0
  17. gnosisllm_knowledge/chunking/fixed.py +138 -0
  18. gnosisllm_knowledge/chunking/sentence.py +239 -0
  19. gnosisllm_knowledge/cli/__init__.py +18 -0
  20. gnosisllm_knowledge/cli/app.py +509 -0
  21. gnosisllm_knowledge/cli/commands/__init__.py +7 -0
  22. gnosisllm_knowledge/cli/commands/agentic.py +529 -0
  23. gnosisllm_knowledge/cli/commands/load.py +369 -0
  24. gnosisllm_knowledge/cli/commands/search.py +440 -0
  25. gnosisllm_knowledge/cli/commands/setup.py +228 -0
  26. gnosisllm_knowledge/cli/display/__init__.py +5 -0
  27. gnosisllm_knowledge/cli/display/service.py +555 -0
  28. gnosisllm_knowledge/cli/utils/__init__.py +5 -0
  29. gnosisllm_knowledge/cli/utils/config.py +207 -0
  30. gnosisllm_knowledge/core/__init__.py +87 -0
  31. gnosisllm_knowledge/core/domain/__init__.py +43 -0
  32. gnosisllm_knowledge/core/domain/document.py +240 -0
  33. gnosisllm_knowledge/core/domain/result.py +176 -0
  34. gnosisllm_knowledge/core/domain/search.py +327 -0
  35. gnosisllm_knowledge/core/domain/source.py +139 -0
  36. gnosisllm_knowledge/core/events/__init__.py +23 -0
  37. gnosisllm_knowledge/core/events/emitter.py +216 -0
  38. gnosisllm_knowledge/core/events/types.py +226 -0
  39. gnosisllm_knowledge/core/exceptions.py +407 -0
  40. gnosisllm_knowledge/core/interfaces/__init__.py +20 -0
  41. gnosisllm_knowledge/core/interfaces/agentic.py +136 -0
  42. gnosisllm_knowledge/core/interfaces/chunker.py +64 -0
  43. gnosisllm_knowledge/core/interfaces/fetcher.py +112 -0
  44. gnosisllm_knowledge/core/interfaces/indexer.py +244 -0
  45. gnosisllm_knowledge/core/interfaces/loader.py +102 -0
  46. gnosisllm_knowledge/core/interfaces/searcher.py +178 -0
  47. gnosisllm_knowledge/core/interfaces/setup.py +164 -0
  48. gnosisllm_knowledge/fetchers/__init__.py +12 -0
  49. gnosisllm_knowledge/fetchers/config.py +77 -0
  50. gnosisllm_knowledge/fetchers/http.py +167 -0
  51. gnosisllm_knowledge/fetchers/neoreader.py +204 -0
  52. gnosisllm_knowledge/loaders/__init__.py +13 -0
  53. gnosisllm_knowledge/loaders/base.py +399 -0
  54. gnosisllm_knowledge/loaders/factory.py +202 -0
  55. gnosisllm_knowledge/loaders/sitemap.py +285 -0
  56. gnosisllm_knowledge/loaders/website.py +57 -0
  57. gnosisllm_knowledge/py.typed +0 -0
  58. gnosisllm_knowledge/services/__init__.py +9 -0
  59. gnosisllm_knowledge/services/indexing.py +387 -0
  60. gnosisllm_knowledge/services/search.py +349 -0
  61. gnosisllm_knowledge-0.2.0.dist-info/METADATA +382 -0
  62. gnosisllm_knowledge-0.2.0.dist-info/RECORD +64 -0
  63. gnosisllm_knowledge-0.2.0.dist-info/WHEEL +4 -0
  64. gnosisllm_knowledge-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,216 @@
1
+ """Event emitter for knowledge module (Observer pattern)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import contextlib
7
+ import logging
8
+ from collections.abc import Awaitable, Callable
9
+ from typing import TYPE_CHECKING, TypeVar
10
+
11
+ if TYPE_CHECKING:
12
+ from gnosisllm_knowledge.core.events.types import Event, EventType
13
+
14
+ T = TypeVar("T")  # generic type variable; not referenced in the visible part of this module
15
+
16
+ # Event handler types: a sync handler returns None, an async handler returns an awaitable; EventHandler accepts either kind
17
+ SyncEventHandler = Callable[["Event"], None]
18
+ AsyncEventHandler = Callable[["Event"], Awaitable[None]]
19
+ EventHandler = SyncEventHandler | AsyncEventHandler
20
+
21
+
22
class EventEmitter:
    """Event emitter for decoupled communication (Observer pattern).

    Handlers may be plain callables or coroutine functions, and can be
    registered for specific event types or globally (called for every event).
    A failing handler never stops the remaining handlers: its error is logged
    and swallowed.

    Example:
        ```python
        emitter = EventEmitter()

        @emitter.on(EventType.DOCUMENT_LOADED)
        def on_loaded(event: Event) -> None:
            print(f"Loaded: {event.data['url']}")

        @emitter.on(EventType.DOCUMENT_INDEXED)
        async def on_indexed(event: Event) -> None:
            await log_to_service(event)

        # Emit events
        emitter.emit(DocumentLoadedEvent(url="https://example.com"))
        await emitter.emit_async(DocumentIndexedEvent(doc_id="123"))
        ```
    """

    def __init__(self) -> None:
        """Initialize the event emitter with no registered handlers."""
        self._handlers: dict[EventType, list[EventHandler]] = {}
        self._global_handlers: list[EventHandler] = []
        self._logger = logging.getLogger(__name__)
        # Strong references to tasks spawned by emit(). The event loop only
        # keeps weak references, so an unreferenced task may be garbage
        # collected before it finishes.
        self._background_tasks: set[asyncio.Task[None]] = set()

    def on(
        self,
        *event_types: EventType,
    ) -> Callable[[EventHandler], EventHandler]:
        """Decorator to register an event handler.

        Can be used with one or more event types.

        Args:
            *event_types: Event types to listen for.
                If empty, handler is called for all events.

        Returns:
            Decorator function that registers the handler and returns it
            unchanged.

        Example:
            ```python
            @emitter.on(EventType.DOCUMENT_LOADED)
            def handler(event): ...

            @emitter.on(EventType.LOAD_STARTED, EventType.LOAD_COMPLETED)
            def multi_handler(event): ...

            @emitter.on()  # All events
            def global_handler(event): ...
            ```
        """

        def decorator(handler: EventHandler) -> EventHandler:
            if event_types:
                for event_type in event_types:
                    self.add_handler(event_type, handler)
            else:
                self._global_handlers.append(handler)
            return handler

        return decorator

    def add_handler(self, event_type: EventType, handler: EventHandler) -> None:
        """Register an event handler for a specific event type.

        Args:
            event_type: Event type to listen for.
            handler: Handler function to call.
        """
        self._handlers.setdefault(event_type, []).append(handler)

    def remove_handler(self, event_type: EventType, handler: EventHandler) -> None:
        """Remove a type-specific event handler.

        Unknown event types and handlers that were never registered are
        ignored. Global handlers (registered via ``on()`` without arguments)
        are not affected.

        Args:
            event_type: Event type the handler was registered for.
            handler: Handler to remove.
        """
        registered = self._handlers.get(event_type)
        if registered is not None:
            with contextlib.suppress(ValueError):
                registered.remove(handler)

    def off(self, event_type: EventType, handler: EventHandler) -> None:
        """Alias for remove_handler."""
        self.remove_handler(event_type, handler)

    def emit(self, event: Event) -> None:
        """Emit an event synchronously.

        Calls all registered handlers for the event type. Coroutines returned
        by async handlers are scheduled on the running event loop as
        background tasks and are not awaited here; their failures are logged
        when the task finishes. If no event loop is running, the coroutine is
        closed and an error is logged instead.

        Args:
            event: The event to emit.
        """
        for handler in self._get_handlers(event.event_type):
            try:
                result = handler(event)
            except Exception as e:
                self._logger.exception("Event handler error: %s", e)
                continue

            if not asyncio.iscoroutine(result):
                continue

            try:
                task = asyncio.get_running_loop().create_task(result)
            except RuntimeError as e:
                # No running loop: close the coroutine so it does not trigger
                # a "coroutine ... was never awaited" RuntimeWarning.
                result.close()
                self._logger.error("Event handler error: %s", e)
                continue

            self._background_tasks.add(task)
            task.add_done_callback(self._on_task_done)

    def _on_task_done(self, task: asyncio.Task[None]) -> None:
        """Release a finished background task and log its failure, if any."""
        self._background_tasks.discard(task)
        if task.cancelled():
            return
        error = task.exception()
        if error is not None:
            self._logger.error("Async handler error: %s", error, exc_info=error)

    async def emit_async(self, event: Event) -> None:
        """Emit an event asynchronously.

        Runs handlers one after another, awaiting each async handler before
        calling the next one.

        Args:
            event: The event to emit.
        """
        for handler in self._get_handlers(event.event_type):
            try:
                result = handler(event)
                if asyncio.iscoroutine(result):
                    await result
            except Exception as e:
                self._logger.exception("Event handler error: %s", e)

    async def emit_parallel(self, event: Event) -> None:
        """Emit an event and run async handlers concurrently.

        Sync handlers run inline, in registration order, while the coroutines
        of async handlers are collected; the coroutines are then awaited
        together. Errors from either kind of handler are logged, not raised.

        Args:
            event: The event to emit.
        """
        pending: list[Awaitable[None]] = []

        for handler in self._get_handlers(event.event_type):
            try:
                result = handler(event)
            except Exception as e:
                self._logger.exception("Event handler error: %s", e)
                continue
            if asyncio.iscoroutine(result):
                pending.append(result)

        if not pending:
            return

        outcomes = await asyncio.gather(*pending, return_exceptions=True)
        for outcome in outcomes:
            # BaseException also covers CancelledError, which gather() returns
            # as a result when return_exceptions=True.
            if isinstance(outcome, BaseException):
                self._logger.error(
                    "Async handler error: %s", outcome, exc_info=outcome
                )

    def _get_handlers(self, event_type: EventType) -> list[EventHandler]:
        """Get all handlers for an event type.

        Args:
            event_type: Event type to get handlers for.

        Returns:
            New list of handlers (specific first, then global), so handlers
            may register or remove handlers while an emit is in progress.
        """
        return [*self._handlers.get(event_type, ()), *self._global_handlers]

    def clear(self) -> None:
        """Clear all event handlers (type-specific and global)."""
        self._handlers.clear()
        self._global_handlers.clear()

    def clear_type(self, event_type: EventType) -> None:
        """Clear handlers for a specific event type.

        Global handlers are left in place.

        Args:
            event_type: Event type to clear handlers for.
        """
        registered = self._handlers.get(event_type)
        if registered is not None:
            registered.clear()

    def handler_count(self, event_type: EventType | None = None) -> int:
        """Get the number of registered handlers.

        Args:
            event_type: Specific event type, or None for total.

        Returns:
            With ``event_type``: handlers for that type plus global handlers.
            Without: all type-specific handlers plus global handlers.
        """
        global_count = len(self._global_handlers)
        if event_type is None:
            return sum(len(h) for h in self._handlers.values()) + global_count
        return len(self._handlers.get(event_type, ())) + global_count
@@ -0,0 +1,226 @@
1
+ """Event type definitions for the knowledge module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from datetime import UTC, datetime
7
+ from enum import Enum
8
+ from typing import Any
9
+
10
+
11
class EventType(str, Enum):
    """Identifiers for every event the knowledge system can emit.

    Each member is also a ``str`` whose value is the lower-case member name.
    Members are grouped by category:
    - Loading: document and content loading
    - Indexing: document indexing operations
    - Search: search and retrieval operations
    - Agentic: AI-powered operations
    - Setup: backend setup operations
    - Resilience: fault tolerance
    - Health: health checks
    - Error: generic error
    """

    # --- Loading ---
    LOAD_STARTED = "load_started"
    LOAD_PROGRESS = "load_progress"
    DOCUMENT_FETCHED = "document_fetched"
    DOCUMENT_CHUNKED = "document_chunked"
    DOCUMENT_LOADED = "document_loaded"
    DOCUMENT_VALIDATED = "document_validated"
    DOCUMENT_REJECTED = "document_rejected"
    LOAD_COMPLETED = "load_completed"
    LOAD_FAILED = "load_failed"
    SITEMAP_DISCOVERED = "sitemap_discovered"

    # --- Indexing ---
    INDEX_STARTED = "index_started"
    DOCUMENT_INDEXED = "document_indexed"
    DOCUMENT_INDEX_FAILED = "document_index_failed"
    BATCH_STARTED = "batch_started"
    BATCH_COMPLETED = "batch_completed"
    INDEX_COMPLETED = "index_completed"
    INDEX_FAILED = "index_failed"

    # --- Search ---
    SEARCH_STARTED = "search_started"
    SEARCH_CACHE_HIT = "search_cache_hit"
    SEARCH_CACHE_MISS = "search_cache_miss"
    EMBEDDING_GENERATED = "embedding_generated"
    EMBEDDING_CACHE_HIT = "embedding_cache_hit"
    SEARCH_COMPLETED = "search_completed"
    SEARCH_FAILED = "search_failed"

    # --- Agentic ---
    AGENT_STARTED = "agent_started"
    AGENT_STEP = "agent_step"
    AGENT_COMPLETED = "agent_completed"
    AGENT_FAILED = "agent_failed"

    # --- Setup ---
    SETUP_STARTED = "setup_started"
    SETUP_STEP_STARTED = "setup_step_started"
    SETUP_STEP_COMPLETED = "setup_step_completed"
    SETUP_STEP_FAILED = "setup_step_failed"
    SETUP_COMPLETED = "setup_completed"

    # --- Resilience ---
    RETRY_ATTEMPT = "retry_attempt"
    CIRCUIT_BREAKER_OPENED = "circuit_breaker_opened"
    CIRCUIT_BREAKER_CLOSED = "circuit_breaker_closed"
    CIRCUIT_BREAKER_HALF_OPEN = "circuit_breaker_half_open"
    FALLBACK_TRIGGERED = "fallback_triggered"

    # --- Health ---
    HEALTH_CHECK_STARTED = "health_check_started"
    HEALTH_CHECK_COMPLETED = "health_check_completed"
    HEALTH_DEGRADED = "health_degraded"
    HEALTH_RECOVERED = "health_recovered"

    # --- Error ---
    ERROR = "error"
82
+
83
+
84
@dataclass
class Event:
    """Base event class.

    Attributes:
        event_type: The type of event.
        timestamp: When the event occurred (timezone-aware, UTC by default).
        data: Additional event data.
        account_id: Account ID for multi-tenant context.
        user_id: User ID if applicable.
        request_id: Request ID for tracing.
        trace_id: Distributed trace ID.
        span_id: Distributed trace span ID.
    """

    event_type: EventType = EventType.ERROR  # Default, usually overridden
    timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
    data: dict[str, Any] = field(default_factory=dict)

    # Context
    account_id: str | None = None
    user_id: str | None = None
    request_id: str | None = None

    # Tracing
    trace_id: str | None = None
    span_id: str | None = None

    def with_context(
        self,
        account_id: str | None = None,
        user_id: str | None = None,
        request_id: str | None = None,
    ) -> Event:
        """Create a copy with context information.

        The copy has the same class as ``self``, so fields declared by
        subclasses are preserved. ``data`` is shallow-copied so the copy and
        the original do not share one dict.

        Args:
            account_id: Replacement account ID; a falsy value keeps the
                current one.
            user_id: Replacement user ID; a falsy value keeps the current one.
            request_id: Replacement request ID; a falsy value keeps the
                current one.

        Returns:
            A new event carrying the merged context.
        """
        import copy  # local import keeps this block self-contained

        # copy.copy() duplicates the instance without re-running __init__ /
        # __post_init__, so the timestamp, tracing IDs and any subclass
        # fields carry over unchanged.
        clone = copy.copy(self)
        clone.data = self.data.copy()
        clone.account_id = account_id or self.account_id
        clone.user_id = user_id or self.user_id
        clone.request_id = request_id or self.request_id
        return clone
129
+
130
+
131
@dataclass
class DocumentLoadedEvent(Event):
    """Event emitted when a document is loaded.

    The fields ``url``, ``source``, ``chunks_count`` and ``content_length``
    are mirrored into ``data`` under the same keys.
    """

    url: str = ""
    source: str = ""
    chunks_count: int = 0
    content_length: int = 0

    def __post_init__(self) -> None:
        """Pin the event type and mirror the typed fields into ``data``.

        Entries the caller passed via ``data`` are kept; the typed fields win
        when a key collides. A new dict is built, so the caller's dict is not
        mutated.
        """
        self.event_type = EventType.DOCUMENT_LOADED
        self.data = {
            **self.data,
            "url": self.url,
            "source": self.source,
            "chunks_count": self.chunks_count,
            "content_length": self.content_length,
        }
149
+
150
+
151
@dataclass
class DocumentIndexedEvent(Event):
    """Event emitted when a document is indexed.

    The fields ``doc_id``, ``index_name``, ``success`` and ``error_message``
    are mirrored into ``data`` under the same keys.
    """

    doc_id: str = ""
    index_name: str = ""
    success: bool = True
    error_message: str | None = None

    def __post_init__(self) -> None:
        """Pin the event type and mirror the typed fields into ``data``.

        Entries the caller passed via ``data`` are kept; the typed fields win
        when a key collides. A new dict is built, so the caller's dict is not
        mutated.
        """
        # NOTE(review): the type stays DOCUMENT_INDEXED even when
        # success is False, although EventType.DOCUMENT_INDEX_FAILED exists.
        # Confirm that this is intended.
        self.event_type = EventType.DOCUMENT_INDEXED
        self.data = {
            **self.data,
            "doc_id": self.doc_id,
            "index_name": self.index_name,
            "success": self.success,
            "error_message": self.error_message,
        }
169
+
170
+
171
@dataclass
class SitemapDiscoveryEvent(Event):
    """Event emitted when URLs are discovered from a sitemap.

    The fields ``sitemap_url``, ``urls_discovered``, ``depth`` and
    ``total_urls`` are mirrored into ``data`` under the same keys.
    """

    sitemap_url: str = ""
    urls_discovered: int = 0
    depth: int = 0
    total_urls: int = 0

    def __post_init__(self) -> None:
        """Pin the event type and mirror the typed fields into ``data``.

        Entries the caller passed via ``data`` are kept; the typed fields win
        when a key collides. A new dict is built, so the caller's dict is not
        mutated.
        """
        self.event_type = EventType.SITEMAP_DISCOVERED
        self.data = {
            **self.data,
            "sitemap_url": self.sitemap_url,
            "urls_discovered": self.urls_discovered,
            "depth": self.depth,
            "total_urls": self.total_urls,
        }
189
+
190
+
191
@dataclass
class BatchStartedEvent(Event):
    """Event emitted when a batch operation starts.

    The fields ``batch_index``, ``batch_size`` and ``total_batches`` are
    mirrored into ``data`` under the same keys.
    """

    batch_index: int = 0
    batch_size: int = 0
    total_batches: int = 0

    def __post_init__(self) -> None:
        """Pin the event type and mirror the typed fields into ``data``.

        Entries the caller passed via ``data`` are kept; the typed fields win
        when a key collides. A new dict is built, so the caller's dict is not
        mutated.
        """
        self.event_type = EventType.BATCH_STARTED
        self.data = {
            **self.data,
            "batch_index": self.batch_index,
            "batch_size": self.batch_size,
            "total_batches": self.total_batches,
        }
207
+
208
+
209
@dataclass
class BatchCompletedEvent(Event):
    """Event emitted when a batch operation completes.

    The fields ``batch_index``, ``success_count``, ``failure_count`` and
    ``duration_ms`` are mirrored into ``data`` under the same keys.
    """

    batch_index: int = 0
    success_count: int = 0
    failure_count: int = 0
    duration_ms: float = 0.0

    def __post_init__(self) -> None:
        """Pin the event type and mirror the typed fields into ``data``.

        Entries the caller passed via ``data`` are kept; the typed fields win
        when a key collides. A new dict is built, so the caller's dict is not
        mutated.
        """
        self.event_type = EventType.BATCH_COMPLETED
        self.data = {
            **self.data,
            "batch_index": self.batch_index,
            "success_count": self.success_count,
            "failure_count": self.failure_count,
            "duration_ms": self.duration_ms,
        }