gnosisllm-knowledge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gnosisllm_knowledge/__init__.py +152 -0
- gnosisllm_knowledge/api/__init__.py +5 -0
- gnosisllm_knowledge/api/knowledge.py +548 -0
- gnosisllm_knowledge/backends/__init__.py +26 -0
- gnosisllm_knowledge/backends/memory/__init__.py +9 -0
- gnosisllm_knowledge/backends/memory/indexer.py +384 -0
- gnosisllm_knowledge/backends/memory/searcher.py +516 -0
- gnosisllm_knowledge/backends/opensearch/__init__.py +19 -0
- gnosisllm_knowledge/backends/opensearch/agentic.py +738 -0
- gnosisllm_knowledge/backends/opensearch/config.py +195 -0
- gnosisllm_knowledge/backends/opensearch/indexer.py +499 -0
- gnosisllm_knowledge/backends/opensearch/mappings.py +255 -0
- gnosisllm_knowledge/backends/opensearch/queries.py +445 -0
- gnosisllm_knowledge/backends/opensearch/searcher.py +383 -0
- gnosisllm_knowledge/backends/opensearch/setup.py +1390 -0
- gnosisllm_knowledge/chunking/__init__.py +9 -0
- gnosisllm_knowledge/chunking/fixed.py +138 -0
- gnosisllm_knowledge/chunking/sentence.py +239 -0
- gnosisllm_knowledge/cli/__init__.py +18 -0
- gnosisllm_knowledge/cli/app.py +509 -0
- gnosisllm_knowledge/cli/commands/__init__.py +7 -0
- gnosisllm_knowledge/cli/commands/agentic.py +529 -0
- gnosisllm_knowledge/cli/commands/load.py +369 -0
- gnosisllm_knowledge/cli/commands/search.py +440 -0
- gnosisllm_knowledge/cli/commands/setup.py +228 -0
- gnosisllm_knowledge/cli/display/__init__.py +5 -0
- gnosisllm_knowledge/cli/display/service.py +555 -0
- gnosisllm_knowledge/cli/utils/__init__.py +5 -0
- gnosisllm_knowledge/cli/utils/config.py +207 -0
- gnosisllm_knowledge/core/__init__.py +87 -0
- gnosisllm_knowledge/core/domain/__init__.py +43 -0
- gnosisllm_knowledge/core/domain/document.py +240 -0
- gnosisllm_knowledge/core/domain/result.py +176 -0
- gnosisllm_knowledge/core/domain/search.py +327 -0
- gnosisllm_knowledge/core/domain/source.py +139 -0
- gnosisllm_knowledge/core/events/__init__.py +23 -0
- gnosisllm_knowledge/core/events/emitter.py +216 -0
- gnosisllm_knowledge/core/events/types.py +226 -0
- gnosisllm_knowledge/core/exceptions.py +407 -0
- gnosisllm_knowledge/core/interfaces/__init__.py +20 -0
- gnosisllm_knowledge/core/interfaces/agentic.py +136 -0
- gnosisllm_knowledge/core/interfaces/chunker.py +64 -0
- gnosisllm_knowledge/core/interfaces/fetcher.py +112 -0
- gnosisllm_knowledge/core/interfaces/indexer.py +244 -0
- gnosisllm_knowledge/core/interfaces/loader.py +102 -0
- gnosisllm_knowledge/core/interfaces/searcher.py +178 -0
- gnosisllm_knowledge/core/interfaces/setup.py +164 -0
- gnosisllm_knowledge/fetchers/__init__.py +12 -0
- gnosisllm_knowledge/fetchers/config.py +77 -0
- gnosisllm_knowledge/fetchers/http.py +167 -0
- gnosisllm_knowledge/fetchers/neoreader.py +204 -0
- gnosisllm_knowledge/loaders/__init__.py +13 -0
- gnosisllm_knowledge/loaders/base.py +399 -0
- gnosisllm_knowledge/loaders/factory.py +202 -0
- gnosisllm_knowledge/loaders/sitemap.py +285 -0
- gnosisllm_knowledge/loaders/website.py +57 -0
- gnosisllm_knowledge/py.typed +0 -0
- gnosisllm_knowledge/services/__init__.py +9 -0
- gnosisllm_knowledge/services/indexing.py +387 -0
- gnosisllm_knowledge/services/search.py +349 -0
- gnosisllm_knowledge-0.2.0.dist-info/METADATA +382 -0
- gnosisllm_knowledge-0.2.0.dist-info/RECORD +64 -0
- gnosisllm_knowledge-0.2.0.dist-info/WHEEL +4 -0
- gnosisllm_knowledge-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"""Event emitter for knowledge module (Observer pattern)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import contextlib
|
|
7
|
+
import logging
|
|
8
|
+
from collections.abc import Awaitable, Callable
|
|
9
|
+
from typing import TYPE_CHECKING, TypeVar
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from gnosisllm_knowledge.core.events.types import Event, EventType
|
|
13
|
+
|
|
14
|
+
# Generic type variable exported by this module.
T = TypeVar("T")

# Event handler types
# A synchronous handler takes the event and returns nothing.
SyncEventHandler = Callable[["Event"], None]
# An asynchronous handler takes the event and returns an awaitable.
AsyncEventHandler = Callable[["Event"], Awaitable[None]]
# Either flavour may be registered on an emitter.
EventHandler = SyncEventHandler | AsyncEventHandler
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class EventEmitter:
    """Event emitter for decoupled communication (Observer pattern).

    Supports both synchronous and asynchronous event handlers.
    Handlers can be registered for specific event types or for all events.

    Handler failures never propagate to the emitting code: they are logged
    (with traceback) on this module's logger and the remaining handlers
    still run.

    Example:
        ```python
        emitter = EventEmitter()

        @emitter.on(EventType.DOCUMENT_LOADED)
        def on_loaded(event: Event) -> None:
            print(f"Loaded: {event.data['url']}")

        @emitter.on(EventType.DOCUMENT_INDEXED)
        async def on_indexed(event: Event) -> None:
            await log_to_service(event)

        # Emit events
        emitter.emit(DocumentLoadedEvent(url="https://example.com"))
        await emitter.emit_async(DocumentIndexedEvent(doc_id="123"))
        ```
    """

    def __init__(self) -> None:
        """Initialize the event emitter."""
        self._handlers: dict[EventType, list[EventHandler]] = {}
        self._global_handlers: list[EventHandler] = []
        self._logger = logging.getLogger(__name__)
        # Strong references to tasks scheduled by emit(). The event loop only
        # keeps weak references, so an unreferenced task may be collected
        # before it finishes.
        self._background_tasks: set[asyncio.Task[None]] = set()

    def on(
        self,
        *event_types: EventType,
    ) -> Callable[[EventHandler], EventHandler]:
        """Decorator to register an event handler.

        Can be used with one or more event types.

        Args:
            *event_types: Event types to listen for.
                If empty, handler is called for all events.

        Returns:
            Decorator function that registers the handler and returns it
            unchanged.

        Example:
            ```python
            @emitter.on(EventType.DOCUMENT_LOADED)
            def handler(event): ...

            @emitter.on(EventType.LOAD_STARTED, EventType.LOAD_COMPLETED)
            def multi_handler(event): ...

            @emitter.on()  # All events
            def global_handler(event): ...
            ```
        """

        def decorator(handler: EventHandler) -> EventHandler:
            if event_types:
                for event_type in event_types:
                    self.add_handler(event_type, handler)
            else:
                self._global_handlers.append(handler)
            return handler

        return decorator

    def add_handler(self, event_type: EventType, handler: EventHandler) -> None:
        """Register an event handler for a specific event type.

        Args:
            event_type: Event type to listen for.
            handler: Handler function to call.
        """
        self._handlers.setdefault(event_type, []).append(handler)

    def remove_handler(self, event_type: EventType, handler: EventHandler) -> None:
        """Remove an event handler.

        Removing a handler that was never registered is a no-op.

        Args:
            event_type: Event type the handler was registered for.
            handler: Handler to remove.
        """
        if event_type in self._handlers:
            with contextlib.suppress(ValueError):
                self._handlers[event_type].remove(handler)

    def off(self, event_type: EventType, handler: EventHandler) -> None:
        """Alias for remove_handler."""
        self.remove_handler(event_type, handler)

    def emit(self, event: Event) -> None:
        """Emit an event synchronously.

        Calls all registered handlers for the event type.
        Coroutines returned by async handlers are scheduled on the running
        event loop but not awaited. If no event loop is running, the
        coroutine is closed and an error is logged instead.

        Args:
            event: The event to emit.
        """
        for handler in self._get_handlers(event.event_type):
            try:
                result = handler(event)
            except Exception as e:
                self._logger.exception("Event handler error: %s", e)
                continue

            if not asyncio.iscoroutine(result):
                continue

            try:
                task = asyncio.get_running_loop().create_task(result)
            except RuntimeError:
                # No running loop: close the coroutine so it does not trigger
                # a "coroutine was never awaited" warning.
                result.close()
                self._logger.error(
                    "Event handler error: async handler skipped, "
                    "no running event loop"
                )
                continue

            self._background_tasks.add(task)
            task.add_done_callback(self._on_background_task_done)

    def _on_background_task_done(self, task: asyncio.Task[None]) -> None:
        """Drop a finished background task and log its failure, if any."""
        self._background_tasks.discard(task)
        if task.cancelled():
            return
        error = task.exception()
        if error is not None:
            self._logger.error("Async handler error: %s", error, exc_info=error)

    async def emit_async(self, event: Event) -> None:
        """Emit an event asynchronously.

        Handlers run one after another in registration order; coroutines
        returned by async handlers are awaited before the next handler runs.

        Args:
            event: The event to emit.
        """
        for handler in self._get_handlers(event.event_type):
            try:
                result = handler(event)
                if asyncio.iscoroutine(result):
                    await result
            except Exception as e:
                self._logger.exception("Event handler error: %s", e)

    async def emit_parallel(self, event: Event) -> None:
        """Emit an event and run handlers in parallel.

        Sync handlers run immediately, in order; the coroutines returned by
        async handlers are then awaited concurrently.

        Args:
            event: The event to emit.
        """
        pending: list[Awaitable[None]] = []

        for handler in self._get_handlers(event.event_type):
            try:
                result = handler(event)
            except Exception as e:
                self._logger.exception("Event handler error: %s", e)
                continue
            if asyncio.iscoroutine(result):
                pending.append(result)

        if not pending:
            return

        # return_exceptions=True so one failing handler cannot cancel the rest.
        outcomes = await asyncio.gather(*pending, return_exceptions=True)
        for outcome in outcomes:
            if isinstance(outcome, Exception):
                self._logger.error(
                    "Async handler error: %s", outcome, exc_info=outcome
                )

    def _get_handlers(self, event_type: EventType) -> list[EventHandler]:
        """Get all handlers for an event type.

        Args:
            event_type: Event type to get handlers for.

        Returns:
            New list of handlers (specific first, then global), so callers
            can iterate safely while handlers are added or removed.
        """
        return [*self._handlers.get(event_type, ()), *self._global_handlers]

    def clear(self) -> None:
        """Clear all event handlers."""
        self._handlers.clear()
        self._global_handlers.clear()

    def clear_type(self, event_type: EventType) -> None:
        """Clear handlers for a specific event type.

        Global handlers are left in place.

        Args:
            event_type: Event type to clear handlers for.
        """
        if event_type in self._handlers:
            self._handlers[event_type].clear()

    def handler_count(self, event_type: EventType | None = None) -> int:
        """Get the number of registered handlers.

        Args:
            event_type: Specific event type, or None for total.

        Returns:
            Number of handlers. Global handlers are included in both cases.
        """
        global_count = len(self._global_handlers)
        if event_type is None:
            return sum(len(h) for h in self._handlers.values()) + global_count
        return len(self._handlers.get(event_type, [])) + global_count
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""Event type definitions for the knowledge module."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class EventType(str, Enum):
    """Types of events in the knowledge system.

    Members also subclass ``str``, so each member compares equal to its
    string value (for example ``EventType.ERROR == "error"``).

    Events are organized by category:
    - Loading events: Document and content loading
    - Indexing events: Document indexing operations
    - Search events: Search and retrieval operations
    - Agentic events: AI-powered operations
    - Setup events: Backend setup operations
    - Resilience events: Fault tolerance events
    - Health events: Health check events
    - Error events: Generic error event
    """

    # Loading events
    LOAD_STARTED = "load_started"
    LOAD_PROGRESS = "load_progress"
    DOCUMENT_FETCHED = "document_fetched"
    DOCUMENT_CHUNKED = "document_chunked"
    DOCUMENT_LOADED = "document_loaded"
    DOCUMENT_VALIDATED = "document_validated"
    DOCUMENT_REJECTED = "document_rejected"
    LOAD_COMPLETED = "load_completed"
    LOAD_FAILED = "load_failed"
    SITEMAP_DISCOVERED = "sitemap_discovered"

    # Indexing events
    INDEX_STARTED = "index_started"
    DOCUMENT_INDEXED = "document_indexed"
    DOCUMENT_INDEX_FAILED = "document_index_failed"
    BATCH_STARTED = "batch_started"
    BATCH_COMPLETED = "batch_completed"
    INDEX_COMPLETED = "index_completed"
    INDEX_FAILED = "index_failed"

    # Search events
    SEARCH_STARTED = "search_started"
    SEARCH_CACHE_HIT = "search_cache_hit"
    SEARCH_CACHE_MISS = "search_cache_miss"
    EMBEDDING_GENERATED = "embedding_generated"
    EMBEDDING_CACHE_HIT = "embedding_cache_hit"
    SEARCH_COMPLETED = "search_completed"
    SEARCH_FAILED = "search_failed"

    # Agentic events
    AGENT_STARTED = "agent_started"
    AGENT_STEP = "agent_step"
    AGENT_COMPLETED = "agent_completed"
    AGENT_FAILED = "agent_failed"

    # Setup events
    SETUP_STARTED = "setup_started"
    SETUP_STEP_STARTED = "setup_step_started"
    SETUP_STEP_COMPLETED = "setup_step_completed"
    SETUP_STEP_FAILED = "setup_step_failed"
    SETUP_COMPLETED = "setup_completed"

    # Resilience events
    RETRY_ATTEMPT = "retry_attempt"
    CIRCUIT_BREAKER_OPENED = "circuit_breaker_opened"
    CIRCUIT_BREAKER_CLOSED = "circuit_breaker_closed"
    CIRCUIT_BREAKER_HALF_OPEN = "circuit_breaker_half_open"
    FALLBACK_TRIGGERED = "fallback_triggered"

    # Health events
    HEALTH_CHECK_STARTED = "health_check_started"
    HEALTH_CHECK_COMPLETED = "health_check_completed"
    HEALTH_DEGRADED = "health_degraded"
    HEALTH_RECOVERED = "health_recovered"

    # Error events (also the default event_type of the base Event class)
    ERROR = "error"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass
class Event:
    """Base event class.

    Attributes:
        event_type: The type of event.
        timestamp: When the event occurred (timezone-aware, UTC by default).
        data: Additional event data.
        account_id: Account ID for multi-tenant context.
        user_id: User ID if applicable.
        request_id: Request ID for tracing.
        trace_id: Distributed trace ID.
        span_id: Distributed trace span ID.
    """

    event_type: EventType = EventType.ERROR  # Default, usually overridden
    timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
    data: dict[str, Any] = field(default_factory=dict)

    # Context
    account_id: str | None = None
    user_id: str | None = None
    request_id: str | None = None

    # Tracing
    trace_id: str | None = None
    span_id: str | None = None

    def with_context(
        self,
        account_id: str | None = None,
        user_id: str | None = None,
        request_id: str | None = None,
    ) -> Event:
        """Create a copy with context information.

        The copy has the same concrete class as ``self``, so subclass fields
        are preserved. ``data`` is shallow-copied so the copy's dict can be
        mutated independently.

        Args:
            account_id: Replacement account ID; a falsy value keeps the
                current one.
            user_id: Replacement user ID; a falsy value keeps the current one.
            request_id: Replacement request ID; a falsy value keeps the
                current one.

        Returns:
            A new event carrying the merged context.
        """
        # Function-scope import keeps this block self-contained.
        from copy import copy

        # Shallow copy keeps the subclass and all its fields without
        # re-running __init__ / __post_init__.
        clone = copy(self)
        clone.data = self.data.copy()
        clone.account_id = account_id or self.account_id
        clone.user_id = user_id or self.user_id
        clone.request_id = request_id or self.request_id
        return clone
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@dataclass
class DocumentLoadedEvent(Event):
    """Event emitted when a document is loaded.

    The ``url``, ``source``, ``chunks_count`` and ``content_length`` fields
    are mirrored into ``data`` under the same names. Any ``event_type`` or
    ``data`` passed to the constructor is overwritten.
    """

    url: str = ""
    source: str = ""
    chunks_count: int = 0
    content_length: int = 0

    def __post_init__(self) -> None:
        """Fix the event type and rebuild ``data`` from the payload fields."""
        payload_fields = ("url", "source", "chunks_count", "content_length")
        self.event_type = EventType.DOCUMENT_LOADED
        self.data = {name: getattr(self, name) for name in payload_fields}
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@dataclass
class DocumentIndexedEvent(Event):
    """Event emitted when a document is indexed.

    The ``doc_id``, ``index_name``, ``success`` and ``error_message`` fields
    are mirrored into ``data`` under the same names. Any ``event_type`` or
    ``data`` passed to the constructor is overwritten.
    """

    doc_id: str = ""
    index_name: str = ""
    success: bool = True
    error_message: str | None = None

    def __post_init__(self) -> None:
        """Fix the event type and rebuild ``data`` from the payload fields."""
        payload_fields = ("doc_id", "index_name", "success", "error_message")
        self.event_type = EventType.DOCUMENT_INDEXED
        self.data = {name: getattr(self, name) for name in payload_fields}
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@dataclass
class SitemapDiscoveryEvent(Event):
    """Event emitted when URLs are discovered from a sitemap.

    The ``sitemap_url``, ``urls_discovered``, ``depth`` and ``total_urls``
    fields are mirrored into ``data`` under the same names. Any
    ``event_type`` or ``data`` passed to the constructor is overwritten.
    """

    sitemap_url: str = ""
    urls_discovered: int = 0
    depth: int = 0
    total_urls: int = 0

    def __post_init__(self) -> None:
        """Fix the event type and rebuild ``data`` from the payload fields."""
        payload_fields = ("sitemap_url", "urls_discovered", "depth", "total_urls")
        self.event_type = EventType.SITEMAP_DISCOVERED
        self.data = {name: getattr(self, name) for name in payload_fields}
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
@dataclass
class BatchStartedEvent(Event):
    """Event emitted when a batch operation starts.

    The ``batch_index``, ``batch_size`` and ``total_batches`` fields are
    mirrored into ``data`` under the same names. Any ``event_type`` or
    ``data`` passed to the constructor is overwritten.
    """

    batch_index: int = 0
    batch_size: int = 0
    total_batches: int = 0

    def __post_init__(self) -> None:
        """Fix the event type and rebuild ``data`` from the payload fields."""
        payload_fields = ("batch_index", "batch_size", "total_batches")
        self.event_type = EventType.BATCH_STARTED
        self.data = {name: getattr(self, name) for name in payload_fields}
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
@dataclass
class BatchCompletedEvent(Event):
    """Event emitted when a batch operation completes.

    The ``batch_index``, ``success_count``, ``failure_count`` and
    ``duration_ms`` fields are mirrored into ``data`` under the same names.
    Any ``event_type`` or ``data`` passed to the constructor is overwritten.
    """

    batch_index: int = 0
    success_count: int = 0
    failure_count: int = 0
    duration_ms: float = 0.0

    def __post_init__(self) -> None:
        """Fix the event type and rebuild ``data`` from the payload fields."""
        payload_fields = (
            "batch_index",
            "success_count",
            "failure_count",
            "duration_ms",
        )
        self.event_type = EventType.BATCH_COMPLETED
        self.data = {name: getattr(self, name) for name in payload_fields}
|