claude-mpm 4.0.32__py3-none-any.whl → 4.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/VERSION +1 -1
- claude_mpm/agents/INSTRUCTIONS.md +70 -2
- claude_mpm/agents/OUTPUT_STYLE.md +0 -11
- claude_mpm/agents/WORKFLOW.md +14 -2
- claude_mpm/agents/templates/documentation.json +51 -34
- claude_mpm/agents/templates/research.json +0 -11
- claude_mpm/cli/__init__.py +111 -33
- claude_mpm/cli/commands/agent_manager.py +10 -8
- claude_mpm/cli/commands/agents.py +82 -0
- claude_mpm/cli/commands/cleanup_orphaned_agents.py +150 -0
- claude_mpm/cli/commands/mcp_pipx_config.py +199 -0
- claude_mpm/cli/parsers/agents_parser.py +27 -0
- claude_mpm/cli/parsers/base_parser.py +6 -0
- claude_mpm/cli/startup_logging.py +75 -0
- claude_mpm/core/framework_loader.py +173 -84
- claude_mpm/dashboard/static/css/dashboard.css +449 -0
- claude_mpm/dashboard/static/dist/components/agent-inference.js +1 -1
- claude_mpm/dashboard/static/dist/components/event-viewer.js +1 -1
- claude_mpm/dashboard/static/dist/components/file-tool-tracker.js +1 -1
- claude_mpm/dashboard/static/dist/components/module-viewer.js +1 -1
- claude_mpm/dashboard/static/dist/components/session-manager.js +1 -1
- claude_mpm/dashboard/static/dist/dashboard.js +1 -1
- claude_mpm/dashboard/static/dist/socket-client.js +1 -1
- claude_mpm/dashboard/static/js/components/agent-hierarchy.js +774 -0
- claude_mpm/dashboard/static/js/components/agent-inference.js +257 -3
- claude_mpm/dashboard/static/js/components/build-tracker.js +323 -0
- claude_mpm/dashboard/static/js/components/event-viewer.js +168 -39
- claude_mpm/dashboard/static/js/components/file-tool-tracker.js +17 -0
- claude_mpm/dashboard/static/js/components/session-manager.js +23 -3
- claude_mpm/dashboard/static/js/components/socket-manager.js +2 -0
- claude_mpm/dashboard/static/js/dashboard.js +207 -31
- claude_mpm/dashboard/static/js/socket-client.js +92 -11
- claude_mpm/dashboard/templates/index.html +1 -0
- claude_mpm/hooks/claude_hooks/connection_pool.py +25 -4
- claude_mpm/hooks/claude_hooks/event_handlers.py +81 -19
- claude_mpm/hooks/claude_hooks/hook_handler.py +125 -163
- claude_mpm/hooks/claude_hooks/hook_handler_eventbus.py +398 -0
- claude_mpm/hooks/claude_hooks/response_tracking.py +10 -0
- claude_mpm/services/agents/deployment/agent_deployment.py +34 -48
- claude_mpm/services/agents/deployment/agent_discovery_service.py +4 -1
- claude_mpm/services/agents/deployment/agent_template_builder.py +20 -11
- claude_mpm/services/agents/deployment/agent_version_manager.py +4 -1
- claude_mpm/services/agents/deployment/agents_directory_resolver.py +10 -25
- claude_mpm/services/agents/deployment/multi_source_deployment_service.py +396 -13
- claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +3 -2
- claude_mpm/services/agents/deployment/strategies/system_strategy.py +10 -3
- claude_mpm/services/agents/deployment/strategies/user_strategy.py +10 -14
- claude_mpm/services/agents/deployment/system_instructions_deployer.py +8 -85
- claude_mpm/services/agents/memory/content_manager.py +98 -105
- claude_mpm/services/event_bus/__init__.py +18 -0
- claude_mpm/services/event_bus/config.py +165 -0
- claude_mpm/services/event_bus/event_bus.py +349 -0
- claude_mpm/services/event_bus/relay.py +297 -0
- claude_mpm/services/events/__init__.py +44 -0
- claude_mpm/services/events/consumers/__init__.py +18 -0
- claude_mpm/services/events/consumers/dead_letter.py +296 -0
- claude_mpm/services/events/consumers/logging.py +183 -0
- claude_mpm/services/events/consumers/metrics.py +242 -0
- claude_mpm/services/events/consumers/socketio.py +376 -0
- claude_mpm/services/events/core.py +470 -0
- claude_mpm/services/events/interfaces.py +230 -0
- claude_mpm/services/events/producers/__init__.py +14 -0
- claude_mpm/services/events/producers/hook.py +269 -0
- claude_mpm/services/events/producers/system.py +327 -0
- claude_mpm/services/mcp_gateway/auto_configure.py +372 -0
- claude_mpm/services/mcp_gateway/core/process_pool.py +411 -0
- claude_mpm/services/mcp_gateway/server/stdio_server.py +13 -0
- claude_mpm/services/monitor_build_service.py +345 -0
- claude_mpm/services/socketio/event_normalizer.py +667 -0
- claude_mpm/services/socketio/handlers/connection.py +81 -23
- claude_mpm/services/socketio/handlers/hook.py +14 -5
- claude_mpm/services/socketio/migration_utils.py +329 -0
- claude_mpm/services/socketio/server/broadcaster.py +26 -33
- claude_mpm/services/socketio/server/core.py +29 -5
- claude_mpm/services/socketio/server/eventbus_integration.py +189 -0
- claude_mpm/services/socketio/server/main.py +25 -0
- {claude_mpm-4.0.32.dist-info → claude_mpm-4.1.0.dist-info}/METADATA +28 -9
- {claude_mpm-4.0.32.dist-info → claude_mpm-4.1.0.dist-info}/RECORD +82 -56
- {claude_mpm-4.0.32.dist-info → claude_mpm-4.1.0.dist-info}/WHEEL +0 -0
- {claude_mpm-4.0.32.dist-info → claude_mpm-4.1.0.dist-info}/entry_points.txt +0 -0
- {claude_mpm-4.0.32.dist-info → claude_mpm-4.1.0.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-4.0.32.dist-info → claude_mpm-4.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Metrics Event Consumer
|
|
3
|
+
=====================
|
|
4
|
+
|
|
5
|
+
Collects metrics and statistics from events.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
from collections import defaultdict, deque
|
|
10
|
+
from typing import Any, Deque, Dict, List
|
|
11
|
+
|
|
12
|
+
from claude_mpm.core.logging_config import get_logger
|
|
13
|
+
|
|
14
|
+
from ..core import Event
|
|
15
|
+
from ..interfaces import ConsumerConfig, ConsumerPriority, IEventConsumer
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MetricsConsumer(IEventConsumer):
    """
    Collects metrics and statistics from events.

    Features:
    - Event counting by topic and type
    - Rate calculation (events per second)
    - Latency tracking
    - Top event analysis
    - Time-windowed statistics
    """

    def __init__(
        self,
        window_size: int = 300,  # 5 minutes
        top_n: int = 10,
        report_interval: float = 60.0,  # Report every minute
    ):
        """
        Initialize metrics consumer.

        Args:
            window_size: Time window for statistics (seconds)
            top_n: Number of top events to track
            report_interval: How often to report metrics (seconds)
        """
        self.logger = get_logger("MetricsConsumer")

        # Configuration
        self.window_size = window_size
        self.top_n = top_n
        self.report_interval = report_interval

        # State
        self._initialized = False
        self._last_report_time = time.time()

        # Cumulative counters keyed by "topic:type", topic, type, and source.
        # These are never pruned; they grow only with the number of unique keys.
        self._event_counts: Dict[str, int] = defaultdict(int)
        self._topic_counts: Dict[str, int] = defaultdict(int)
        self._type_counts: Dict[str, int] = defaultdict(int)
        self._source_counts: Dict[str, int] = defaultdict(int)

        # Time-windowed metrics
        self._recent_events: Deque[tuple] = deque()  # (timestamp, topic, type)
        self._latencies: Deque[float] = deque(maxlen=1000)  # bounded latency samples

        # Failures other consumers reported (via event metadata), per consumer name
        self._error_counts: Dict[str, int] = defaultdict(int)

        # Derived metrics, recalculated on demand by _calculate_metrics()
        self._metrics = {
            "total_events": 0,
            "events_per_second": 0.0,
            "average_latency_ms": 0.0,
            "peak_rate": 0.0,
            "unique_topics": 0,
            "unique_types": 0,
        }

        # Consumer configuration
        self._config = ConsumerConfig(
            name="MetricsConsumer",
            topics=["**"],  # Monitor all events
            priority=ConsumerPriority.DEFERRED,  # Process after other consumers
        )

    async def initialize(self) -> bool:
        """Initialize the metrics consumer."""
        self._initialized = True
        self.logger.info("Metrics consumer initialized")
        return True

    async def consume(self, event: Event) -> bool:
        """Process a single event for metrics.

        Returns:
            True if the event was recorded; False when uninitialized or on error.
        """
        if not self._initialized:
            return False

        try:
            current_time = time.time()

            # Update cumulative counts
            self._event_counts[f"{event.topic}:{event.type}"] += 1
            self._topic_counts[event.topic] += 1
            self._type_counts[event.type] += 1
            self._source_counts[event.source] += 1
            self._metrics["total_events"] += 1

            # Track failures other consumers reported for this event
            if event.metadata and event.metadata.consumers_failed:
                for consumer in event.metadata.consumers_failed:
                    self._error_counts[consumer] += 1

            # Add to the sliding window
            self._recent_events.append((current_time, event.topic, event.type))

            # Calculate delivery latency if the event carries a timestamp
            if event.timestamp:
                latency = (current_time - event.timestamp.timestamp()) * 1000
                self._latencies.append(latency)

            # Drop events that have fallen out of the time window
            self._prune_window(current_time)

            # Report metrics periodically
            if current_time - self._last_report_time >= self.report_interval:
                await self._report_metrics()
                self._last_report_time = current_time

            return True

        except Exception as e:
            self.logger.error(f"Error processing event for metrics: {e}")
            return False

    async def consume_batch(self, events: List[Event]) -> int:
        """Process multiple events; returns the number recorded successfully."""
        successful = 0
        for event in events:
            if await self.consume(event):
                successful += 1
        return successful

    async def shutdown(self) -> None:
        """Shutdown the consumer, emitting one final metrics report."""
        # Report final metrics
        await self._report_metrics()

        self.logger.info(
            f"Metrics consumer shutdown - processed {self._metrics['total_events']} events"
        )
        self._initialized = False

    @property
    def config(self) -> ConsumerConfig:
        """Get consumer configuration."""
        return self._config

    @property
    def is_healthy(self) -> bool:
        """Check if consumer is healthy."""
        return self._initialized

    def get_metrics(self) -> Dict[str, Any]:
        """Get a snapshot of consumer metrics, including top events/topics."""
        # Refresh derived metrics before snapshotting
        self._calculate_metrics()

        return {
            **self._metrics,
            "top_events": self._top_counts(self._event_counts, self.top_n),
            "top_topics": self._top_counts(self._topic_counts, self.top_n),
            "error_counts": dict(self._error_counts),
            "window_size_seconds": self.window_size,
        }

    @staticmethod
    def _top_counts(counts: Dict[str, int], limit: int) -> Dict[str, int]:
        """Return the `limit` highest-count entries as a dict, highest first.

        Shared by get_metrics() and _report_metrics() (previously duplicated).
        """
        ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
        return dict(ranked[:limit])

    def _prune_window(self, current_time: float) -> None:
        """Drop recent-event entries older than the configured window."""
        cutoff_time = current_time - self.window_size
        while self._recent_events and self._recent_events[0][0] < cutoff_time:
            self._recent_events.popleft()

    def _calculate_metrics(self) -> None:
        """Recalculate derived metrics from the current window and counters."""
        # Prune first so the rate reflects only in-window events even when
        # consume() has not run recently (previously the window was only
        # pruned on consume, inflating events_per_second between events).
        now = time.time()
        self._prune_window(now)

        # Events per second over the observed span of the window
        if self._recent_events:
            time_span = now - self._recent_events[0][0]
            if time_span > 0:
                self._metrics["events_per_second"] = len(self._recent_events) / time_span

        # Average latency over the bounded sample deque
        if self._latencies:
            self._metrics["average_latency_ms"] = sum(self._latencies) / len(self._latencies)

        # Unique counts
        self._metrics["unique_topics"] = len(self._topic_counts)
        self._metrics["unique_types"] = len(self._type_counts)

        # Track the highest rate ever observed
        if self._metrics["events_per_second"] > self._metrics["peak_rate"]:
            self._metrics["peak_rate"] = self._metrics["events_per_second"]

    async def _report_metrics(self) -> None:
        """Report current metrics to log."""
        self._calculate_metrics()

        # Build report (header is a plain string: no placeholders needed)
        report = [
            "=== Event Metrics Report ===",
            f"Total Events: {self._metrics['total_events']}",
            f"Rate: {self._metrics['events_per_second']:.2f} events/sec",
            f"Avg Latency: {self._metrics['average_latency_ms']:.1f}ms",
            f"Unique Topics: {self._metrics['unique_topics']}",
            f"Unique Types: {self._metrics['unique_types']}",
        ]

        # Add top 5 events
        top_events = self._top_counts(self._event_counts, 5)
        if top_events:
            report.append("\nTop Events:")
            for event_key, count in top_events.items():
                report.append(f"  {event_key}: {count}")

        # Add error summary
        if self._error_counts:
            report.append("\nErrors by Consumer:")
            for consumer, count in self._error_counts.items():
                report.append(f"  {consumer}: {count}")

        # Log report
        self.logger.info("\n".join(report))
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Socket.IO Event Consumer
|
|
3
|
+
=======================
|
|
4
|
+
|
|
5
|
+
Consumes events from the event bus and emits them via Socket.IO.
|
|
6
|
+
This is the only component that knows about Socket.IO, decoupling
|
|
7
|
+
the rest of the system from the transport layer.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import time
|
|
12
|
+
from typing import Any, Dict, List, Optional
|
|
13
|
+
|
|
14
|
+
from claude_mpm.core.logging_config import get_logger
|
|
15
|
+
|
|
16
|
+
from ..core import Event
|
|
17
|
+
from ..interfaces import ConsumerConfig, ConsumerPriority, IEventConsumer
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class SocketIOConsumer(IEventConsumer):
    """
    Consumes events and emits them via Socket.IO.

    This is the only component that knows about Socket.IO, decoupling the
    rest of the event system from the transport layer.

    Features:
    - Single Socket.IO connection management
    - Automatic reconnection with backoff
    - Event batching for efficiency
    - Connection health monitoring
    - Graceful degradation when Socket.IO unavailable
    """

    def __init__(
        self,
        socketio_server=None,
        port_range: tuple = (8765, 8785),
        reconnect_delay: float = 1.0,
        max_reconnect_delay: float = 30.0,
        batch_size: int = 10,
        batch_timeout: float = 0.1,
    ):
        """
        Initialize Socket.IO consumer.

        Args:
            socketio_server: Socket.IO server instance (optional; if None,
                initialize() tries to discover an existing server)
            port_range: Port range to try for connection
                (NOTE(review): stored but not read anywhere in this class)
            reconnect_delay: Initial reconnection delay
            max_reconnect_delay: Maximum reconnection delay
            batch_size: Number of events to batch
            batch_timeout: Max time to wait for batch
        """
        self.logger = get_logger("SocketIOConsumer")

        # Socket.IO configuration
        self.socketio_server = socketio_server
        self.port_range = port_range
        self.reconnect_delay = reconnect_delay
        self.max_reconnect_delay = max_reconnect_delay
        # Current backoff delay; doubled on failure, reset on success
        self.current_reconnect_delay = reconnect_delay

        # Batching configuration
        self.batch_size = batch_size
        self.batch_timeout = batch_timeout

        # State
        self._initialized = False
        self._connected = False
        self._reconnect_task: Optional[asyncio.Task] = None
        self._event_batch: List[Event] = []  # pending events awaiting flush
        self._batch_timer: Optional[asyncio.Task] = None  # partial-batch flush timer

        # Metrics
        self._metrics = {
            "events_received": 0,
            "events_emitted": 0,
            "events_failed": 0,
            "connection_failures": 0,
            "reconnections": 0,
            "last_emit_time": None,
            "average_emit_time_ms": 0,
        }

        # Consumer configuration
        self._config = ConsumerConfig(
            name="SocketIOConsumer",
            topics=["**"],  # Subscribe to all events
            priority=ConsumerPriority.HIGH,
            batch_size=batch_size,
            batch_timeout=batch_timeout,
            max_retries=3,
            retry_backoff=2.0,
        )

    async def initialize(self) -> bool:
        """Initialize the Socket.IO consumer.

        Idempotent: returns True immediately if already initialized.
        Always returns True, even when no Socket.IO server is available
        (degraded mode: events will be dropped).
        """
        if self._initialized:
            return True

        self.logger.info("Initializing Socket.IO consumer")

        # Try to import socketio if not provided
        if self.socketio_server is None:
            try:
                # Try to get existing server instance
                from claude_mpm.services.socketio.server import get_socketio_server
                self.socketio_server = get_socketio_server()

                if self.socketio_server:
                    self._connected = True
                    self.logger.info("Connected to existing Socket.IO server")

            except ImportError:
                self.logger.warning("Socket.IO server not available")
                # Continue without Socket.IO - events will be dropped

        self._initialized = True

        # Start reconnection task if not connected
        # (only when a server object exists to reconnect to)
        if not self._connected and self.socketio_server:
            self._reconnect_task = asyncio.create_task(self._reconnect_loop())

        return True

    async def consume(self, event: Event) -> bool:
        """
        Process a single event.

        Events are batched for efficiency and emitted via Socket.IO.
        A full batch is flushed synchronously; a partial batch is flushed
        by the timer started below after `batch_timeout` seconds.
        """
        if not self._initialized:
            self.logger.warning("Consumer not initialized")
            return False

        self._metrics["events_received"] += 1

        # Add to batch
        self._event_batch.append(event)

        # Process batch if full
        if len(self._event_batch) >= self.batch_size:
            return await self._flush_batch()

        # Start batch timer if not running
        if self._batch_timer is None or self._batch_timer.done():
            self._batch_timer = asyncio.create_task(self._batch_timeout_handler())

        return True

    async def consume_batch(self, events: List[Event]) -> int:
        """Process multiple events in a batch.

        Returns:
            Number of events accepted (buffered or flushed) successfully.
        """
        if not self._initialized:
            return 0

        successful = 0
        for event in events:
            if await self.consume(event):
                successful += 1

        return successful

    async def shutdown(self) -> None:
        """Shutdown the consumer gracefully.

        Cancels background tasks, then flushes any buffered events before
        marking the consumer uninitialized/disconnected.
        """
        self.logger.info("Shutting down Socket.IO consumer")

        # Cancel reconnection task
        if self._reconnect_task:
            self._reconnect_task.cancel()
            try:
                await self._reconnect_task
            except asyncio.CancelledError:
                pass

        # Cancel batch timer
        if self._batch_timer:
            self._batch_timer.cancel()
            try:
                await self._batch_timer
            except asyncio.CancelledError:
                pass

        # Flush remaining events
        if self._event_batch:
            await self._flush_batch()

        self._initialized = False
        self._connected = False

        self.logger.info("Socket.IO consumer shutdown complete")

    @property
    def config(self) -> ConsumerConfig:
        """Get consumer configuration."""
        return self._config

    @property
    def is_healthy(self) -> bool:
        """Check if consumer is healthy."""
        # Healthy if initialized and either connected or attempting to reconnect
        return self._initialized and (self._connected or self._reconnect_task is not None)

    def get_metrics(self) -> Dict[str, Any]:
        """Get consumer metrics plus connection state and pending batch size."""
        return {
            **self._metrics,
            "connected": self._connected,
            "batch_size": len(self._event_batch),
        }

    async def _flush_batch(self) -> bool:
        """
        Flush the current batch of events to Socket.IO.

        On failure the batch is re-queued ahead of any events buffered in
        the meantime, preserving original ordering.

        NOTE(review): when called from _batch_timeout_handler, the cancel()
        below targets the currently running timer task itself; verify the
        CancelledError this schedules cannot interrupt the emit await.

        Returns:
            True if all events emitted successfully
        """
        if not self._event_batch:
            return True

        batch = self._event_batch
        self._event_batch = []

        # Cancel batch timer
        if self._batch_timer:
            self._batch_timer.cancel()
            self._batch_timer = None

        # Emit events
        success = await self._emit_events(batch)

        if not success:
            # Re-queue failed events
            self._event_batch = batch + self._event_batch
            return False

        return True

    async def _emit_events(self, events: List[Event]) -> bool:
        """
        Emit events via Socket.IO.

        On exception, marks the connection down, counts the whole batch as
        failed, and kicks off the reconnect loop.

        Args:
            events: Events to emit

        Returns:
            True if all events emitted successfully
        """
        if not self._connected or not self.socketio_server:
            self.logger.debug(f"Cannot emit {len(events)} events - not connected")
            self._metrics["events_failed"] += len(events)
            return False

        try:
            start_time = time.time()

            for event in events:
                # Convert event to Socket.IO format
                socketio_event = self._convert_to_socketio(event)

                # Emit event
                await self.socketio_server.emit(
                    socketio_event["event"],
                    socketio_event["data"],
                    namespace=socketio_event.get("namespace", "/")
                )

                self._metrics["events_emitted"] += 1

            # Update metrics
            elapsed_ms = (time.time() - start_time) * 1000
            self._metrics["last_emit_time"] = time.time()

            # Update rolling average (exponential moving average, alpha=0.1)
            avg = self._metrics["average_emit_time_ms"]
            self._metrics["average_emit_time_ms"] = (avg * 0.9) + (elapsed_ms * 0.1)

            self.logger.debug(
                f"Emitted {len(events)} events in {elapsed_ms:.1f}ms "
                f"(avg: {self._metrics['average_emit_time_ms']:.1f}ms)"
            )

            # Reset reconnect delay on success
            self.current_reconnect_delay = self.reconnect_delay

            return True

        except Exception as e:
            self.logger.error(f"Error emitting events: {e}")
            # NOTE(review): counts the whole batch as failed even if some
            # events were already emitted above; already-emitted events are
            # also re-queued by _flush_batch, which can duplicate them.
            self._metrics["events_failed"] += len(events)
            self._metrics["connection_failures"] += 1
            self._connected = False

            # Start reconnection
            if self._reconnect_task is None or self._reconnect_task.done():
                self._reconnect_task = asyncio.create_task(self._reconnect_loop())

            return False

    def _convert_to_socketio(self, event: Event) -> Dict[str, Any]:
        """
        Convert an Event to Socket.IO format.

        Maps our standard event format to Socket.IO's expected format:
        a per-category event name plus a flat data payload.
        """
        # Determine Socket.IO event name based on topic prefix
        if event.topic.startswith("hook."):
            socketio_event = "hook_event"
        elif event.topic.startswith("cli."):
            socketio_event = "cli_event"
        elif event.topic.startswith("system."):
            socketio_event = "system_event"
        elif event.topic.startswith("agent."):
            socketio_event = "agent_event"
        elif event.topic.startswith("build."):
            socketio_event = "build_event"
        else:
            socketio_event = "claude_event"

        # Build Socket.IO data
        # NOTE(review): assumes event.timestamp is a datetime and never None
        # (would raise AttributeError here) - confirm against Event producers.
        return {
            "event": socketio_event,
            "data": {
                "id": event.id,
                "type": event.type,
                "topic": event.topic,
                "timestamp": event.timestamp.isoformat(),
                "source": event.source,
                "data": event.data,
                "correlation_id": event.correlation_id,
            },
            "namespace": "/",
        }

    async def _batch_timeout_handler(self) -> None:
        """Handle batch timeout - flush partial batch."""
        await asyncio.sleep(self.batch_timeout)

        if self._event_batch:
            self.logger.debug(f"Batch timeout - flushing {len(self._event_batch)} events")
            await self._flush_batch()

    async def _reconnect_loop(self) -> None:
        """
        Reconnection loop with exponential backoff.

        Attempts to reconnect to Socket.IO server when connection is lost.
        Exits once reconnected or when the consumer is shut down; backoff
        doubles up to max_reconnect_delay between attempts.
        """
        while not self._connected and self._initialized:
            try:
                self.logger.info(
                    f"Attempting to reconnect to Socket.IO "
                    f"(delay: {self.current_reconnect_delay}s)"
                )

                # Try to reconnect
                if self.socketio_server:
                    # Test connection
                    # In real implementation, would check server status
                    self._connected = True
                    self._metrics["reconnections"] += 1
                    self.logger.info("Reconnected to Socket.IO server")

                    # Reset delay
                    self.current_reconnect_delay = self.reconnect_delay
                    break

            except Exception as e:
                self.logger.error(f"Reconnection failed: {e}")

            # Wait before next attempt
            await asyncio.sleep(self.current_reconnect_delay)

            # Exponential backoff
            self.current_reconnect_delay = min(
                self.current_reconnect_delay * 2,
                self.max_reconnect_delay
            )