claude-mpm 4.0.32__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/agents/INSTRUCTIONS.md +70 -2
  3. claude_mpm/agents/OUTPUT_STYLE.md +0 -11
  4. claude_mpm/agents/WORKFLOW.md +14 -2
  5. claude_mpm/agents/templates/documentation.json +51 -34
  6. claude_mpm/agents/templates/research.json +0 -11
  7. claude_mpm/cli/__init__.py +111 -33
  8. claude_mpm/cli/commands/agent_manager.py +10 -8
  9. claude_mpm/cli/commands/agents.py +82 -0
  10. claude_mpm/cli/commands/cleanup_orphaned_agents.py +150 -0
  11. claude_mpm/cli/commands/mcp_pipx_config.py +199 -0
  12. claude_mpm/cli/parsers/agents_parser.py +27 -0
  13. claude_mpm/cli/parsers/base_parser.py +6 -0
  14. claude_mpm/cli/startup_logging.py +75 -0
  15. claude_mpm/core/framework_loader.py +173 -84
  16. claude_mpm/dashboard/static/css/dashboard.css +449 -0
  17. claude_mpm/dashboard/static/dist/components/agent-inference.js +1 -1
  18. claude_mpm/dashboard/static/dist/components/event-viewer.js +1 -1
  19. claude_mpm/dashboard/static/dist/components/file-tool-tracker.js +1 -1
  20. claude_mpm/dashboard/static/dist/components/module-viewer.js +1 -1
  21. claude_mpm/dashboard/static/dist/components/session-manager.js +1 -1
  22. claude_mpm/dashboard/static/dist/dashboard.js +1 -1
  23. claude_mpm/dashboard/static/dist/socket-client.js +1 -1
  24. claude_mpm/dashboard/static/js/components/agent-hierarchy.js +774 -0
  25. claude_mpm/dashboard/static/js/components/agent-inference.js +257 -3
  26. claude_mpm/dashboard/static/js/components/build-tracker.js +323 -0
  27. claude_mpm/dashboard/static/js/components/event-viewer.js +168 -39
  28. claude_mpm/dashboard/static/js/components/file-tool-tracker.js +17 -0
  29. claude_mpm/dashboard/static/js/components/session-manager.js +23 -3
  30. claude_mpm/dashboard/static/js/components/socket-manager.js +2 -0
  31. claude_mpm/dashboard/static/js/dashboard.js +207 -31
  32. claude_mpm/dashboard/static/js/socket-client.js +92 -11
  33. claude_mpm/dashboard/templates/index.html +1 -0
  34. claude_mpm/hooks/claude_hooks/connection_pool.py +25 -4
  35. claude_mpm/hooks/claude_hooks/event_handlers.py +81 -19
  36. claude_mpm/hooks/claude_hooks/hook_handler.py +125 -163
  37. claude_mpm/hooks/claude_hooks/hook_handler_eventbus.py +398 -0
  38. claude_mpm/hooks/claude_hooks/response_tracking.py +10 -0
  39. claude_mpm/services/agents/deployment/agent_deployment.py +34 -48
  40. claude_mpm/services/agents/deployment/agent_discovery_service.py +4 -1
  41. claude_mpm/services/agents/deployment/agent_template_builder.py +20 -11
  42. claude_mpm/services/agents/deployment/agent_version_manager.py +4 -1
  43. claude_mpm/services/agents/deployment/agents_directory_resolver.py +10 -25
  44. claude_mpm/services/agents/deployment/multi_source_deployment_service.py +396 -13
  45. claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +3 -2
  46. claude_mpm/services/agents/deployment/strategies/system_strategy.py +10 -3
  47. claude_mpm/services/agents/deployment/strategies/user_strategy.py +10 -14
  48. claude_mpm/services/agents/deployment/system_instructions_deployer.py +8 -85
  49. claude_mpm/services/agents/memory/content_manager.py +98 -105
  50. claude_mpm/services/event_bus/__init__.py +18 -0
  51. claude_mpm/services/event_bus/config.py +165 -0
  52. claude_mpm/services/event_bus/event_bus.py +349 -0
  53. claude_mpm/services/event_bus/relay.py +297 -0
  54. claude_mpm/services/events/__init__.py +44 -0
  55. claude_mpm/services/events/consumers/__init__.py +18 -0
  56. claude_mpm/services/events/consumers/dead_letter.py +296 -0
  57. claude_mpm/services/events/consumers/logging.py +183 -0
  58. claude_mpm/services/events/consumers/metrics.py +242 -0
  59. claude_mpm/services/events/consumers/socketio.py +376 -0
  60. claude_mpm/services/events/core.py +470 -0
  61. claude_mpm/services/events/interfaces.py +230 -0
  62. claude_mpm/services/events/producers/__init__.py +14 -0
  63. claude_mpm/services/events/producers/hook.py +269 -0
  64. claude_mpm/services/events/producers/system.py +327 -0
  65. claude_mpm/services/mcp_gateway/auto_configure.py +372 -0
  66. claude_mpm/services/mcp_gateway/core/process_pool.py +411 -0
  67. claude_mpm/services/mcp_gateway/server/stdio_server.py +13 -0
  68. claude_mpm/services/monitor_build_service.py +345 -0
  69. claude_mpm/services/socketio/event_normalizer.py +667 -0
  70. claude_mpm/services/socketio/handlers/connection.py +81 -23
  71. claude_mpm/services/socketio/handlers/hook.py +14 -5
  72. claude_mpm/services/socketio/migration_utils.py +329 -0
  73. claude_mpm/services/socketio/server/broadcaster.py +26 -33
  74. claude_mpm/services/socketio/server/core.py +29 -5
  75. claude_mpm/services/socketio/server/eventbus_integration.py +189 -0
  76. claude_mpm/services/socketio/server/main.py +25 -0
  77. {claude_mpm-4.0.32.dist-info → claude_mpm-4.1.0.dist-info}/METADATA +28 -9
  78. {claude_mpm-4.0.32.dist-info → claude_mpm-4.1.0.dist-info}/RECORD +82 -56
  79. {claude_mpm-4.0.32.dist-info → claude_mpm-4.1.0.dist-info}/WHEEL +0 -0
  80. {claude_mpm-4.0.32.dist-info → claude_mpm-4.1.0.dist-info}/entry_points.txt +0 -0
  81. {claude_mpm-4.0.32.dist-info → claude_mpm-4.1.0.dist-info}/licenses/LICENSE +0 -0
  82. {claude_mpm-4.0.32.dist-info → claude_mpm-4.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,667 @@
1
+ """
2
+ Event normalizer for Socket.IO events in claude-mpm.
3
+
4
+ WHY: The system currently has inconsistent event formats across different components.
5
+ This normalizer ensures all events follow a consistent schema before broadcasting,
6
+ providing backward compatibility while establishing a standard format.
7
+
8
+ DESIGN DECISION: Transform all events to a consistent schema:
9
+ - event: Socket.IO event name (always "claude_event")
10
+ - type: Main category (hook, system, session, file, connection)
11
+ - subtype: Specific event type (pre_tool, heartbeat, started, etc.)
12
+ - timestamp: ISO format timestamp
13
+ - data: Raw event payload
14
+ """
15
+
16
+ import re
17
+ from datetime import datetime
18
+ from typing import Any, Dict, Optional, Tuple
19
+ from dataclasses import dataclass, field
20
+ from enum import Enum
21
+
22
+ from ...core.logging_config import get_logger
23
+
24
+
25
class EventSource(Enum):
    """Origin of an event.

    WHY: Recording which component produced an event makes debugging,
    filtering, and reasoning about system behavior easier.
    """

    # Events from Claude Code hooks
    HOOK = "hook"
    # Events from the dashboard UI
    DASHBOARD = "dashboard"
    # System/server operations
    SYSTEM = "system"
    # Agent operations
    AGENT = "agent"
    # CLI commands
    CLI = "cli"
    # API calls
    API = "api"
    # Test scripts
    TEST = "test"
38
+
39
+
40
class EventType(Enum):
    """Top-level event categories.

    WHY: A coarse category on every event supports filtering and routing,
    and gives a high-level picture of what the system is doing.
    """

    # Claude Code hook events
    HOOK = "hook"
    # System health and status events
    SYSTEM = "system"
    # Session lifecycle events
    SESSION = "session"
    # File system events
    FILE = "file"
    # Client connection events
    CONNECTION = "connection"
    # Memory system events
    MEMORY = "memory"
    # Git operation events
    GIT = "git"
    # Todo list updates
    TODO = "todo"
    # Ticket system events
    TICKET = "ticket"
    # Agent delegation events
    AGENT = "agent"
    # Error events
    ERROR = "error"
    # Performance metrics
    PERFORMANCE = "performance"
    # Claude process events
    CLAUDE = "claude"
    # Test events
    TEST = "test"
    # Tool events
    TOOL = "tool"
    # Subagent events
    SUBAGENT = "subagent"
62
+
63
+
64
@dataclass
class NormalizedEvent:
    """An event expressed in the standard, consistent structure.

    WHY: A dataclass makes the event structure explicit and
    self-documenting, with a declared type for every field.
    """

    # Socket.IO event name
    event: str = "claude_event"
    # WHERE the event comes from
    source: str = ""
    # WHAT category of event
    type: str = ""
    # Specific event type
    subtype: str = ""
    # ISO format timestamp
    timestamp: str = ""
    # Event payload
    data: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the event as a plain dictionary ready for emission.

        The payload is included by reference, not copied.
        """
        keys = ("event", "source", "type", "subtype", "timestamp", "data")
        return {key: getattr(self, key) for key in keys}
88
+
89
+
90
class EventNormalizer:
    """Normalizes events to a consistent schema.

    WHY: This class handles the transformation of various event formats
    into a single, consistent schema that clients can reliably parse.
    """

    # Mapping of event names to (type, subtype) tuples
    EVENT_MAPPINGS = {
        # Hook events
        "pre_tool": (EventType.HOOK, "pre_tool"),
        "post_tool": (EventType.HOOK, "post_tool"),
        "pre_response": (EventType.HOOK, "pre_response"),
        "post_response": (EventType.HOOK, "post_response"),
        "hook_event": (EventType.HOOK, "generic"),
        "UserPrompt": (EventType.HOOK, "user_prompt"),  # Legacy format

        # Test events (legacy format)
        "TestStart": (EventType.TEST, "start"),
        "TestEnd": (EventType.TEST, "end"),

        # Tool events (legacy format)
        "ToolCall": (EventType.TOOL, "call"),

        # Subagent events (legacy format)
        "SubagentStart": (EventType.SUBAGENT, "start"),
        "SubagentStop": (EventType.SUBAGENT, "stop"),

        # System events
        "heartbeat": (EventType.SYSTEM, "heartbeat"),
        "system_status": (EventType.SYSTEM, "status"),
        "system_event": (EventType.SYSTEM, "generic"),

        # Session events
        "session_started": (EventType.SESSION, "started"),
        "session_ended": (EventType.SESSION, "ended"),
        "session_event": (EventType.SESSION, "generic"),

        # File events
        "file_changed": (EventType.FILE, "changed"),
        "file_created": (EventType.FILE, "created"),
        "file_deleted": (EventType.FILE, "deleted"),
        "file_event": (EventType.FILE, "generic"),

        # Connection events
        "client_connected": (EventType.CONNECTION, "connected"),
        "client_disconnected": (EventType.CONNECTION, "disconnected"),
        "connection_event": (EventType.CONNECTION, "generic"),

        # Memory events
        "memory_loaded": (EventType.MEMORY, "loaded"),
        "memory_created": (EventType.MEMORY, "created"),
        "memory_updated": (EventType.MEMORY, "updated"),
        "memory_injected": (EventType.MEMORY, "injected"),
        "memory_event": (EventType.MEMORY, "generic"),

        # Git events
        "git_operation": (EventType.GIT, "operation"),
        "git_commit": (EventType.GIT, "commit"),
        "git_push": (EventType.GIT, "push"),
        "git_pull": (EventType.GIT, "pull"),

        # Todo events
        "todo_updated": (EventType.TODO, "updated"),
        "todo_created": (EventType.TODO, "created"),
        "todo_completed": (EventType.TODO, "completed"),

        # Ticket events
        "ticket_created": (EventType.TICKET, "created"),
        "ticket_updated": (EventType.TICKET, "updated"),
        "ticket_closed": (EventType.TICKET, "closed"),

        # Agent events
        "agent_delegated": (EventType.AGENT, "delegated"),
        "agent_completed": (EventType.AGENT, "completed"),

        # Claude events
        "claude_status": (EventType.CLAUDE, "status"),
        "claude_output": (EventType.CLAUDE, "output"),
        "claude_started": (EventType.CLAUDE, "started"),
        "claude_stopped": (EventType.CLAUDE, "stopped"),

        # Error events
        "error": (EventType.ERROR, "general"),
        "error_occurred": (EventType.ERROR, "occurred"),

        # Performance events
        "performance": (EventType.PERFORMANCE, "metric"),
        "performance_metric": (EventType.PERFORMANCE, "metric"),
    }

    # Patterns to extract event type from various formats
    TYPE_PATTERNS = [
        # Pattern 1: event_type field
        (r'"event_type"\s*:\s*"([^"]+)"', lambda m: m.group(1)),
        # Pattern 2: type field
        (r'"type"\s*:\s*"([^"]+)"', lambda m: m.group(1)),
        # Pattern 3: event field
        (r'"event"\s*:\s*"([^"]+)"', lambda m: m.group(1)),
        # Pattern 4: Hook format (hook:event_name)
        (r'"hook"\s*:\s*"([^"]+)"', lambda m: f"hook_{m.group(1)}"),
    ]

    # Category strings accepted as the left side of a dotted event name.
    # Derived from EventType so the two can never drift apart.
    _KNOWN_TYPES = frozenset(member.value for member in EventType)

    # Keyword inference rules, evaluated in order after the hook rule:
    # (trigger substrings, category, ordered (keyword, subtype) pairs, fallback subtype)
    _INFERENCE_RULES = (
        (
            ("session",),
            EventType.SESSION.value,
            (("start", "started"), ("end", "ended")),
            "generic",
        ),
        (
            ("file",),
            EventType.FILE.value,
            (
                ("create", "created"),
                ("delete", "deleted"),
                ("change", "changed"),
                ("modify", "changed"),
            ),
            "generic",
        ),
        (
            ("system", "heartbeat"),
            EventType.SYSTEM.value,
            (("heartbeat", "heartbeat"),),
            "status",
        ),
        (
            ("connect", "client"),
            EventType.CONNECTION.value,
            # "disconnect" must be tested first because it contains "connect"
            (("disconnect", "disconnected"), ("connect", "connected")),
            "generic",
        ),
        (
            ("memory",),
            EventType.MEMORY.value,
            (
                ("load", "loaded"),
                ("create", "created"),
                ("update", "updated"),
                ("inject", "injected"),
            ),
            "generic",
        ),
    )

    # Match "cli" / "api" only as standalone tokens (applied to lowercased
    # text) so words such as "client" or "rapid" do not trigger the
    # CLI / API source heuristics.
    _CLI_TOKEN = re.compile(r"(?<![a-z0-9])cli(?![a-z0-9])")
    _API_TOKEN = re.compile(r"(?<![a-z0-9])api(?![a-z0-9])")

    def __init__(self):
        self.logger = get_logger(self.__class__.__name__)
        self.stats = self._new_stats()

    @staticmethod
    def _new_stats() -> Dict[str, int]:
        """Return a zeroed statistics dictionary."""
        return {
            "normalized": 0,
            "already_normalized": 0,
            "unknown_format": 0,
            "errors": 0,
        }

    def normalize(self, event_data: Any, source: Optional[str] = None) -> NormalizedEvent:
        """Normalize an event to the standard schema.

        WHY: This method handles various input formats and transforms them
        into a consistent structure that all clients can understand.

        Args:
            event_data: The event data in any supported format
            source: Optional source override (e.g., "hook", "dashboard", "test")

        Returns:
            NormalizedEvent with consistent structure. Any exception raised
            while normalizing is logged and converted into an
            "unknown"/"error" event rather than propagated.
        """
        try:
            # If already normalized (has all required fields), validate and return
            if self._is_normalized(event_data):
                self.stats["already_normalized"] += 1
                return self._validate_normalized(event_data)

            # Extract event information from various formats
            event_type, subtype, data = self._extract_event_info(event_data)

            # Determine event source
            event_source = self._determine_source(event_data, event_type, source)

            # Get or generate timestamp
            timestamp = self._extract_timestamp(event_data)

            normalized = NormalizedEvent(
                event="claude_event",
                source=event_source,
                type=event_type,
                subtype=subtype,
                timestamp=timestamp,
                data=data,
            )

            self.stats["normalized"] += 1
            self.logger.debug(f"Normalized event: {event_type}/{subtype}")

            return normalized

        except Exception as e:
            # Boundary handler: normalization must never break event emission.
            self.stats["errors"] += 1
            self.logger.error(f"Failed to normalize event: {e}")

            # Return a generic event on error
            return NormalizedEvent(
                event="claude_event",
                source="system",
                type="unknown",
                subtype="error",
                timestamp=datetime.now().isoformat(),
                data={"original": str(event_data), "error": str(e)},
            )

    def _is_normalized(self, event_data: Any) -> bool:
        """Check if event is already in normalized format.

        WHY: Avoid double-normalization and preserve already correct events.
        """
        if not isinstance(event_data, dict):
            return False

        # Normalized format must have source, type, subtype, timestamp, and data
        required_fields = {"source", "type", "subtype", "timestamp", "data"}
        return required_fields <= event_data.keys()

    def _validate_normalized(self, event_data: Dict[str, Any]) -> NormalizedEvent:
        """Validate and convert an already normalized event.

        WHY: Ensure even pre-normalized events are valid and properly typed.
        """
        # "claude_hooks" is an alias for the hook source; any other value
        # (known EventSource or not) is kept as-is.
        source = event_data.get("source", "system")
        if source == "claude_hooks":
            source = EventSource.HOOK.value

        return NormalizedEvent(
            event="claude_event",  # Always use standard event name
            source=source,
            type=event_data.get("type", "unknown"),
            subtype=event_data.get("subtype", "generic"),
            timestamp=event_data.get("timestamp", datetime.now().isoformat()),
            data=event_data.get("data", {}),
        )

    def _extract_event_info(self, event_data: Any) -> Tuple[str, str, Dict[str, Any]]:
        """Extract event type, subtype, and data from various formats.

        WHY: The system has multiple event formats that need to be handled:
        - Simple strings (event names)
        - Dictionaries with type field
        - Hook events with special structure
        - Legacy formats

        Returns:
            (type, subtype, data). Inputs that are neither str nor dict yield
            ("unknown", "generic", {"original": str(event_data)}) and bump
            the "unknown_format" counter.
        """
        # Handle string events (just event name)
        if isinstance(event_data, str):
            event_type, subtype = self._map_event_name(event_data)
            return event_type, subtype, {"event_name": event_data}

        # Handle dictionary events
        if isinstance(event_data, dict):
            # Special case: type="hook" with event field (legacy hook format);
            # the "event" value is used verbatim as the subtype.
            if event_data.get("type") == "hook" and "event" in event_data:
                return "hook", event_data["event"], self._extract_data_payload(event_data)

            event_name = self._extract_event_name(event_data)
            event_type, subtype = self._map_event_name(event_name)
            return event_type, subtype, self._extract_data_payload(event_data)

        # Unknown format
        self.stats["unknown_format"] += 1
        return "unknown", "generic", {"original": str(event_data)}

    def _extract_event_name(self, event_dict: Dict[str, Any]) -> str:
        """Extract event name from dictionary.

        WHY: Events use different field names for the event identifier.

        Returns:
            The first string value found under "event_type", "type", "event",
            "hook" or "name" (in that priority order), else a regex-derived
            name, else "unknown".
        """
        # Priority order for event name fields
        for key in ("event_type", "type", "event", "hook", "name"):
            value = event_dict.get(key)
            if key in event_dict and isinstance(value, str):
                return value

        # Fall back to scanning the textual form of the dict.
        # NOTE(review): str() of a dict is its Python repr, which normally
        # single-quotes keys, so the double-quoted TYPE_PATTERNS mostly match
        # JSON text embedded inside string values - confirm this is intended.
        event_str = str(event_dict)
        for pattern, extractor in self.TYPE_PATTERNS:
            match = re.search(pattern, event_str)
            if match:
                return extractor(match)

        return "unknown"

    def _map_event_name(self, event_name: str) -> Tuple[str, str]:
        """Map event name to (type, subtype) tuple.

        WHY: Consistent categorization helps clients filter and handle events.

        Resolution order: exact EVENT_MAPPINGS entry, dotted "type.subtype"
        with a known type, bare EventType value, hook heuristics, keyword
        inference rules, and finally ("unknown", lowercased name).
        """
        # Direct mapping
        if event_name in self.EVENT_MAPPINGS:
            event_type, subtype = self.EVENT_MAPPINGS[event_name]
            if isinstance(event_type, EventType):
                event_type = event_type.value
            return event_type, subtype

        # Handle dotted event names (e.g., "connection.status", "session.started")
        if "." in event_name:
            type_part, subtype_part = event_name.split(".", 1)
            type_lower = type_part.lower()
            if type_lower in self._KNOWN_TYPES:
                return type_lower, subtype_part

        event_lower = event_name.lower()

        # Event name is itself a known EventType value
        if event_lower in self._KNOWN_TYPES:
            return event_lower, "generic"

        # Hook events (hook_* or *_hook or hook.*)
        if "hook" in event_lower:
            # "<something>hook.<name>": everything after the first dot is the subtype
            if "hook." in event_lower:
                return EventType.HOOK.value, event_name.split(".", 1)[1]
            if event_lower.startswith(("pre_", "post_")):
                return EventType.HOOK.value, event_lower
            return EventType.HOOK.value, "generic"

        # Keyword-based inference (session, file, system, connection, memory)
        for triggers, category, keyword_subtypes, fallback in self._INFERENCE_RULES:
            if not any(trigger in event_lower for trigger in triggers):
                continue
            for keyword, subtype in keyword_subtypes:
                if keyword in event_lower:
                    return category, subtype
            return category, fallback

        # Default to unknown with lowercase subtype
        return "unknown", event_lower

    def _extract_data_payload(self, event_dict: Dict[str, Any]) -> Dict[str, Any]:
        """Extract the data payload from an event dictionary.

        WHY: Different event formats store the payload in different places.

        Returns:
            The "data" value when present (wrapped as {"value": ...} if it is
            not a dict); otherwise the dict minus metadata keys, or the
            original dict itself when nothing but metadata remains.
        """
        # If there's an explicit data field, use it
        if "data" in event_dict:
            payload = event_dict["data"]
            return payload if isinstance(payload, dict) else {"value": payload}

        # Otherwise, use the entire dict minus metadata fields
        metadata_fields = {"event", "type", "subtype", "timestamp", "event_type", "hook"}
        data = {k: v for k, v in event_dict.items() if k not in metadata_fields}

        return data if data else event_dict

    def _extract_timestamp(self, event_data: Any) -> str:
        """Extract or generate timestamp.

        WHY: Consistent timestamp format is essential for event ordering
        and debugging.

        Returns:
            The first usable value among "timestamp", "time", "created_at"
            and "date": a string containing "T" is returned unchanged, a
            number is treated as epoch seconds and converted to local-time
            ISO format. Falls back to the current local time.
        """
        if isinstance(event_data, dict):
            # Try various timestamp field names
            for key in ("timestamp", "time", "created_at", "date"):
                if key not in event_data:
                    continue
                raw = event_data[key]
                # Accept strings that look like ISO format
                if isinstance(raw, str) and "T" in raw:
                    return raw
                # Convert epoch seconds
                if isinstance(raw, (int, float)):
                    try:
                        return datetime.fromtimestamp(raw).isoformat()
                    except (OverflowError, OSError, ValueError) as exc:
                        # Out-of-range or non-finite value: try the next field
                        self.logger.debug(
                            f"Ignoring unusable timestamp in '{key}': {exc}"
                        )

        # Generate new timestamp if not found
        return datetime.now().isoformat()

    def _determine_source(
        self,
        event_data: Any,
        event_type: str,
        source_override: Optional[str] = None,
    ) -> str:
        """Determine the source of an event.

        WHY: Knowing where events originate helps with debugging,
        filtering, and understanding system behavior.

        Args:
            event_data: The raw event data
            event_type: The determined event type
            source_override: Optional explicit source

        Returns:
            The event source as a string
        """
        # Use explicit source override if provided
        if source_override:
            return source_override

        if isinstance(event_data, dict):
            # Direct source field: "claude_hooks" maps to the hook source,
            # any other string is returned unchanged.
            source = event_data.get("source")
            if "source" in event_data and isinstance(source, str):
                if source == "claude_hooks":
                    return EventSource.HOOK.value
                return source

            # Test indicator - only if type is actually "test"
            if event_type == "test" or event_data.get("type") == "test":
                return EventSource.TEST.value

            # Content heuristics work on the lowercased textual form of the event
            text = str(event_data).lower()

            # Dashboard indicator
            if "dashboard" in text or "ui_action" in event_data:
                return EventSource.DASHBOARD.value

            # CLI indicator (token match, so "client" does not count)
            if self._CLI_TOKEN.search(text) or "command" in event_data:
                return EventSource.CLI.value

            # API indicator (token match, so "rapid"/"capital" do not count)
            if self._API_TOKEN.search(text) or "endpoint" in event_data:
                return EventSource.API.value

        # Infer from event type
        if event_type == EventType.HOOK.value:
            return EventSource.HOOK.value
        if event_type == EventType.TEST.value:
            return EventSource.TEST.value
        if event_type in (EventType.AGENT.value, EventType.SUBAGENT.value):
            return EventSource.AGENT.value

        # System, session, connection, performance and everything else
        return EventSource.SYSTEM.value

    def get_stats(self) -> Dict[str, int]:
        """Get normalization statistics.

        WHY: Monitoring normalization helps identify problematic event sources.

        Returns:
            A shallow copy of the counters, safe for the caller to mutate.
        """
        return self.stats.copy()

    def reset_stats(self):
        """Reset statistics counters.

        WHY: Periodic reset prevents counter overflow and enables
        rate calculations.
        """
        self.stats = self._new_stats()
573
+
574
+
575
+ # Utility functions for consistent event type checking
576
def is_hook_event(event_data: Dict[str, Any]) -> bool:
    """Tell whether an event is a hook event, in normalized or legacy form.

    WHY: Hook events arrive in more than one format; a single shared check
    keeps callers across the codebase from missing any of them.

    Args:
        event_data: Event dictionary to check

    Returns:
        True when "type" is "hook" (normalized) or starts with "hook."
        (legacy); False otherwise, including for non-dict input
    """
    if not isinstance(event_data, dict):
        return False

    kind = event_data.get("type", "")
    if isinstance(kind, str):
        # Normalized "hook" or legacy "hook.<name>"
        return kind == "hook" or kind.startswith("hook.")
    return bool(kind == "hook")
602
+
603
+
604
def get_hook_event_name(event_data: Dict[str, Any]) -> str:
    """Return the specific hook event name from a normalized or legacy event.

    WHY: The hook name lives in different places depending on the format,
    so callers need one consistent way to read it.

    Args:
        event_data: Event dictionary containing a hook event

    Returns:
        The specific hook event name (e.g., "pre_tool", "user_prompt")
        or empty string if not a hook event
    """
    if not isinstance(event_data, dict):
        return ""

    kind = event_data.get("type", "")

    if kind == "hook":
        # Normalized format carries the name in "subtype"; when that is
        # missing or empty, fall back to the legacy "event" field.
        return event_data.get("subtype", "") or event_data.get("event", "")

    if isinstance(kind, str) and kind.startswith("hook."):
        # Legacy format: type="hook.pre_tool" -> "pre_tool"
        return kind[len("hook."):]

    # Not a hook event
    return ""
633
+
634
+
635
def is_event_type(event_data: Dict[str, Any], type_name: str, subtype: Optional[str] = None) -> bool:
    """Check if an event matches a specific type and optionally subtype.

    WHY: This provides a consistent way to check event types that works with
    both normalized and legacy formats.

    Args:
        event_data: Event dictionary to check
        type_name: The type to check for (e.g., "hook", "session", "file")
        subtype: Optional subtype to also check (e.g., "pre_tool", "started")

    Returns:
        True if the event matches the specified type (and subtype if provided).
        Legacy dotted types such as "hook.pre_tool" match a type-only check
        for "hook" as well as the exact ("hook", "pre_tool") pair.
        Non-dict input always yields False.
    """
    if not isinstance(event_data, dict):
        return False

    event_type = event_data.get("type", "")

    # Normalized format: type (and subtype, when requested) stored separately
    if event_type == type_name:
        if subtype is None:
            return True
        return event_data.get("subtype", "") == subtype

    # Legacy dotted format (e.g., "hook.pre_tool")
    if isinstance(event_type, str):
        if subtype is None:
            # Type-only check: any "<type_name>.<something>" counts, which
            # keeps this consistent with how hook events are detected.
            return event_type.startswith(f"{type_name}.")
        if subtype:
            return event_type == f"{type_name}.{subtype}"

    return False