kailash-0.6.2-py3-none-any.whl → kailash-0.6.4-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Files changed (131)
  1. kailash/__init__.py +3 -3
  2. kailash/api/custom_nodes_secure.py +3 -3
  3. kailash/api/gateway.py +1 -1
  4. kailash/api/studio.py +2 -3
  5. kailash/api/workflow_api.py +3 -4
  6. kailash/core/resilience/bulkhead.py +460 -0
  7. kailash/core/resilience/circuit_breaker.py +92 -10
  8. kailash/edge/discovery.py +86 -0
  9. kailash/mcp_server/__init__.py +334 -0
  10. kailash/mcp_server/advanced_features.py +1022 -0
  11. kailash/{mcp → mcp_server}/ai_registry_server.py +29 -4
  12. kailash/mcp_server/auth.py +789 -0
  13. kailash/mcp_server/client.py +712 -0
  14. kailash/mcp_server/discovery.py +1593 -0
  15. kailash/mcp_server/errors.py +673 -0
  16. kailash/mcp_server/oauth.py +1727 -0
  17. kailash/mcp_server/protocol.py +1126 -0
  18. kailash/mcp_server/registry_integration.py +587 -0
  19. kailash/mcp_server/server.py +1747 -0
  20. kailash/{mcp → mcp_server}/servers/ai_registry.py +2 -2
  21. kailash/mcp_server/transports.py +1169 -0
  22. kailash/mcp_server/utils/cache.py +510 -0
  23. kailash/middleware/auth/auth_manager.py +3 -3
  24. kailash/middleware/communication/api_gateway.py +2 -9
  25. kailash/middleware/communication/realtime.py +1 -1
  26. kailash/middleware/mcp/client_integration.py +1 -1
  27. kailash/middleware/mcp/enhanced_server.py +2 -2
  28. kailash/nodes/__init__.py +2 -0
  29. kailash/nodes/admin/audit_log.py +6 -6
  30. kailash/nodes/admin/permission_check.py +8 -8
  31. kailash/nodes/admin/role_management.py +32 -28
  32. kailash/nodes/admin/schema.sql +6 -1
  33. kailash/nodes/admin/schema_manager.py +13 -13
  34. kailash/nodes/admin/security_event.py +16 -20
  35. kailash/nodes/admin/tenant_isolation.py +3 -3
  36. kailash/nodes/admin/transaction_utils.py +3 -3
  37. kailash/nodes/admin/user_management.py +21 -22
  38. kailash/nodes/ai/a2a.py +11 -11
  39. kailash/nodes/ai/ai_providers.py +9 -12
  40. kailash/nodes/ai/embedding_generator.py +13 -14
  41. kailash/nodes/ai/intelligent_agent_orchestrator.py +19 -19
  42. kailash/nodes/ai/iterative_llm_agent.py +3 -3
  43. kailash/nodes/ai/llm_agent.py +213 -36
  44. kailash/nodes/ai/self_organizing.py +2 -2
  45. kailash/nodes/alerts/discord.py +4 -4
  46. kailash/nodes/api/graphql.py +6 -6
  47. kailash/nodes/api/http.py +12 -17
  48. kailash/nodes/api/rate_limiting.py +4 -4
  49. kailash/nodes/api/rest.py +15 -15
  50. kailash/nodes/auth/mfa.py +3 -4
  51. kailash/nodes/auth/risk_assessment.py +2 -2
  52. kailash/nodes/auth/session_management.py +5 -5
  53. kailash/nodes/auth/sso.py +143 -0
  54. kailash/nodes/base.py +6 -2
  55. kailash/nodes/base_async.py +16 -2
  56. kailash/nodes/base_with_acl.py +2 -2
  57. kailash/nodes/cache/__init__.py +9 -0
  58. kailash/nodes/cache/cache.py +1172 -0
  59. kailash/nodes/cache/cache_invalidation.py +870 -0
  60. kailash/nodes/cache/redis_pool_manager.py +595 -0
  61. kailash/nodes/code/async_python.py +2 -1
  62. kailash/nodes/code/python.py +196 -35
  63. kailash/nodes/compliance/data_retention.py +6 -6
  64. kailash/nodes/compliance/gdpr.py +5 -5
  65. kailash/nodes/data/__init__.py +10 -0
  66. kailash/nodes/data/optimistic_locking.py +906 -0
  67. kailash/nodes/data/readers.py +8 -8
  68. kailash/nodes/data/redis.py +349 -0
  69. kailash/nodes/data/sql.py +314 -3
  70. kailash/nodes/data/streaming.py +21 -0
  71. kailash/nodes/enterprise/__init__.py +8 -0
  72. kailash/nodes/enterprise/audit_logger.py +285 -0
  73. kailash/nodes/enterprise/batch_processor.py +22 -3
  74. kailash/nodes/enterprise/data_lineage.py +1 -1
  75. kailash/nodes/enterprise/mcp_executor.py +205 -0
  76. kailash/nodes/enterprise/service_discovery.py +150 -0
  77. kailash/nodes/enterprise/tenant_assignment.py +108 -0
  78. kailash/nodes/logic/async_operations.py +2 -2
  79. kailash/nodes/logic/convergence.py +1 -1
  80. kailash/nodes/logic/operations.py +1 -1
  81. kailash/nodes/monitoring/__init__.py +11 -1
  82. kailash/nodes/monitoring/health_check.py +456 -0
  83. kailash/nodes/monitoring/log_processor.py +817 -0
  84. kailash/nodes/monitoring/metrics_collector.py +627 -0
  85. kailash/nodes/monitoring/performance_benchmark.py +137 -11
  86. kailash/nodes/rag/advanced.py +7 -7
  87. kailash/nodes/rag/agentic.py +49 -2
  88. kailash/nodes/rag/conversational.py +3 -3
  89. kailash/nodes/rag/evaluation.py +3 -3
  90. kailash/nodes/rag/federated.py +3 -3
  91. kailash/nodes/rag/graph.py +3 -3
  92. kailash/nodes/rag/multimodal.py +3 -3
  93. kailash/nodes/rag/optimized.py +5 -5
  94. kailash/nodes/rag/privacy.py +3 -3
  95. kailash/nodes/rag/query_processing.py +6 -6
  96. kailash/nodes/rag/realtime.py +1 -1
  97. kailash/nodes/rag/registry.py +2 -6
  98. kailash/nodes/rag/router.py +1 -1
  99. kailash/nodes/rag/similarity.py +7 -7
  100. kailash/nodes/rag/strategies.py +4 -4
  101. kailash/nodes/security/abac_evaluator.py +6 -6
  102. kailash/nodes/security/behavior_analysis.py +5 -6
  103. kailash/nodes/security/credential_manager.py +1 -1
  104. kailash/nodes/security/rotating_credentials.py +11 -11
  105. kailash/nodes/security/threat_detection.py +8 -8
  106. kailash/nodes/testing/credential_testing.py +2 -2
  107. kailash/nodes/transform/processors.py +5 -5
  108. kailash/runtime/local.py +162 -14
  109. kailash/runtime/parameter_injection.py +425 -0
  110. kailash/runtime/parameter_injector.py +657 -0
  111. kailash/runtime/testing.py +2 -2
  112. kailash/testing/fixtures.py +2 -2
  113. kailash/workflow/builder.py +99 -18
  114. kailash/workflow/builder_improvements.py +207 -0
  115. kailash/workflow/input_handling.py +170 -0
  116. {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/METADATA +21 -8
  117. {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/RECORD +126 -101
  118. kailash/mcp/__init__.py +0 -53
  119. kailash/mcp/client.py +0 -445
  120. kailash/mcp/server.py +0 -292
  121. kailash/mcp/server_enhanced.py +0 -449
  122. kailash/mcp/utils/cache.py +0 -267
  123. /kailash/{mcp → mcp_server}/client_new.py +0 -0
  124. /kailash/{mcp → mcp_server}/utils/__init__.py +0 -0
  125. /kailash/{mcp → mcp_server}/utils/config.py +0 -0
  126. /kailash/{mcp → mcp_server}/utils/formatters.py +0 -0
  127. /kailash/{mcp → mcp_server}/utils/metrics.py +0 -0
  128. {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/WHEEL +0 -0
  129. {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/entry_points.txt +0 -0
  130. {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/licenses/LICENSE +0 -0
  131. {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/top_level.txt +0 -0
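The most sweeping change between 0.6.2 and 0.6.4 is the relocation of the MCP package from kailash/mcp to kailash/mcp_server (entries 9-22 and 118-127 above), so downstream code that imported the old package will need its import paths updated. The sketch below illustrates the path change only: the module paths are taken from the file list, while the bound names are illustrative placeholders, not confirmed exports of this release.

# Hypothetical import-path migration for the kailash.mcp -> kailash.mcp_server move.
# Only the module paths come from the file list; the imported names are placeholders.

# kailash 0.6.2 (modules removed in 0.6.4, entries 118-122):
#   from kailash.mcp import client, server

# kailash 0.6.4 (modules added under the new package, entries 13 and 19):
from kailash.mcp_server import client, server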
kailash/nodes/monitoring/log_processor.py (new file)
@@ -0,0 +1,817 @@
"""Log processing node for comprehensive log analysis and management.

This module provides advanced log processing capabilities including parsing,
filtering, aggregation, pattern matching, and forwarding to various backends.
"""

import json
import logging
import re
import time
from datetime import UTC, datetime, timedelta
from enum import Enum
from typing import Any, Callable, Dict, List, Optional, Pattern, Union

from kailash.nodes.base import NodeParameter, register_node
from kailash.nodes.base_async import AsyncNode
from kailash.sdk_exceptions import NodeExecutionError

logger = logging.getLogger(__name__)


class LogLevel(Enum):
    """Standard log levels for filtering."""

    CRITICAL = 50
    ERROR = 40
    WARNING = 30
    INFO = 20
    DEBUG = 10
    NOTSET = 0


class LogFormat(Enum):
    """Supported log output formats."""

    JSON = "json"
    STRUCTURED = "structured"
    RAW = "raw"
    SYSLOG = "syslog"
    ELK = "elk"  # Elasticsearch/Logstash/Kibana format


class AggregationType(Enum):
    """Types of log aggregation."""

    COUNT = "count"
    RATE = "rate"
    UNIQUE = "unique"
    TOP_VALUES = "top_values"
    TIMELINE = "timeline"


@register_node()
class LogProcessorNode(AsyncNode):
    """Node for processing, filtering, and analyzing logs.

    This node provides comprehensive log processing capabilities including:
    - Multi-format log parsing (JSON, structured text, regex patterns)
    - Advanced filtering by level, timestamp, content, and custom rules
    - Pattern extraction and field parsing
    - Log aggregation and statistics
    - Real-time alerting on log patterns
    - Output formatting for various backends
    - Log forwarding and streaming

    Design Purpose:
    - Centralized log processing for monitoring and observability
    - Real-time log analysis and alerting
    - Log data enrichment and transformation
    - Support for various log backends and formats

    Examples:
        >>> # Basic log filtering and parsing
        >>> processor = LogProcessorNode()
        >>> result = await processor.execute(
        ...     logs=[
        ...         "2024-01-01 10:00:00 ERROR Failed to connect to database",
        ...         "2024-01-01 10:00:01 INFO User logged in successfully",
        ...         "2024-01-01 10:00:02 WARNING High memory usage detected"
        ...     ],
        ...     filters={"min_level": "WARNING"},
        ...     output_format="json"
        ... )

        >>> # Advanced pattern matching and alerting
        >>> result = await processor.execute(
        ...     logs=log_stream,
        ...     patterns=[
        ...         {"name": "error_spike", "regex": r"ERROR.*database", "threshold": 5},
        ...         {"name": "auth_failure", "regex": r"authentication.*failed", "threshold": 3}
        ...     ],
        ...     aggregation={"type": "timeline", "interval": 60}
        ... )
    """

    def __init__(self, **kwargs):
        """Initialize the log processor node."""
        super().__init__(**kwargs)
        self.compiled_patterns: Dict[str, Pattern] = {}
        self.aggregation_buffer: List[Dict[str, Any]] = []
        self.last_aggregation_time = time.time()
        self.logger.info(f"Initialized LogProcessorNode: {self.id}")

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Define the parameters this node accepts."""
        return {
            "logs": NodeParameter(
                name="logs",
                type=Any,
                required=True,
                description="Log entries to process (string or list of strings)",
            ),
            "log_format": NodeParameter(
                name="log_format",
                type=str,
                required=False,
                default="auto",
                description="Input log format (auto, json, structured, raw)",
            ),
            "filters": NodeParameter(
                name="filters",
                type=dict,
                required=False,
                default={},
                description="Filtering criteria for logs",
            ),
            "patterns": NodeParameter(
                name="patterns",
                type=list,
                required=False,
                default=[],
                description="Pattern extraction and matching rules",
            ),
            "aggregation": NodeParameter(
                name="aggregation",
                type=dict,
                required=False,
                default={},
                description="Aggregation configuration",
            ),
            "output_format": NodeParameter(
                name="output_format",
                type=str,
                required=False,
                default="json",
                description="Output format (json, structured, raw, syslog, elk)",
            ),
            "enrichment": NodeParameter(
                name="enrichment",
                type=dict,
                required=False,
                default={},
                description="Log enrichment configuration",
            ),
            "alerts": NodeParameter(
                name="alerts",
                type=list,
                required=False,
                default=[],
                description="Alert rules for pattern matching",
            ),
            "max_buffer_size": NodeParameter(
                name="max_buffer_size",
                type=int,
                required=False,
                default=10000,
                description="Maximum number of logs to buffer",
            ),
        }

    def get_output_schema(self) -> Dict[str, NodeParameter]:
        """Define the output schema for this node."""
        return {
            "processed_logs": NodeParameter(
                name="processed_logs",
                type=list,
                description="Processed and filtered log entries",
            ),
            "filtered_count": NodeParameter(
                name="filtered_count",
                type=int,
                description="Number of logs that passed filters",
            ),
            "total_count": NodeParameter(
                name="total_count",
                type=int,
                description="Total number of input logs",
            ),
            "patterns_matched": NodeParameter(
                name="patterns_matched",
                type=dict,
                description="Pattern matching results and counts",
            ),
            "aggregations": NodeParameter(
                name="aggregations",
                type=dict,
                description="Log aggregation results",
            ),
            "alerts_triggered": NodeParameter(
                name="alerts_triggered",
                type=list,
                description="Alerts triggered during processing",
            ),
            "processing_time": NodeParameter(
                name="processing_time",
                type=float,
                description="Time taken to process logs",
            ),
            "timestamp": NodeParameter(
                name="timestamp",
                type=str,
                description="ISO timestamp of processing",
            ),
        }

    async def async_run(self, **kwargs) -> Dict[str, Any]:
        """Process logs based on configuration."""
        logs = kwargs["logs"]
        log_format = kwargs.get("log_format", "auto")
        filters = kwargs.get("filters", {})
        patterns = kwargs.get("patterns", [])
        aggregation = kwargs.get("aggregation", {})
        output_format = LogFormat(kwargs.get("output_format", "json"))
        enrichment = kwargs.get("enrichment", {})
        alerts = kwargs.get("alerts", [])
        max_buffer_size = kwargs.get("max_buffer_size", 10000)

        start_time = time.time()

        try:
            # Validate input
            if logs is None:
                raise ValueError("Logs parameter cannot be None")

            # Normalize input logs to list
            if isinstance(logs, str):
                logs = [logs]

            # Validate buffer size
            if len(logs) > max_buffer_size:
                self.logger.warning(
                    f"Input logs ({len(logs)}) exceed buffer size ({max_buffer_size}), truncating"
                )
                logs = logs[:max_buffer_size]

            # Parse logs
            parsed_logs = await self._parse_logs(logs, log_format)

            # Apply filters
            filtered_logs = await self._filter_logs(parsed_logs, filters)

            # Process patterns
            pattern_results = await self._process_patterns(filtered_logs, patterns)

            # Enrich logs if configured
            if enrichment:
                filtered_logs = await self._enrich_logs(filtered_logs, enrichment)

            # Process aggregations
            aggregation_results = await self._process_aggregations(
                filtered_logs, aggregation
            )

            # Check alert rules
            alerts_triggered = await self._check_alerts(
                filtered_logs, alerts, pattern_results
            )

            # Format output
            formatted_logs = await self._format_output(filtered_logs, output_format)

            processing_time = time.time() - start_time

            return {
                "success": True,
                "processed_logs": formatted_logs,
                "filtered_count": len(filtered_logs),
                "total_count": len(logs),
                "patterns_matched": pattern_results,
                "aggregations": aggregation_results,
                "alerts_triggered": alerts_triggered,
                "processing_time": processing_time,
                "timestamp": datetime.now(UTC).isoformat(),
            }

        except Exception as e:
            self.logger.error(f"Log processing failed: {str(e)}")
            raise NodeExecutionError(f"Failed to process logs: {str(e)}")

    async def _parse_logs(
        self, logs: List[str], log_format: str
    ) -> List[Dict[str, Any]]:
        """Parse raw log entries into structured format."""
        parsed_logs = []

        for log_entry in logs:
            try:
                if log_format == "json":
                    parsed_log = json.loads(log_entry)
                elif log_format == "auto":
                    # Try JSON first, then fall back to structured parsing
                    try:
                        parsed_log = json.loads(log_entry)
                    except json.JSONDecodeError:
                        parsed_log = await self._parse_structured_log(log_entry)
                else:
                    parsed_log = await self._parse_structured_log(log_entry)

                # Ensure required fields
                if "timestamp" not in parsed_log:
                    parsed_log["timestamp"] = datetime.now(UTC).isoformat()
                if "level" not in parsed_log:
                    parsed_log["level"] = await self._extract_log_level(log_entry)
                if "message" not in parsed_log:
                    parsed_log["message"] = log_entry

                parsed_logs.append(parsed_log)

            except Exception as e:
                # If parsing fails, create a minimal log entry
                self.logger.debug(f"Failed to parse log entry, using raw: {str(e)}")
                parsed_logs.append(
                    {
                        "timestamp": datetime.now(UTC).isoformat(),
                        "level": "INFO",
                        "message": log_entry,
                        "raw": True,
                        "parse_error": str(e),
                    }
                )

        return parsed_logs

    async def _parse_structured_log(self, log_entry: str) -> Dict[str, Any]:
        """Parse structured log entries using common patterns."""
        # Common log patterns
        patterns = [
            # ISO timestamp + level + message
            r"(?P<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})?)\s+(?P<level>\w+)\s+(?P<message>.*)",
            # Date time + level + message
            r"(?P<timestamp>\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}(?:\.\d+)?)\s+(?P<level>\w+)\s+(?P<message>.*)",
            # Syslog format
            r"(?P<timestamp>\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+(?P<hostname>\S+)\s+(?P<program>\S+):\s+(?P<message>.*)",
            # Simple level + message
            r"(?P<level>\w+):\s+(?P<message>.*)",
        ]

        for pattern in patterns:
            match = re.match(pattern, log_entry.strip())
            if match:
                return match.groupdict()

        # If no pattern matches, return as raw message
        return {"message": log_entry}

    async def _extract_log_level(self, log_entry: str) -> str:
        """Extract log level from raw log entry."""
        level_patterns = {
            "CRITICAL": ["critical", "fatal", "crit"],
            "ERROR": ["error", "err"],
            "WARNING": ["warning", "warn"],
            "INFO": ["info", "information"],
            "DEBUG": ["debug", "trace"],
        }

        log_lower = log_entry.lower()
        for level, keywords in level_patterns.items():
            for keyword in keywords:
                if keyword in log_lower:
                    return level

        return "INFO"  # Default level

    async def _filter_logs(
        self, logs: List[Dict[str, Any]], filters: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """Apply filtering criteria to logs."""
        if not filters:
            return logs

        filtered_logs = []

        for log_entry in logs:
            # Level filtering
            if "min_level" in filters:
                min_level = LogLevel[filters["min_level"].upper()]
                log_level = LogLevel[log_entry.get("level", "INFO").upper()]
                if log_level.value < min_level.value:
                    continue

            # Time range filtering
            if "start_time" in filters or "end_time" in filters:
                log_time = datetime.fromisoformat(
                    log_entry.get("timestamp", datetime.now(UTC).isoformat())
                )

                if "start_time" in filters:
                    start_time = datetime.fromisoformat(filters["start_time"])
                    if log_time < start_time:
                        continue

                if "end_time" in filters:
                    end_time = datetime.fromisoformat(filters["end_time"])
                    if log_time > end_time:
                        continue

            # Content filtering
            if "contains" in filters:
                if filters["contains"] not in log_entry.get("message", ""):
                    continue

            if "excludes" in filters:
                exclude_text = filters["excludes"]
                # Check in message, level, or raw fields
                if (
                    exclude_text in log_entry.get("message", "")
                    or exclude_text in log_entry.get("level", "")
                    or exclude_text in str(log_entry.get("raw", ""))
                ):
                    continue

            # Regex filtering
            if "regex" in filters:
                if not re.search(filters["regex"], log_entry.get("message", "")):
                    continue

            # Field-based filtering
            if "fields" in filters:
                field_match = True
                for field, value in filters["fields"].items():
                    if log_entry.get(field) != value:
                        field_match = False
                        break
                if not field_match:
                    continue

            filtered_logs.append(log_entry)

        return filtered_logs

    async def _process_patterns(
        self, logs: List[Dict[str, Any]], patterns: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Process pattern matching and extraction rules."""
        if not patterns:
            return {}

        pattern_results = {}

        for pattern_config in patterns:
            pattern_name = pattern_config.get("name", "unnamed")
            regex_pattern = pattern_config.get("regex")
            extract_fields = pattern_config.get("extract_fields", [])

            if not regex_pattern:
                continue

            # Compile pattern if not already compiled
            if pattern_name not in self.compiled_patterns:
                try:
                    self.compiled_patterns[pattern_name] = re.compile(regex_pattern)
                except re.error as e:
                    self.logger.warning(f"Invalid regex pattern '{pattern_name}': {e}")
                    continue

            compiled_pattern = self.compiled_patterns[pattern_name]
            matches = []
            match_count = 0

            for log_entry in logs:
                message = log_entry.get("message", "")
                level = log_entry.get("level", "")
                # Search in message first, then in level + message combined
                match = compiled_pattern.search(message)
                if not match and level:
                    combined_text = f"{level} {message}"
                    match = compiled_pattern.search(combined_text)

                if match:
                    match_count += 1
                    match_data = {
                        "timestamp": log_entry.get("timestamp"),
                        "full_match": match.group(0),
                        "groups": match.groups(),
                        "log_entry": log_entry,
                    }

                    # Extract named groups
                    if match.groupdict():
                        match_data["named_groups"] = match.groupdict()

                    # Extract specified fields
                    if extract_fields:
                        extracted = {}
                        for field in extract_fields:
                            if field in log_entry:
                                extracted[field] = log_entry[field]
                        match_data["extracted_fields"] = extracted

                    matches.append(match_data)

            pattern_results[pattern_name] = {
                "match_count": match_count,
                "matches": matches,
                "pattern": regex_pattern,
            }

        return pattern_results

    async def _enrich_logs(
        self, logs: List[Dict[str, Any]], enrichment: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """Enrich logs with additional data."""
        enriched_logs = []

        for log_entry in logs.copy():
            # Add static fields
            if "static_fields" in enrichment:
                log_entry.update(enrichment["static_fields"])

            # Add computed fields
            if "computed_fields" in enrichment:
                for field_name, computation in enrichment["computed_fields"].items():
                    if computation["type"] == "timestamp_parse":
                        # Parse timestamp to components
                        try:
                            dt = datetime.fromisoformat(log_entry.get("timestamp", ""))
                            log_entry[field_name] = {
                                "year": dt.year,
                                "month": dt.month,
                                "day": dt.day,
                                "hour": dt.hour,
                                "minute": dt.minute,
                                "weekday": dt.strftime("%A"),
                            }
                        except Exception:
                            log_entry[field_name] = None

                    elif computation["type"] == "field_extraction":
                        # Extract field using regex
                        source_field = computation.get("source_field", "message")
                        pattern = computation.get("pattern")
                        if pattern and source_field in log_entry:
                            match = re.search(pattern, str(log_entry[source_field]))
                            if match:
                                log_entry[field_name] = (
                                    match.group(1) if match.groups() else match.group(0)
                                )

            # Add processing metadata
            log_entry["_processed_at"] = datetime.now(UTC).isoformat()
            log_entry["_processor_id"] = self.id

            enriched_logs.append(log_entry)

        return enriched_logs

    async def _process_aggregations(
        self, logs: List[Dict[str, Any]], aggregation: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Process log aggregations."""
        if not aggregation:
            return {}

        agg_type = AggregationType(aggregation.get("type", "count"))
        field = aggregation.get("field", "level")
        interval = aggregation.get("interval", 60)  # seconds

        results = {}

        if agg_type == AggregationType.COUNT:
            # Count by field values
            counts = {}
            for log_entry in logs:
                value = log_entry.get(field, "unknown")
                counts[value] = counts.get(value, 0) + 1
            results["counts"] = counts

        elif agg_type == AggregationType.RATE:
            # Calculate rate over time
            if logs:
                time_span = (
                    datetime.fromisoformat(logs[-1]["timestamp"])
                    - datetime.fromisoformat(logs[0]["timestamp"])
                ).total_seconds()
                if time_span > 0:
                    results["rate"] = len(logs) / time_span
                else:
                    results["rate"] = 0

        elif agg_type == AggregationType.UNIQUE:
            # Count unique values
            unique_values = set()
            for log_entry in logs:
                value = log_entry.get(field)
                if value is not None:
                    unique_values.add(str(value))
            results["unique_count"] = len(unique_values)
            results["unique_values"] = list(unique_values)

        elif agg_type == AggregationType.TOP_VALUES:
            # Top N values by count
            counts = {}
            for log_entry in logs:
                value = log_entry.get(field, "unknown")
                counts[value] = counts.get(value, 0) + 1

            top_n = aggregation.get("top_n", 10)
            top_values = sorted(counts.items(), key=lambda x: x[1], reverse=True)[
                :top_n
            ]
            results["top_values"] = top_values

        elif agg_type == AggregationType.TIMELINE:
            # Timeline aggregation
            timeline = {}
            for log_entry in logs:
                timestamp = datetime.fromisoformat(log_entry["timestamp"])
                # Round to interval
                interval_start = timestamp.replace(second=0, microsecond=0)
                if interval >= 3600:  # Hour intervals
                    interval_start = interval_start.replace(minute=0)

                interval_key = interval_start.isoformat()
                if interval_key not in timeline:
                    timeline[interval_key] = 0
                timeline[interval_key] += 1

            results["timeline"] = timeline

        return results

    async def _check_alerts(
        self,
        logs: List[Dict[str, Any]],
        alerts: List[Dict[str, Any]],
        pattern_results: Dict[str, Any],
    ) -> List[Dict[str, Any]]:
        """Check alert rules and trigger alerts."""
        triggered_alerts = []

        for alert_config in alerts:
            alert_name = alert_config.get("name", "unnamed")
            alert_type = alert_config.get("type", "threshold")

            if alert_type == "threshold":
                # Threshold-based alerts
                threshold = alert_config.get("threshold", 0)
                field = alert_config.get("field", "level")
                condition = alert_config.get("condition", "ERROR")

                count = sum(1 for log in logs if log.get(field) == condition)
                if count >= threshold:
                    triggered_alerts.append(
                        {
                            "name": alert_name,
                            "type": alert_type,
                            "triggered_at": datetime.now(UTC).isoformat(),
                            "threshold": threshold,
                            "actual_count": count,
                            "condition": condition,
                            "severity": alert_config.get("severity", "medium"),
                        }
                    )

            elif alert_type == "pattern":
                # Pattern-based alerts
                pattern_name = alert_config.get("pattern_name")
                threshold = alert_config.get("threshold", 1)

                if pattern_name in pattern_results:
                    match_count = pattern_results[pattern_name]["match_count"]
                    if match_count >= threshold:
                        triggered_alerts.append(
                            {
                                "name": alert_name,
                                "type": alert_type,
                                "triggered_at": datetime.now(UTC).isoformat(),
                                "pattern_name": pattern_name,
                                "threshold": threshold,
                                "match_count": match_count,
                                "severity": alert_config.get("severity", "medium"),
                            }
                        )

            elif alert_type == "rate":
                # Rate-based alerts
                time_window = alert_config.get("time_window", 300)  # 5 minutes
                rate_threshold = alert_config.get(
                    "rate_threshold", 10
                )  # logs per second

                now = datetime.now(UTC)
                window_start = now - timedelta(seconds=time_window)

                recent_logs = [
                    log
                    for log in logs
                    if datetime.fromisoformat(log["timestamp"]) >= window_start
                ]

                if recent_logs:
                    rate = len(recent_logs) / time_window
                    if rate >= rate_threshold:
                        triggered_alerts.append(
                            {
                                "name": alert_name,
                                "type": alert_type,
                                "triggered_at": datetime.now(UTC).isoformat(),
                                "rate_threshold": rate_threshold,
                                "actual_rate": rate,
                                "time_window": time_window,
                                "log_count": len(recent_logs),
                                "severity": alert_config.get("severity", "medium"),
                            }
                        )

        return triggered_alerts

    async def _format_output(
        self, logs: List[Dict[str, Any]], output_format: LogFormat
    ) -> Union[List[Dict[str, Any]], List[str], str]:
        """Format logs according to specified output format."""
        if output_format == LogFormat.JSON:
            return logs

        elif output_format == LogFormat.RAW:
            return [log.get("message", str(log)) for log in logs]

        elif output_format == LogFormat.STRUCTURED:
            formatted = []
            for log in logs:
                timestamp = log.get("timestamp", "")
                level = log.get("level", "INFO")
                message = log.get("message", "")
                formatted.append(f"{timestamp} {level} {message}")
            return formatted

        elif output_format == LogFormat.SYSLOG:
            formatted = []
            for log in logs:
                timestamp = log.get("timestamp", "")
                hostname = log.get("hostname", "localhost")
                program = log.get("program", "kailash")
                message = log.get("message", "")
                formatted.append(f"{timestamp} {hostname} {program}: {message}")
            return formatted

        elif output_format == LogFormat.ELK:
            # Elasticsearch/Logstash/Kibana format
            elk_logs = []
            for log in logs:
                elk_log = {
                    "@timestamp": log.get("timestamp"),
                    "@version": "1",
                    "message": log.get("message"),
                    "level": log.get("level"),
                    "logger_name": log.get("logger", "kailash"),
                    "thread_name": log.get("thread", "main"),
                    "fields": {
                        k: v
                        for k, v in log.items()
                        if k not in ["timestamp", "message", "level"]
                    },
                }
                elk_logs.append(elk_log)
            return elk_logs

        return logs

    def run(self, **kwargs) -> Dict[str, Any]:
        """Synchronous wrapper for compatibility."""
        import asyncio

        try:
            # Try to get current event loop
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No event loop running, safe to use asyncio.run()
            try:
                result = asyncio.run(self.async_run(**kwargs))
                return result
            except Exception as e:
                return {
                    "success": False,
                    "error": str(e),
                    "processed_logs": [],
                    "filtered_count": 0,
                    "total_count": 0,
                    "patterns_matched": {},
                    "aggregations": {},
                    "alerts_triggered": [],
                    "processing_time": 0.0,
                    "timestamp": datetime.now(UTC).isoformat(),
                }
        else:
            # Event loop is running, create a task
            import concurrent.futures

            try:
                with concurrent.futures.ThreadPoolExecutor() as executor:
                    future = executor.submit(asyncio.run, self.async_run(**kwargs))
                    result = future.result()
                    return result
            except Exception as e:
                return {
                    "success": False,
                    "error": str(e),
                    "processed_logs": [],
                    "filtered_count": 0,
                    "total_count": 0,
                    "patterns_matched": {},
                    "aggregations": {},
                    "alerts_triggered": [],
                    "processing_time": 0.0,
                    "timestamp": datetime.now(UTC).isoformat(),
                }
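For orientation, here is a minimal standalone sketch of driving the new LogProcessorNode directly through async_run(). The parameter names and result keys mirror get_parameters() and get_output_schema() above; everything else (the script scaffolding, the sample log lines, the db_errors pattern name) is illustrative, and the usual workflow/runtime wiring around the node is not shown in this diff.

# Usage sketch only: calls async_run() directly instead of going through a runtime.
# Parameter names and result keys come from the node definition above; the rest is
# illustrative scaffolding.
import asyncio

from kailash.nodes.monitoring.log_processor import LogProcessorNode


async def main():
    processor = LogProcessorNode()
    result = await processor.async_run(
        logs=[
            "2024-01-01 10:00:00 ERROR Failed to connect to database",
            "2024-01-01 10:00:01 INFO User logged in successfully",
        ],
        filters={"min_level": "ERROR"},
        patterns=[{"name": "db_errors", "regex": r"ERROR.*database"}],
        output_format="structured",
    )
    # filtered_count/total_count and patterns_matched are part of the output schema.
    print(f"{result['filtered_count']}/{result['total_count']} logs kept")
    print("db_errors matches:", result["patterns_matched"]["db_errors"]["match_count"])
    for line in result["processed_logs"]:
        print(line)


asyncio.run(main())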