kailash 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +3 -3
- kailash/api/custom_nodes_secure.py +3 -3
- kailash/api/gateway.py +1 -1
- kailash/api/studio.py +2 -3
- kailash/api/workflow_api.py +3 -4
- kailash/core/resilience/bulkhead.py +460 -0
- kailash/core/resilience/circuit_breaker.py +92 -10
- kailash/edge/discovery.py +86 -0
- kailash/mcp_server/__init__.py +334 -0
- kailash/mcp_server/advanced_features.py +1022 -0
- kailash/{mcp → mcp_server}/ai_registry_server.py +29 -4
- kailash/mcp_server/auth.py +789 -0
- kailash/mcp_server/client.py +712 -0
- kailash/mcp_server/discovery.py +1593 -0
- kailash/mcp_server/errors.py +673 -0
- kailash/mcp_server/oauth.py +1727 -0
- kailash/mcp_server/protocol.py +1126 -0
- kailash/mcp_server/registry_integration.py +587 -0
- kailash/mcp_server/server.py +1747 -0
- kailash/{mcp → mcp_server}/servers/ai_registry.py +2 -2
- kailash/mcp_server/transports.py +1169 -0
- kailash/mcp_server/utils/cache.py +510 -0
- kailash/middleware/auth/auth_manager.py +3 -3
- kailash/middleware/communication/api_gateway.py +2 -9
- kailash/middleware/communication/realtime.py +1 -1
- kailash/middleware/mcp/client_integration.py +1 -1
- kailash/middleware/mcp/enhanced_server.py +2 -2
- kailash/nodes/__init__.py +2 -0
- kailash/nodes/admin/audit_log.py +6 -6
- kailash/nodes/admin/permission_check.py +8 -8
- kailash/nodes/admin/role_management.py +32 -28
- kailash/nodes/admin/schema.sql +6 -1
- kailash/nodes/admin/schema_manager.py +13 -13
- kailash/nodes/admin/security_event.py +16 -20
- kailash/nodes/admin/tenant_isolation.py +3 -3
- kailash/nodes/admin/transaction_utils.py +3 -3
- kailash/nodes/admin/user_management.py +21 -22
- kailash/nodes/ai/a2a.py +11 -11
- kailash/nodes/ai/ai_providers.py +9 -12
- kailash/nodes/ai/embedding_generator.py +13 -14
- kailash/nodes/ai/intelligent_agent_orchestrator.py +19 -19
- kailash/nodes/ai/iterative_llm_agent.py +3 -3
- kailash/nodes/ai/llm_agent.py +213 -36
- kailash/nodes/ai/self_organizing.py +2 -2
- kailash/nodes/alerts/discord.py +4 -4
- kailash/nodes/api/graphql.py +6 -6
- kailash/nodes/api/http.py +12 -17
- kailash/nodes/api/rate_limiting.py +4 -4
- kailash/nodes/api/rest.py +15 -15
- kailash/nodes/auth/mfa.py +3 -4
- kailash/nodes/auth/risk_assessment.py +2 -2
- kailash/nodes/auth/session_management.py +5 -5
- kailash/nodes/auth/sso.py +143 -0
- kailash/nodes/base.py +6 -2
- kailash/nodes/base_async.py +16 -2
- kailash/nodes/base_with_acl.py +2 -2
- kailash/nodes/cache/__init__.py +9 -0
- kailash/nodes/cache/cache.py +1172 -0
- kailash/nodes/cache/cache_invalidation.py +870 -0
- kailash/nodes/cache/redis_pool_manager.py +595 -0
- kailash/nodes/code/async_python.py +2 -1
- kailash/nodes/code/python.py +196 -35
- kailash/nodes/compliance/data_retention.py +6 -6
- kailash/nodes/compliance/gdpr.py +5 -5
- kailash/nodes/data/__init__.py +10 -0
- kailash/nodes/data/optimistic_locking.py +906 -0
- kailash/nodes/data/readers.py +8 -8
- kailash/nodes/data/redis.py +349 -0
- kailash/nodes/data/sql.py +314 -3
- kailash/nodes/data/streaming.py +21 -0
- kailash/nodes/enterprise/__init__.py +8 -0
- kailash/nodes/enterprise/audit_logger.py +285 -0
- kailash/nodes/enterprise/batch_processor.py +22 -3
- kailash/nodes/enterprise/data_lineage.py +1 -1
- kailash/nodes/enterprise/mcp_executor.py +205 -0
- kailash/nodes/enterprise/service_discovery.py +150 -0
- kailash/nodes/enterprise/tenant_assignment.py +108 -0
- kailash/nodes/logic/async_operations.py +2 -2
- kailash/nodes/logic/convergence.py +1 -1
- kailash/nodes/logic/operations.py +1 -1
- kailash/nodes/monitoring/__init__.py +11 -1
- kailash/nodes/monitoring/health_check.py +456 -0
- kailash/nodes/monitoring/log_processor.py +817 -0
- kailash/nodes/monitoring/metrics_collector.py +627 -0
- kailash/nodes/monitoring/performance_benchmark.py +137 -11
- kailash/nodes/rag/advanced.py +7 -7
- kailash/nodes/rag/agentic.py +49 -2
- kailash/nodes/rag/conversational.py +3 -3
- kailash/nodes/rag/evaluation.py +3 -3
- kailash/nodes/rag/federated.py +3 -3
- kailash/nodes/rag/graph.py +3 -3
- kailash/nodes/rag/multimodal.py +3 -3
- kailash/nodes/rag/optimized.py +5 -5
- kailash/nodes/rag/privacy.py +3 -3
- kailash/nodes/rag/query_processing.py +6 -6
- kailash/nodes/rag/realtime.py +1 -1
- kailash/nodes/rag/registry.py +2 -6
- kailash/nodes/rag/router.py +1 -1
- kailash/nodes/rag/similarity.py +7 -7
- kailash/nodes/rag/strategies.py +4 -4
- kailash/nodes/security/abac_evaluator.py +6 -6
- kailash/nodes/security/behavior_analysis.py +5 -6
- kailash/nodes/security/credential_manager.py +1 -1
- kailash/nodes/security/rotating_credentials.py +11 -11
- kailash/nodes/security/threat_detection.py +8 -8
- kailash/nodes/testing/credential_testing.py +2 -2
- kailash/nodes/transform/processors.py +5 -5
- kailash/runtime/local.py +162 -14
- kailash/runtime/parameter_injection.py +425 -0
- kailash/runtime/parameter_injector.py +657 -0
- kailash/runtime/testing.py +2 -2
- kailash/testing/fixtures.py +2 -2
- kailash/workflow/builder.py +99 -18
- kailash/workflow/builder_improvements.py +207 -0
- kailash/workflow/input_handling.py +170 -0
- {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/METADATA +21 -8
- {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/RECORD +126 -101
- kailash/mcp/__init__.py +0 -53
- kailash/mcp/client.py +0 -445
- kailash/mcp/server.py +0 -292
- kailash/mcp/server_enhanced.py +0 -449
- kailash/mcp/utils/cache.py +0 -267
- /kailash/{mcp → mcp_server}/client_new.py +0 -0
- /kailash/{mcp → mcp_server}/utils/__init__.py +0 -0
- /kailash/{mcp → mcp_server}/utils/config.py +0 -0
- /kailash/{mcp → mcp_server}/utils/formatters.py +0 -0
- /kailash/{mcp → mcp_server}/utils/metrics.py +0 -0
- {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/WHEEL +0 -0
- {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/entry_points.txt +0 -0
- {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.6.2.dist-info → kailash-0.6.4.dist-info}/top_level.txt +0 -0
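The listing shows the 0.6.2 kailash.mcp package being removed and replaced by a new kailash.mcp_server package: several helper modules move across unchanged while client.py and server.py are rewritten. A minimal sketch of the corresponding import-path change for downstream code, assuming only the submodule names shown in the listing (the diff does not show which symbols each __init__.py re-exports):

# 0.6.2 layout (removed in this release)
from kailash.mcp import client, server

# 0.6.4 layout (new kailash.mcp_server package with the same submodule names)
from kailash.mcp_server import client, server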
kailash/nodes/monitoring/log_processor.py (new file)
@@ -0,0 +1,817 @@
"""Log processing node for comprehensive log analysis and management.

This module provides advanced log processing capabilities including parsing,
filtering, aggregation, pattern matching, and forwarding to various backends.
"""

import json
import logging
import re
import time
from datetime import UTC, datetime, timedelta
from enum import Enum
from typing import Any, Callable, Dict, List, Optional, Pattern, Union

from kailash.nodes.base import NodeParameter, register_node
from kailash.nodes.base_async import AsyncNode
from kailash.sdk_exceptions import NodeExecutionError

logger = logging.getLogger(__name__)


class LogLevel(Enum):
    """Standard log levels for filtering."""

    CRITICAL = 50
    ERROR = 40
    WARNING = 30
    INFO = 20
    DEBUG = 10
    NOTSET = 0


class LogFormat(Enum):
    """Supported log output formats."""

    JSON = "json"
    STRUCTURED = "structured"
    RAW = "raw"
    SYSLOG = "syslog"
    ELK = "elk"  # Elasticsearch/Logstash/Kibana format


class AggregationType(Enum):
    """Types of log aggregation."""

    COUNT = "count"
    RATE = "rate"
    UNIQUE = "unique"
    TOP_VALUES = "top_values"
    TIMELINE = "timeline"


@register_node()
class LogProcessorNode(AsyncNode):
    """Node for processing, filtering, and analyzing logs.

    This node provides comprehensive log processing capabilities including:
    - Multi-format log parsing (JSON, structured text, regex patterns)
    - Advanced filtering by level, timestamp, content, and custom rules
    - Pattern extraction and field parsing
    - Log aggregation and statistics
    - Real-time alerting on log patterns
    - Output formatting for various backends
    - Log forwarding and streaming

    Design Purpose:
    - Centralized log processing for monitoring and observability
    - Real-time log analysis and alerting
    - Log data enrichment and transformation
    - Support for various log backends and formats

    Examples:
        >>> # Basic log filtering and parsing
        >>> processor = LogProcessorNode()
        >>> result = await processor.execute(
        ...     logs=[
        ...         "2024-01-01 10:00:00 ERROR Failed to connect to database",
        ...         "2024-01-01 10:00:01 INFO User logged in successfully",
        ...         "2024-01-01 10:00:02 WARNING High memory usage detected"
        ...     ],
        ...     filters={"min_level": "WARNING"},
        ...     output_format="json"
        ... )

        >>> # Advanced pattern matching and alerting
        >>> result = await processor.execute(
        ...     logs=log_stream,
        ...     patterns=[
        ...         {"name": "error_spike", "regex": r"ERROR.*database", "threshold": 5},
        ...         {"name": "auth_failure", "regex": r"authentication.*failed", "threshold": 3}
        ...     ],
        ...     aggregation={"type": "timeline", "interval": 60}
        ... )
    """

    def __init__(self, **kwargs):
        """Initialize the log processor node."""
        super().__init__(**kwargs)
        self.compiled_patterns: Dict[str, Pattern] = {}
        self.aggregation_buffer: List[Dict[str, Any]] = []
        self.last_aggregation_time = time.time()
        self.logger.info(f"Initialized LogProcessorNode: {self.id}")

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Define the parameters this node accepts."""
        return {
            "logs": NodeParameter(
                name="logs",
                type=Any,
                required=True,
                description="Log entries to process (string or list of strings)",
            ),
            "log_format": NodeParameter(
                name="log_format",
                type=str,
                required=False,
                default="auto",
                description="Input log format (auto, json, structured, raw)",
            ),
            "filters": NodeParameter(
                name="filters",
                type=dict,
                required=False,
                default={},
                description="Filtering criteria for logs",
            ),
            "patterns": NodeParameter(
                name="patterns",
                type=list,
                required=False,
                default=[],
                description="Pattern extraction and matching rules",
            ),
            "aggregation": NodeParameter(
                name="aggregation",
                type=dict,
                required=False,
                default={},
                description="Aggregation configuration",
            ),
            "output_format": NodeParameter(
                name="output_format",
                type=str,
                required=False,
                default="json",
                description="Output format (json, structured, raw, syslog, elk)",
            ),
            "enrichment": NodeParameter(
                name="enrichment",
                type=dict,
                required=False,
                default={},
                description="Log enrichment configuration",
            ),
            "alerts": NodeParameter(
                name="alerts",
                type=list,
                required=False,
                default=[],
                description="Alert rules for pattern matching",
            ),
            "max_buffer_size": NodeParameter(
                name="max_buffer_size",
                type=int,
                required=False,
                default=10000,
                description="Maximum number of logs to buffer",
            ),
        }

    def get_output_schema(self) -> Dict[str, NodeParameter]:
        """Define the output schema for this node."""
        return {
            "processed_logs": NodeParameter(
                name="processed_logs",
                type=list,
                description="Processed and filtered log entries",
            ),
            "filtered_count": NodeParameter(
                name="filtered_count",
                type=int,
                description="Number of logs that passed filters",
            ),
            "total_count": NodeParameter(
                name="total_count",
                type=int,
                description="Total number of input logs",
            ),
            "patterns_matched": NodeParameter(
                name="patterns_matched",
                type=dict,
                description="Pattern matching results and counts",
            ),
            "aggregations": NodeParameter(
                name="aggregations",
                type=dict,
                description="Log aggregation results",
            ),
            "alerts_triggered": NodeParameter(
                name="alerts_triggered",
                type=list,
                description="Alerts triggered during processing",
            ),
            "processing_time": NodeParameter(
                name="processing_time",
                type=float,
                description="Time taken to process logs",
            ),
            "timestamp": NodeParameter(
                name="timestamp",
                type=str,
                description="ISO timestamp of processing",
            ),
        }

    async def async_run(self, **kwargs) -> Dict[str, Any]:
        """Process logs based on configuration."""
        logs = kwargs["logs"]
        log_format = kwargs.get("log_format", "auto")
        filters = kwargs.get("filters", {})
        patterns = kwargs.get("patterns", [])
        aggregation = kwargs.get("aggregation", {})
        output_format = LogFormat(kwargs.get("output_format", "json"))
        enrichment = kwargs.get("enrichment", {})
        alerts = kwargs.get("alerts", [])
        max_buffer_size = kwargs.get("max_buffer_size", 10000)

        start_time = time.time()

        try:
            # Validate input
            if logs is None:
                raise ValueError("Logs parameter cannot be None")

            # Normalize input logs to list
            if isinstance(logs, str):
                logs = [logs]

            # Validate buffer size
            if len(logs) > max_buffer_size:
                self.logger.warning(
                    f"Input logs ({len(logs)}) exceed buffer size ({max_buffer_size}), truncating"
                )
                logs = logs[:max_buffer_size]

            # Parse logs
            parsed_logs = await self._parse_logs(logs, log_format)

            # Apply filters
            filtered_logs = await self._filter_logs(parsed_logs, filters)

            # Process patterns
            pattern_results = await self._process_patterns(filtered_logs, patterns)

            # Enrich logs if configured
            if enrichment:
                filtered_logs = await self._enrich_logs(filtered_logs, enrichment)

            # Process aggregations
            aggregation_results = await self._process_aggregations(
                filtered_logs, aggregation
            )

            # Check alert rules
            alerts_triggered = await self._check_alerts(
                filtered_logs, alerts, pattern_results
            )

            # Format output
            formatted_logs = await self._format_output(filtered_logs, output_format)

            processing_time = time.time() - start_time

            return {
                "success": True,
                "processed_logs": formatted_logs,
                "filtered_count": len(filtered_logs),
                "total_count": len(logs),
                "patterns_matched": pattern_results,
                "aggregations": aggregation_results,
                "alerts_triggered": alerts_triggered,
                "processing_time": processing_time,
                "timestamp": datetime.now(UTC).isoformat(),
            }

        except Exception as e:
            self.logger.error(f"Log processing failed: {str(e)}")
            raise NodeExecutionError(f"Failed to process logs: {str(e)}")

    async def _parse_logs(
        self, logs: List[str], log_format: str
    ) -> List[Dict[str, Any]]:
        """Parse raw log entries into structured format."""
        parsed_logs = []

        for log_entry in logs:
            try:
                if log_format == "json":
                    parsed_log = json.loads(log_entry)
                elif log_format == "auto":
                    # Try JSON first, then fall back to structured parsing
                    try:
                        parsed_log = json.loads(log_entry)
                    except json.JSONDecodeError:
                        parsed_log = await self._parse_structured_log(log_entry)
                else:
                    parsed_log = await self._parse_structured_log(log_entry)

                # Ensure required fields
                if "timestamp" not in parsed_log:
                    parsed_log["timestamp"] = datetime.now(UTC).isoformat()
                if "level" not in parsed_log:
                    parsed_log["level"] = await self._extract_log_level(log_entry)
                if "message" not in parsed_log:
                    parsed_log["message"] = log_entry

                parsed_logs.append(parsed_log)

            except Exception as e:
                # If parsing fails, create a minimal log entry
                self.logger.debug(f"Failed to parse log entry, using raw: {str(e)}")
                parsed_logs.append(
                    {
                        "timestamp": datetime.now(UTC).isoformat(),
                        "level": "INFO",
                        "message": log_entry,
                        "raw": True,
                        "parse_error": str(e),
                    }
                )

        return parsed_logs

    async def _parse_structured_log(self, log_entry: str) -> Dict[str, Any]:
        """Parse structured log entries using common patterns."""
        # Common log patterns
        patterns = [
            # ISO timestamp + level + message
            r"(?P<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})?)\s+(?P<level>\w+)\s+(?P<message>.*)",
            # Date time + level + message
            r"(?P<timestamp>\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}(?:\.\d+)?)\s+(?P<level>\w+)\s+(?P<message>.*)",
            # Syslog format
            r"(?P<timestamp>\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+(?P<hostname>\S+)\s+(?P<program>\S+):\s+(?P<message>.*)",
            # Simple level + message
            r"(?P<level>\w+):\s+(?P<message>.*)",
        ]

        for pattern in patterns:
            match = re.match(pattern, log_entry.strip())
            if match:
                return match.groupdict()

        # If no pattern matches, return as raw message
        return {"message": log_entry}

    async def _extract_log_level(self, log_entry: str) -> str:
        """Extract log level from raw log entry."""
        level_patterns = {
            "CRITICAL": ["critical", "fatal", "crit"],
            "ERROR": ["error", "err"],
            "WARNING": ["warning", "warn"],
            "INFO": ["info", "information"],
            "DEBUG": ["debug", "trace"],
        }

        log_lower = log_entry.lower()
        for level, keywords in level_patterns.items():
            for keyword in keywords:
                if keyword in log_lower:
                    return level

        return "INFO"  # Default level

    async def _filter_logs(
        self, logs: List[Dict[str, Any]], filters: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """Apply filtering criteria to logs."""
        if not filters:
            return logs

        filtered_logs = []

        for log_entry in logs:
            # Level filtering
            if "min_level" in filters:
                min_level = LogLevel[filters["min_level"].upper()]
                log_level = LogLevel[log_entry.get("level", "INFO").upper()]
                if log_level.value < min_level.value:
                    continue

            # Time range filtering
            if "start_time" in filters or "end_time" in filters:
                log_time = datetime.fromisoformat(
                    log_entry.get("timestamp", datetime.now(UTC).isoformat())
                )

                if "start_time" in filters:
                    start_time = datetime.fromisoformat(filters["start_time"])
                    if log_time < start_time:
                        continue

                if "end_time" in filters:
                    end_time = datetime.fromisoformat(filters["end_time"])
                    if log_time > end_time:
                        continue

            # Content filtering
            if "contains" in filters:
                if filters["contains"] not in log_entry.get("message", ""):
                    continue

            if "excludes" in filters:
                exclude_text = filters["excludes"]
                # Check in message, level, or raw fields
                if (
                    exclude_text in log_entry.get("message", "")
                    or exclude_text in log_entry.get("level", "")
                    or exclude_text in str(log_entry.get("raw", ""))
                ):
                    continue

            # Regex filtering
            if "regex" in filters:
                if not re.search(filters["regex"], log_entry.get("message", "")):
                    continue

            # Field-based filtering
            if "fields" in filters:
                field_match = True
                for field, value in filters["fields"].items():
                    if log_entry.get(field) != value:
                        field_match = False
                        break
                if not field_match:
                    continue

            filtered_logs.append(log_entry)

        return filtered_logs

    async def _process_patterns(
        self, logs: List[Dict[str, Any]], patterns: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Process pattern matching and extraction rules."""
        if not patterns:
            return {}

        pattern_results = {}

        for pattern_config in patterns:
            pattern_name = pattern_config.get("name", "unnamed")
            regex_pattern = pattern_config.get("regex")
            extract_fields = pattern_config.get("extract_fields", [])

            if not regex_pattern:
                continue

            # Compile pattern if not already compiled
            if pattern_name not in self.compiled_patterns:
                try:
                    self.compiled_patterns[pattern_name] = re.compile(regex_pattern)
                except re.error as e:
                    self.logger.warning(f"Invalid regex pattern '{pattern_name}': {e}")
                    continue

            compiled_pattern = self.compiled_patterns[pattern_name]
            matches = []
            match_count = 0

            for log_entry in logs:
                message = log_entry.get("message", "")
                level = log_entry.get("level", "")
                # Search in message first, then in level + message combined
                match = compiled_pattern.search(message)
                if not match and level:
                    combined_text = f"{level} {message}"
                    match = compiled_pattern.search(combined_text)

                if match:
                    match_count += 1
                    match_data = {
                        "timestamp": log_entry.get("timestamp"),
                        "full_match": match.group(0),
                        "groups": match.groups(),
                        "log_entry": log_entry,
                    }

                    # Extract named groups
                    if match.groupdict():
                        match_data["named_groups"] = match.groupdict()

                    # Extract specified fields
                    if extract_fields:
                        extracted = {}
                        for field in extract_fields:
                            if field in log_entry:
                                extracted[field] = log_entry[field]
                        match_data["extracted_fields"] = extracted

                    matches.append(match_data)

            pattern_results[pattern_name] = {
                "match_count": match_count,
                "matches": matches,
                "pattern": regex_pattern,
            }

        return pattern_results

    async def _enrich_logs(
        self, logs: List[Dict[str, Any]], enrichment: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """Enrich logs with additional data."""
        enriched_logs = []

        for log_entry in logs.copy():
            # Add static fields
            if "static_fields" in enrichment:
                log_entry.update(enrichment["static_fields"])

            # Add computed fields
            if "computed_fields" in enrichment:
                for field_name, computation in enrichment["computed_fields"].items():
                    if computation["type"] == "timestamp_parse":
                        # Parse timestamp to components
                        try:
                            dt = datetime.fromisoformat(log_entry.get("timestamp", ""))
                            log_entry[field_name] = {
                                "year": dt.year,
                                "month": dt.month,
                                "day": dt.day,
                                "hour": dt.hour,
                                "minute": dt.minute,
                                "weekday": dt.strftime("%A"),
                            }
                        except Exception:
                            log_entry[field_name] = None

                    elif computation["type"] == "field_extraction":
                        # Extract field using regex
                        source_field = computation.get("source_field", "message")
                        pattern = computation.get("pattern")
                        if pattern and source_field in log_entry:
                            match = re.search(pattern, str(log_entry[source_field]))
                            if match:
                                log_entry[field_name] = (
                                    match.group(1) if match.groups() else match.group(0)
                                )

            # Add processing metadata
            log_entry["_processed_at"] = datetime.now(UTC).isoformat()
            log_entry["_processor_id"] = self.id

            enriched_logs.append(log_entry)

        return enriched_logs

    async def _process_aggregations(
        self, logs: List[Dict[str, Any]], aggregation: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Process log aggregations."""
        if not aggregation:
            return {}

        agg_type = AggregationType(aggregation.get("type", "count"))
        field = aggregation.get("field", "level")
        interval = aggregation.get("interval", 60)  # seconds

        results = {}

        if agg_type == AggregationType.COUNT:
            # Count by field values
            counts = {}
            for log_entry in logs:
                value = log_entry.get(field, "unknown")
                counts[value] = counts.get(value, 0) + 1
            results["counts"] = counts

        elif agg_type == AggregationType.RATE:
            # Calculate rate over time
            if logs:
                time_span = (
                    datetime.fromisoformat(logs[-1]["timestamp"])
                    - datetime.fromisoformat(logs[0]["timestamp"])
                ).total_seconds()
                if time_span > 0:
                    results["rate"] = len(logs) / time_span
                else:
                    results["rate"] = 0

        elif agg_type == AggregationType.UNIQUE:
            # Count unique values
            unique_values = set()
            for log_entry in logs:
                value = log_entry.get(field)
                if value is not None:
                    unique_values.add(str(value))
            results["unique_count"] = len(unique_values)
            results["unique_values"] = list(unique_values)

        elif agg_type == AggregationType.TOP_VALUES:
            # Top N values by count
            counts = {}
            for log_entry in logs:
                value = log_entry.get(field, "unknown")
                counts[value] = counts.get(value, 0) + 1

            top_n = aggregation.get("top_n", 10)
            top_values = sorted(counts.items(), key=lambda x: x[1], reverse=True)[
                :top_n
            ]
            results["top_values"] = top_values

        elif agg_type == AggregationType.TIMELINE:
            # Timeline aggregation
            timeline = {}
            for log_entry in logs:
                timestamp = datetime.fromisoformat(log_entry["timestamp"])
                # Round to interval
                interval_start = timestamp.replace(second=0, microsecond=0)
                if interval >= 3600:  # Hour intervals
                    interval_start = interval_start.replace(minute=0)

                interval_key = interval_start.isoformat()
                if interval_key not in timeline:
                    timeline[interval_key] = 0
                timeline[interval_key] += 1

            results["timeline"] = timeline

        return results

    async def _check_alerts(
        self,
        logs: List[Dict[str, Any]],
        alerts: List[Dict[str, Any]],
        pattern_results: Dict[str, Any],
    ) -> List[Dict[str, Any]]:
        """Check alert rules and trigger alerts."""
        triggered_alerts = []

        for alert_config in alerts:
            alert_name = alert_config.get("name", "unnamed")
            alert_type = alert_config.get("type", "threshold")

            if alert_type == "threshold":
                # Threshold-based alerts
                threshold = alert_config.get("threshold", 0)
                field = alert_config.get("field", "level")
                condition = alert_config.get("condition", "ERROR")

                count = sum(1 for log in logs if log.get(field) == condition)
                if count >= threshold:
                    triggered_alerts.append(
                        {
                            "name": alert_name,
                            "type": alert_type,
                            "triggered_at": datetime.now(UTC).isoformat(),
                            "threshold": threshold,
                            "actual_count": count,
                            "condition": condition,
                            "severity": alert_config.get("severity", "medium"),
                        }
                    )

            elif alert_type == "pattern":
                # Pattern-based alerts
                pattern_name = alert_config.get("pattern_name")
                threshold = alert_config.get("threshold", 1)

                if pattern_name in pattern_results:
                    match_count = pattern_results[pattern_name]["match_count"]
                    if match_count >= threshold:
                        triggered_alerts.append(
                            {
                                "name": alert_name,
                                "type": alert_type,
                                "triggered_at": datetime.now(UTC).isoformat(),
                                "pattern_name": pattern_name,
                                "threshold": threshold,
                                "match_count": match_count,
                                "severity": alert_config.get("severity", "medium"),
                            }
                        )

            elif alert_type == "rate":
                # Rate-based alerts
                time_window = alert_config.get("time_window", 300)  # 5 minutes
                rate_threshold = alert_config.get(
                    "rate_threshold", 10
                )  # logs per second

                now = datetime.now(UTC)
                window_start = now - timedelta(seconds=time_window)

                recent_logs = [
                    log
                    for log in logs
                    if datetime.fromisoformat(log["timestamp"]) >= window_start
                ]

                if recent_logs:
                    rate = len(recent_logs) / time_window
                    if rate >= rate_threshold:
                        triggered_alerts.append(
                            {
                                "name": alert_name,
                                "type": alert_type,
                                "triggered_at": datetime.now(UTC).isoformat(),
                                "rate_threshold": rate_threshold,
                                "actual_rate": rate,
                                "time_window": time_window,
                                "log_count": len(recent_logs),
                                "severity": alert_config.get("severity", "medium"),
                            }
                        )

        return triggered_alerts

    async def _format_output(
        self, logs: List[Dict[str, Any]], output_format: LogFormat
    ) -> Union[List[Dict[str, Any]], List[str], str]:
        """Format logs according to specified output format."""
        if output_format == LogFormat.JSON:
            return logs

        elif output_format == LogFormat.RAW:
            return [log.get("message", str(log)) for log in logs]

        elif output_format == LogFormat.STRUCTURED:
            formatted = []
            for log in logs:
                timestamp = log.get("timestamp", "")
                level = log.get("level", "INFO")
                message = log.get("message", "")
                formatted.append(f"{timestamp} {level} {message}")
            return formatted

        elif output_format == LogFormat.SYSLOG:
            formatted = []
            for log in logs:
                timestamp = log.get("timestamp", "")
                hostname = log.get("hostname", "localhost")
                program = log.get("program", "kailash")
                message = log.get("message", "")
                formatted.append(f"{timestamp} {hostname} {program}: {message}")
            return formatted

        elif output_format == LogFormat.ELK:
            # Elasticsearch/Logstash/Kibana format
            elk_logs = []
            for log in logs:
                elk_log = {
                    "@timestamp": log.get("timestamp"),
                    "@version": "1",
                    "message": log.get("message"),
                    "level": log.get("level"),
                    "logger_name": log.get("logger", "kailash"),
                    "thread_name": log.get("thread", "main"),
                    "fields": {
                        k: v
                        for k, v in log.items()
                        if k not in ["timestamp", "message", "level"]
                    },
                }
                elk_logs.append(elk_log)
            return elk_logs

        return logs

    def run(self, **kwargs) -> Dict[str, Any]:
        """Synchronous wrapper for compatibility."""
        import asyncio

        try:
            # Try to get current event loop
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No event loop running, safe to use asyncio.run()
            try:
                result = asyncio.run(self.async_run(**kwargs))
                return result
            except Exception as e:
                return {
                    "success": False,
                    "error": str(e),
                    "processed_logs": [],
                    "filtered_count": 0,
                    "total_count": 0,
                    "patterns_matched": {},
                    "aggregations": {},
                    "alerts_triggered": [],
                    "processing_time": 0.0,
                    "timestamp": datetime.now(UTC).isoformat(),
                }
        else:
            # Event loop is running, create a task
            import concurrent.futures

            try:
                with concurrent.futures.ThreadPoolExecutor() as executor:
                    future = executor.submit(asyncio.run, self.async_run(**kwargs))
                    result = future.result()
                return result
            except Exception as e:
                return {
                    "success": False,
                    "error": str(e),
                    "processed_logs": [],
                    "filtered_count": 0,
                    "total_count": 0,
                    "patterns_matched": {},
                    "aggregations": {},
                    "alerts_triggered": [],
                    "processing_time": 0.0,
                    "timestamp": datetime.now(UTC).isoformat(),
                }
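The parameters defined in get_parameters() and the handlers above suggest the following end-to-end call. This is a minimal sketch based only on this file: the import path follows the location shown in the listing (kailash/nodes/monitoring/log_processor.py), the synchronous run() wrapper defined at the end of the class is used in place of the awaitable call shown in the docstring, and the filter, pattern, aggregation, and alert dictionary shapes are inferred from _filter_logs, _process_patterns, _process_aggregations, and _check_alerts rather than taken from package documentation.

from kailash.nodes.monitoring.log_processor import LogProcessorNode

processor = LogProcessorNode()
result = processor.run(
    logs=[
        "2024-01-01 10:00:00 ERROR Failed to connect to database",
        "2024-01-01 10:00:05 ERROR Failed to connect to database",
        "2024-01-01 10:00:07 INFO User logged in successfully",
    ],
    # Keep WARNING and above, and only entries mentioning the database
    filters={"min_level": "WARNING", "contains": "database"},
    # Named regex applied to each surviving entry
    patterns=[{"name": "db_errors", "regex": r"ERROR.*database"}],
    # Count surviving entries by level
    aggregation={"type": "count", "field": "level"},
    # Fire when the db_errors pattern matches at least twice
    alerts=[
        {
            "name": "db_error_spike",
            "type": "pattern",
            "pattern_name": "db_errors",
            "threshold": 2,
            "severity": "high",
        }
    ],
    output_format="structured",
)
print(result["filtered_count"], result["aggregations"], result["alerts_triggered"])

With this input the two ERROR lines survive the filters, the db_errors pattern matches both (the level is prepended to the message before the second search in _process_patterns), and the pattern alert fires.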