kailash 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +31 -0
- kailash/__main__.py +11 -0
- kailash/cli/__init__.py +5 -0
- kailash/cli/commands.py +563 -0
- kailash/manifest.py +778 -0
- kailash/nodes/__init__.py +23 -0
- kailash/nodes/ai/__init__.py +26 -0
- kailash/nodes/ai/agents.py +417 -0
- kailash/nodes/ai/models.py +488 -0
- kailash/nodes/api/__init__.py +52 -0
- kailash/nodes/api/auth.py +567 -0
- kailash/nodes/api/graphql.py +480 -0
- kailash/nodes/api/http.py +598 -0
- kailash/nodes/api/rate_limiting.py +572 -0
- kailash/nodes/api/rest.py +665 -0
- kailash/nodes/base.py +1032 -0
- kailash/nodes/base_async.py +128 -0
- kailash/nodes/code/__init__.py +32 -0
- kailash/nodes/code/python.py +1021 -0
- kailash/nodes/data/__init__.py +125 -0
- kailash/nodes/data/readers.py +496 -0
- kailash/nodes/data/sharepoint_graph.py +623 -0
- kailash/nodes/data/sql.py +380 -0
- kailash/nodes/data/streaming.py +1168 -0
- kailash/nodes/data/vector_db.py +964 -0
- kailash/nodes/data/writers.py +529 -0
- kailash/nodes/logic/__init__.py +6 -0
- kailash/nodes/logic/async_operations.py +702 -0
- kailash/nodes/logic/operations.py +551 -0
- kailash/nodes/transform/__init__.py +5 -0
- kailash/nodes/transform/processors.py +379 -0
- kailash/runtime/__init__.py +6 -0
- kailash/runtime/async_local.py +356 -0
- kailash/runtime/docker.py +697 -0
- kailash/runtime/local.py +434 -0
- kailash/runtime/parallel.py +557 -0
- kailash/runtime/runner.py +110 -0
- kailash/runtime/testing.py +347 -0
- kailash/sdk_exceptions.py +307 -0
- kailash/tracking/__init__.py +7 -0
- kailash/tracking/manager.py +885 -0
- kailash/tracking/metrics_collector.py +342 -0
- kailash/tracking/models.py +535 -0
- kailash/tracking/storage/__init__.py +0 -0
- kailash/tracking/storage/base.py +113 -0
- kailash/tracking/storage/database.py +619 -0
- kailash/tracking/storage/filesystem.py +543 -0
- kailash/utils/__init__.py +0 -0
- kailash/utils/export.py +924 -0
- kailash/utils/templates.py +680 -0
- kailash/visualization/__init__.py +62 -0
- kailash/visualization/api.py +732 -0
- kailash/visualization/dashboard.py +951 -0
- kailash/visualization/performance.py +808 -0
- kailash/visualization/reports.py +1471 -0
- kailash/workflow/__init__.py +15 -0
- kailash/workflow/builder.py +245 -0
- kailash/workflow/graph.py +827 -0
- kailash/workflow/mermaid_visualizer.py +628 -0
- kailash/workflow/mock_registry.py +63 -0
- kailash/workflow/runner.py +302 -0
- kailash/workflow/state.py +238 -0
- kailash/workflow/visualization.py +588 -0
- kailash-0.1.0.dist-info/METADATA +710 -0
- kailash-0.1.0.dist-info/RECORD +69 -0
- kailash-0.1.0.dist-info/WHEEL +5 -0
- kailash-0.1.0.dist-info/entry_points.txt +2 -0
- kailash-0.1.0.dist-info/licenses/LICENSE +21 -0
- kailash-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1471 @@
"""Workflow performance report generation.

This module provides comprehensive reporting capabilities for workflow performance
analysis, including detailed metrics, visualizations, and actionable insights.

Design Purpose:
- Generate comprehensive performance reports for workflow executions
- Provide detailed analysis with actionable insights and recommendations
- Support multiple output formats (HTML, PDF, JSON, Markdown)
- Enable automated report generation and scheduling

Upstream Dependencies:
- TaskManager provides execution data and metrics
- PerformanceVisualizer provides chart generation
- MetricsCollector provides detailed performance data
- RealTimeDashboard provides live monitoring capabilities

Downstream Consumers:
- CLI tools use this for generating analysis reports
- Web interfaces display generated reports
- Automated systems schedule and distribute reports
"""

import json
import logging
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

import numpy as np

from kailash.tracking.manager import TaskManager
from kailash.tracking.models import TaskRun, TaskStatus
from kailash.visualization.performance import PerformanceVisualizer

logger = logging.getLogger(__name__)


class ReportFormat(Enum):
    """Supported report output formats."""

    HTML = "html"
    MARKDOWN = "markdown"
    JSON = "json"
    PDF = "pdf"  # Future enhancement


@dataclass
class ReportConfig:
    """Configuration for report generation.

    Attributes:
        include_charts: Whether to include performance charts
        include_recommendations: Whether to include optimization recommendations
        chart_format: Format for embedded charts ('png', 'svg')
        detail_level: Level of detail ('summary', 'detailed', 'comprehensive')
        compare_historical: Whether to compare with historical runs
        theme: Report theme ('light', 'dark', 'corporate')
    """

    include_charts: bool = True
    include_recommendations: bool = True
    chart_format: str = "png"
    detail_level: str = "detailed"
    compare_historical: bool = True
    theme: str = "corporate"


@dataclass
class PerformanceInsight:
    """Container for performance insights and recommendations.

    Attributes:
        category: Type of insight ('bottleneck', 'optimization', 'warning')
        severity: Severity level ('low', 'medium', 'high', 'critical')
        title: Brief insight title
        description: Detailed description
        recommendation: Actionable recommendation
        metrics: Supporting metrics data
    """

    category: str
    severity: str
    title: str
    description: str
    recommendation: str
    metrics: Dict[str, Any] = field(default_factory=dict)


@dataclass
class WorkflowSummary:
    """Summary statistics for a workflow run.

    Attributes:
        run_id: Workflow run identifier
        workflow_name: Name of the workflow
        total_tasks: Total number of tasks
        completed_tasks: Number of completed tasks
        failed_tasks: Number of failed tasks
        total_duration: Total execution time
        avg_cpu_usage: Average CPU usage across tasks
        peak_memory_usage: Peak memory usage
        total_io_read: Total I/O read in bytes
        total_io_write: Total I/O write in bytes
        throughput: Tasks completed per minute
        efficiency_score: Overall efficiency score (0-100)
    """

    run_id: str
    workflow_name: str
    total_tasks: int = 0
    completed_tasks: int = 0
    failed_tasks: int = 0
    total_duration: float = 0.0
    avg_cpu_usage: float = 0.0
    peak_memory_usage: float = 0.0
    total_io_read: int = 0
    total_io_write: int = 0
    throughput: float = 0.0
    efficiency_score: float = 0.0


class WorkflowPerformanceReporter:
    """Comprehensive workflow performance report generator.

    This class provides detailed performance analysis and reporting capabilities
    for workflow executions, including insights, recommendations, and comparative
    analysis across multiple runs.

    Usage:
        reporter = WorkflowPerformanceReporter(task_manager)
        report = reporter.generate_report(run_id, output_path="report.html")
    """

    def __init__(
        self, task_manager: TaskManager, config: Optional[ReportConfig] = None
    ):
        """Initialize performance reporter.

        Args:
            task_manager: TaskManager instance for data access
            config: Report configuration options
        """
        self.task_manager = task_manager
        self.config = config or ReportConfig()
        self.performance_viz = PerformanceVisualizer(task_manager)
        self.logger = logger

    def generate_report(
        self,
        run_id: str,
        output_path: Optional[Union[str, Path]] = None,
        format: ReportFormat = ReportFormat.HTML,
        compare_runs: Optional[List[str]] = None,
    ) -> Path:
        """Generate comprehensive performance report.

        Args:
            run_id: Workflow run to analyze
            output_path: Path to save report file
            format: Output format for the report
            compare_runs: List of run IDs to compare against

        Returns:
            Path to generated report file
        """
        if output_path is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = (
                Path.cwd()
                / "outputs"
                / f"workflow_report_{run_id[:8]}_{timestamp}.{format.value}"
            )

        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Analyze workflow run
        analysis = self._analyze_workflow_run(run_id)

        # Generate insights and recommendations
        insights = self._generate_insights(analysis)

        # Compare with other runs if requested
        comparison_data = None
        if compare_runs:
            comparison_data = self._compare_runs([run_id] + compare_runs)

        # Generate report content based on format
        if format == ReportFormat.HTML:
            content = self._generate_html_report(analysis, insights, comparison_data)
        elif format == ReportFormat.MARKDOWN:
            content = self._generate_markdown_report(
                analysis, insights, comparison_data
            )
        elif format == ReportFormat.JSON:
            content = self._generate_json_report(analysis, insights, comparison_data)
        else:
            raise ValueError(f"Unsupported report format: {format}")

        # Write report file
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(content)

        self.logger.info(f"Generated {format.value.upper()} report: {output_path}")
        return output_path

    def _analyze_workflow_run(self, run_id: str) -> Dict[str, Any]:
        """Perform detailed analysis of a workflow run.

        Args:
            run_id: Run ID to analyze

        Returns:
            Dictionary containing analysis results
        """
        # Get run and task data
        run = self.task_manager.get_run(run_id)
        if not run:
            raise ValueError(f"Run {run_id} not found")

        tasks = self.task_manager.get_run_tasks(run_id)

        # Calculate workflow summary
        summary = self._calculate_workflow_summary(run, tasks)

        # Analyze task performance patterns
        task_analysis = self._analyze_task_performance(tasks)

        # Identify bottlenecks
        bottlenecks = self._identify_bottlenecks(tasks)

        # Resource utilization analysis
        resource_analysis = self._analyze_resource_utilization(tasks)

        # Error analysis
        error_analysis = self._analyze_errors(tasks)

        return {
            "run_info": {
                "run_id": run_id,
                "workflow_name": run.workflow_name,
                "started_at": run.started_at,
                "ended_at": run.ended_at,
                "status": run.status,
                "total_tasks": len(tasks),
            },
            "summary": summary,
            "task_analysis": task_analysis,
            "bottlenecks": bottlenecks,
            "resource_analysis": resource_analysis,
            "error_analysis": error_analysis,
            "charts": (
                self._generate_analysis_charts(run_id, tasks)
                if self.config.include_charts
                else {}
            ),
        }

    def _calculate_workflow_summary(
        self, run: Any, tasks: List[TaskRun]
    ) -> WorkflowSummary:
        """Calculate summary statistics for the workflow run."""
        summary = WorkflowSummary(
            run_id=run.run_id, workflow_name=run.workflow_name, total_tasks=len(tasks)
        )

        # Count task statuses
        summary.completed_tasks = sum(
            1 for t in tasks if t.status == TaskStatus.COMPLETED
        )
        summary.failed_tasks = sum(1 for t in tasks if t.status == TaskStatus.FAILED)

        # Calculate performance metrics for completed tasks
        completed_with_metrics = [
            t for t in tasks if t.status == TaskStatus.COMPLETED and t.metrics
        ]

        if completed_with_metrics:
            # Duration metrics
            durations = [
                t.metrics.duration for t in completed_with_metrics if t.metrics.duration
            ]
            if durations:
                summary.total_duration = sum(durations)

            # CPU metrics
            cpu_values = [
                t.metrics.cpu_usage
                for t in completed_with_metrics
                if t.metrics.cpu_usage
            ]
            if cpu_values:
                summary.avg_cpu_usage = np.mean(cpu_values)

            # Memory metrics
            memory_values = [
                t.metrics.memory_usage_mb
                for t in completed_with_metrics
                if t.metrics.memory_usage_mb
            ]
            if memory_values:
                summary.peak_memory_usage = max(memory_values)

            # I/O metrics
            for task in completed_with_metrics:
                if task.metrics.custom_metrics:
                    custom = task.metrics.custom_metrics
                    summary.total_io_read += custom.get("io_read_bytes", 0)
                    summary.total_io_write += custom.get("io_write_bytes", 0)

            # Calculate throughput (tasks/minute)
            if summary.total_duration > 0:
                summary.throughput = (
                    summary.completed_tasks / summary.total_duration
                ) * 60

        # Calculate efficiency score (0-100)
        success_rate = (
            summary.completed_tasks / summary.total_tasks
            if summary.total_tasks > 0
            else 0
        )
        avg_efficiency = min(
            100, max(0, 100 - summary.avg_cpu_usage)
        )  # Lower CPU = higher efficiency
        memory_efficiency = min(
            100, max(0, 100 - (summary.peak_memory_usage / 1000))
        )  # Normalize memory

        summary.efficiency_score = (
            (success_rate * 50) + (avg_efficiency * 0.3) + (memory_efficiency * 0.2)
        )

        return summary

    def _analyze_task_performance(self, tasks: List[TaskRun]) -> Dict[str, Any]:
        """Analyze performance patterns across tasks."""
        analysis = {
            "by_node_type": {},
            "duration_distribution": {},
            "resource_patterns": {},
            "execution_order": [],
        }

        # Group tasks by node type
        by_type = {}
        for task in tasks:
            if task.node_type not in by_type:
                by_type[task.node_type] = []
            by_type[task.node_type].append(task)

        # Analyze each node type
        for node_type, type_tasks in by_type.items():
            completed = [
                t for t in type_tasks if t.status == TaskStatus.COMPLETED and t.metrics
            ]

            if completed:
                durations = [
                    t.metrics.duration for t in completed if t.metrics.duration
                ]
                cpu_values = [
                    t.metrics.cpu_usage for t in completed if t.metrics.cpu_usage
                ]
                memory_values = [
                    t.metrics.memory_usage_mb
                    for t in completed
                    if t.metrics.memory_usage_mb
                ]

                analysis["by_node_type"][node_type] = {
                    "count": len(type_tasks),
                    "completed": len(completed),
                    "avg_duration": np.mean(durations) if durations else 0,
                    "max_duration": max(durations) if durations else 0,
                    "avg_cpu": np.mean(cpu_values) if cpu_values else 0,
                    "avg_memory": np.mean(memory_values) if memory_values else 0,
                    "success_rate": len(completed) / len(type_tasks) * 100,
                }

        # Execution order analysis
        ordered_tasks = sorted(
            [t for t in tasks if t.started_at], key=lambda t: t.started_at
        )

        analysis["execution_order"] = [
            {
                "node_id": t.node_id,
                "node_type": t.node_type,
                "started_at": t.started_at.isoformat() if t.started_at else None,
                "duration": t.metrics.duration if t.metrics else None,
                "status": t.status,
            }
            for t in ordered_tasks[:20]  # Limit to first 20 for readability
        ]

        return analysis

    def _identify_bottlenecks(self, tasks: List[TaskRun]) -> List[Dict[str, Any]]:
        """Identify performance bottlenecks in the workflow."""
        bottlenecks = []

        completed_tasks = [
            t for t in tasks if t.status == TaskStatus.COMPLETED and t.metrics
        ]

        if len(completed_tasks) < 2:
            return bottlenecks

        # Find duration outliers
        durations = [t.metrics.duration for t in completed_tasks if t.metrics.duration]
        if durations:
            duration_threshold = np.percentile(durations, 90)
            slow_tasks = [
                t
                for t in completed_tasks
                if t.metrics.duration and t.metrics.duration > duration_threshold
            ]

            for task in slow_tasks:
                bottlenecks.append(
                    {
                        "type": "duration",
                        "node_id": task.node_id,
                        "node_type": task.node_type,
                        "value": task.metrics.duration,
                        "threshold": duration_threshold,
                        "severity": (
                            "high"
                            if task.metrics.duration > duration_threshold * 2
                            else "medium"
                        ),
                    }
                )

        # Find memory outliers
        memory_values = [
            t.metrics.memory_usage_mb
            for t in completed_tasks
            if t.metrics.memory_usage_mb
        ]
        if memory_values:
            memory_threshold = np.percentile(memory_values, 90)
            memory_intensive_tasks = [
                t
                for t in completed_tasks
                if t.metrics.memory_usage_mb
                and t.metrics.memory_usage_mb > memory_threshold
            ]

            for task in memory_intensive_tasks:
                bottlenecks.append(
                    {
                        "type": "memory",
                        "node_id": task.node_id,
                        "node_type": task.node_type,
                        "value": task.metrics.memory_usage_mb,
                        "threshold": memory_threshold,
                        "severity": (
                            "high"
                            if task.metrics.memory_usage_mb > memory_threshold * 2
                            else "medium"
                        ),
                    }
                )

        # Find CPU outliers
        cpu_values = [
            t.metrics.cpu_usage for t in completed_tasks if t.metrics.cpu_usage
        ]
        if cpu_values:
            cpu_threshold = np.percentile(cpu_values, 90)
            cpu_intensive_tasks = [
                t
                for t in completed_tasks
                if t.metrics.cpu_usage and t.metrics.cpu_usage > cpu_threshold
            ]

            for task in cpu_intensive_tasks:
                bottlenecks.append(
                    {
                        "type": "cpu",
                        "node_id": task.node_id,
                        "node_type": task.node_type,
                        "value": task.metrics.cpu_usage,
                        "threshold": cpu_threshold,
                        "severity": "high" if task.metrics.cpu_usage > 80 else "medium",
                    }
                )

        return sorted(bottlenecks, key=lambda x: x["value"], reverse=True)

    def _analyze_resource_utilization(self, tasks: List[TaskRun]) -> Dict[str, Any]:
        """Analyze overall resource utilization patterns."""
        analysis = {
            "cpu_distribution": {},
            "memory_distribution": {},
            "io_patterns": {},
            "resource_efficiency": {},
        }

        completed_tasks = [
            t for t in tasks if t.status == TaskStatus.COMPLETED and t.metrics
        ]

        if not completed_tasks:
            return analysis

        # CPU distribution analysis
        cpu_values = [
            t.metrics.cpu_usage for t in completed_tasks if t.metrics.cpu_usage
        ]
        if cpu_values:
            analysis["cpu_distribution"] = {
                "mean": np.mean(cpu_values),
                "median": np.median(cpu_values),
                "std": np.std(cpu_values),
                "min": min(cpu_values),
                "max": max(cpu_values),
                "percentiles": {
                    "25th": np.percentile(cpu_values, 25),
                    "75th": np.percentile(cpu_values, 75),
                    "90th": np.percentile(cpu_values, 90),
                },
            }

        # Memory distribution analysis
        memory_values = [
            t.metrics.memory_usage_mb
            for t in completed_tasks
            if t.metrics.memory_usage_mb
        ]
        if memory_values:
            analysis["memory_distribution"] = {
                "mean": np.mean(memory_values),
                "median": np.median(memory_values),
                "std": np.std(memory_values),
                "min": min(memory_values),
                "max": max(memory_values),
                "total": sum(memory_values),
                "percentiles": {
                    "25th": np.percentile(memory_values, 25),
                    "75th": np.percentile(memory_values, 75),
                    "90th": np.percentile(memory_values, 90),
                },
            }

        # I/O patterns analysis
        io_read_total = 0
        io_write_total = 0
        io_intensive_tasks = 0

        for task in completed_tasks:
            if task.metrics.custom_metrics:
                custom = task.metrics.custom_metrics
                read_bytes = custom.get("io_read_bytes", 0)
                write_bytes = custom.get("io_write_bytes", 0)

                io_read_total += read_bytes
                io_write_total += write_bytes

                if read_bytes > 1024 * 1024 or write_bytes > 1024 * 1024:  # > 1MB
                    io_intensive_tasks += 1

        analysis["io_patterns"] = {
            "total_read_mb": io_read_total / (1024 * 1024),
            "total_write_mb": io_write_total / (1024 * 1024),
            "io_intensive_tasks": io_intensive_tasks,
            "avg_read_per_task_mb": (io_read_total / len(completed_tasks))
            / (1024 * 1024),
            "avg_write_per_task_mb": (io_write_total / len(completed_tasks))
            / (1024 * 1024),
        }

        return analysis

    def _analyze_errors(self, tasks: List[TaskRun]) -> Dict[str, Any]:
        """Analyze error patterns and failure modes."""
        analysis = {
            "error_summary": {},
            "error_by_type": {},
            "error_timeline": [],
            "recovery_suggestions": [],
        }

        failed_tasks = [t for t in tasks if t.status == TaskStatus.FAILED]

        analysis["error_summary"] = {
            "total_errors": len(failed_tasks),
            "error_rate": len(failed_tasks) / len(tasks) * 100 if tasks else 0,
            "critical_failures": len(
                [t for t in failed_tasks if "critical" in (t.error or "").lower()]
            ),
        }

        # Group errors by node type
        error_by_type = {}
        for task in failed_tasks:
            node_type = task.node_type
            if node_type not in error_by_type:
                error_by_type[node_type] = []
            error_by_type[node_type].append(
                {
                    "node_id": task.node_id,
                    "error_message": task.error,
                    "started_at": (
                        task.started_at.isoformat() if task.started_at else None
                    ),
                }
            )

        analysis["error_by_type"] = error_by_type

        # Error timeline
        failed_with_time = [t for t in failed_tasks if t.started_at]
        failed_with_time.sort(key=lambda t: t.started_at)

        analysis["error_timeline"] = [
            {
                "time": t.started_at.isoformat(),
                "node_id": t.node_id,
                "node_type": t.node_type,
                "error": t.error,
            }
            for t in failed_with_time
        ]

        return analysis

    def _generate_insights(self, analysis: Dict[str, Any]) -> List[PerformanceInsight]:
        """Generate actionable insights from analysis results."""
        insights = []

        if not self.config.include_recommendations:
            return insights

        summary = analysis["summary"]
        bottlenecks = analysis["bottlenecks"]
        resource_analysis = analysis["resource_analysis"]
        error_analysis = analysis["error_analysis"]

        # Efficiency insights
        if summary.efficiency_score < 70:
            insights.append(
                PerformanceInsight(
                    category="optimization",
                    severity="high",
                    title="Low Overall Efficiency",
                    description=f"Workflow efficiency score is {summary.efficiency_score:.1f}/100, indicating room for improvement.",
                    recommendation="Review task resource usage and consider optimizing high-CPU or memory-intensive operations.",
                    metrics={"efficiency_score": summary.efficiency_score},
                )
            )

        # Bottleneck insights
        duration_bottlenecks = [b for b in bottlenecks if b["type"] == "duration"]
        if duration_bottlenecks:
            slowest = duration_bottlenecks[0]
            insights.append(
                PerformanceInsight(
                    category="bottleneck",
                    severity=slowest["severity"],
                    title="Execution Time Bottleneck",
                    description=f"Task {slowest['node_id']} ({slowest['node_type']}) is taking {slowest['value']:.2f}s, significantly longer than average.",
                    recommendation="Consider optimizing this task or running it in parallel with other operations.",
                    metrics={
                        "duration": slowest["value"],
                        "threshold": slowest["threshold"],
                    },
                )
            )

        # Memory insights
        memory_bottlenecks = [b for b in bottlenecks if b["type"] == "memory"]
        if memory_bottlenecks:
            memory_heavy = memory_bottlenecks[0]
            insights.append(
                PerformanceInsight(
                    category="bottleneck",
                    severity=memory_heavy["severity"],
                    title="High Memory Usage",
                    description=f"Task {memory_heavy['node_id']} is using {memory_heavy['value']:.1f}MB of memory.",
                    recommendation="Consider processing data in chunks or optimizing data structures to reduce memory footprint.",
                    metrics={"memory_mb": memory_heavy["value"]},
                )
            )

        # Error insights
        if error_analysis["error_summary"]["error_rate"] > 10:
            insights.append(
                PerformanceInsight(
                    category="warning",
                    severity="high",
                    title="High Error Rate",
                    description=f"Error rate is {error_analysis['error_summary']['error_rate']:.1f}%, indicating reliability issues.",
                    recommendation="Review error logs and implement better error handling and retry mechanisms.",
                    metrics={
                        "error_rate": error_analysis["error_summary"]["error_rate"]
                    },
                )
            )

        # Success rate insights
        success_rate = (
            (summary.completed_tasks / summary.total_tasks) * 100
            if summary.total_tasks > 0
            else 0
        )
        if success_rate < 95:
            insights.append(
                PerformanceInsight(
                    category="warning",
                    severity="medium",
                    title="Low Success Rate",
                    description=f"Only {success_rate:.1f}% of tasks completed successfully.",
                    recommendation="Investigate failed tasks and improve error handling mechanisms.",
                    metrics={"success_rate": success_rate},
                )
            )

        # Throughput insights
        if summary.throughput < 1:  # Less than 1 task per minute
            insights.append(
                PerformanceInsight(
                    category="optimization",
                    severity="medium",
                    title="Low Throughput",
                    description=f"Workflow throughput is {summary.throughput:.2f} tasks/minute.",
                    recommendation="Consider parallelizing tasks or optimizing slow operations to improve throughput.",
                    metrics={"throughput": summary.throughput},
                )
            )

        return insights

    def _generate_analysis_charts(
        self, run_id: str, tasks: List[TaskRun]
    ) -> Dict[str, str]:
        """Generate analysis charts and return file paths."""
        charts = {}

        try:
            # Use existing performance visualizer
            chart_outputs = self.performance_viz.create_run_performance_summary(run_id)
            charts.update(chart_outputs)
        except Exception as e:
            self.logger.warning(f"Failed to generate charts: {e}")

        return charts

    def _compare_runs(self, run_ids: List[str]) -> Dict[str, Any]:
        """Compare performance across multiple runs."""
        comparison = {"runs": [], "trends": {}, "relative_performance": {}}

        run_summaries = []
        for run_id in run_ids:
            try:
                run = self.task_manager.get_run(run_id)
                tasks = self.task_manager.get_run_tasks(run_id)
                summary = self._calculate_workflow_summary(run, tasks)
                run_summaries.append(summary)
            except Exception as e:
                self.logger.warning(f"Failed to analyze run {run_id}: {e}")

        if len(run_summaries) < 2:
            return comparison

        comparison["runs"] = [
            {
                "run_id": s.run_id,
                "workflow_name": s.workflow_name,
                "total_duration": s.total_duration,
                "efficiency_score": s.efficiency_score,
                "throughput": s.throughput,
                "success_rate": (
                    (s.completed_tasks / s.total_tasks) * 100
                    if s.total_tasks > 0
                    else 0
                ),
            }
            for s in run_summaries
        ]

        # Calculate trends
        baseline = run_summaries[0]
        latest = run_summaries[-1]

        comparison["trends"] = {
            "duration_change": (
                (
                    (latest.total_duration - baseline.total_duration)
                    / baseline.total_duration
                    * 100
                )
                if baseline.total_duration > 0
                else 0
            ),
            "efficiency_change": latest.efficiency_score - baseline.efficiency_score,
            "throughput_change": (
                ((latest.throughput - baseline.throughput) / baseline.throughput * 100)
                if baseline.throughput > 0
                else 0
            ),
        }

        return comparison

    def _generate_html_report(
        self,
        analysis: Dict[str, Any],
        insights: List[PerformanceInsight],
        comparison_data: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Generate HTML report content."""
        run_info = analysis["run_info"]
        summary = analysis["summary"]

        # CSS styles
        css_styles = self._get_report_css()

        # Build HTML sections
        header_section = f"""
        <header class="report-header">
            <h1>🚀 Workflow Performance Report</h1>
            <div class="run-info">
                <div class="info-item">
                    <span class="label">Run ID:</span>
                    <span class="value">{run_info['run_id']}</span>
                </div>
                <div class="info-item">
                    <span class="label">Workflow:</span>
                    <span class="value">{run_info['workflow_name']}</span>
                </div>
                <div class="info-item">
                    <span class="label">Started:</span>
                    <span class="value">{run_info['started_at']}</span>
                </div>
                <div class="info-item">
                    <span class="label">Status:</span>
                    <span class="value status-{run_info['status'].lower()}">{run_info['status']}</span>
                </div>
            </div>
        </header>
        """

        # Executive summary
        summary_section = f"""
        <section class="executive-summary">
            <h2>📊 Executive Summary</h2>
            <div class="summary-grid">
                <div class="summary-card">
                    <div class="metric-value">{summary.total_tasks}</div>
                    <div class="metric-label">Total Tasks</div>
                </div>
                <div class="summary-card">
                    <div class="metric-value">{summary.completed_tasks}</div>
                    <div class="metric-label">Completed</div>
                </div>
                <div class="summary-card">
                    <div class="metric-value">{summary.failed_tasks}</div>
                    <div class="metric-label">Failed</div>
                </div>
                <div class="summary-card">
                    <div class="metric-value">{summary.total_duration:.1f}s</div>
                    <div class="metric-label">Duration</div>
                </div>
                <div class="summary-card">
                    <div class="metric-value">{summary.avg_cpu_usage:.1f}%</div>
                    <div class="metric-label">Avg CPU</div>
                </div>
                <div class="summary-card">
                    <div class="metric-value">{summary.peak_memory_usage:.0f}MB</div>
                    <div class="metric-label">Peak Memory</div>
                </div>
                <div class="summary-card">
                    <div class="metric-value">{summary.throughput:.1f}</div>
                    <div class="metric-label">Tasks/Min</div>
                </div>
                <div class="summary-card">
                    <div class="metric-value efficiency-score">{summary.efficiency_score:.0f}/100</div>
                    <div class="metric-label">Efficiency Score</div>
                </div>
            </div>
        </section>
        """

        # Insights section
        insights_section = ""
        if insights:
            insight_items = ""
            for insight in insights:
                severity_class = f"severity-{insight.severity}"
                category_icon = {
                    "bottleneck": "🔍",
                    "optimization": "⚡",
                    "warning": "⚠️",
                }.get(insight.category, "📋")

                insight_items += f"""
                <div class="insight-item {severity_class}">
                    <div class="insight-header">
                        <span class="insight-icon">{category_icon}</span>
                        <h4>{insight.title}</h4>
                        <span class="severity-badge {severity_class}">{insight.severity}</span>
                    </div>
                    <div class="insight-content">
                        <p class="description">{insight.description}</p>
                        <p class="recommendation"><strong>Recommendation:</strong> {insight.recommendation}</p>
                    </div>
                </div>
                """

            insights_section = f"""
            <section class="insights-section">
                <h2>💡 Performance Insights</h2>
                <div class="insights-container">
                    {insight_items}
                </div>
            </section>
            """

        # Charts section
        charts_section = ""
        if analysis.get("charts"):
            chart_items = ""
            for chart_name, chart_path in analysis["charts"].items():
                chart_items += f"""
                <div class="chart-item">
                    <h4>{chart_name.replace('_', ' ').title()}</h4>
                    <img src="{chart_path}" alt="{chart_name}" class="chart-image">
                </div>
                """

            charts_section = f"""
            <section class="charts-section">
                <h2>📈 Performance Visualizations</h2>
                <div class="charts-grid">
                    {chart_items}
                </div>
            </section>
            """

        # Comparison section
        comparison_section = ""
        if comparison_data and comparison_data.get("runs"):
            comparison_section = self._generate_comparison_html(comparison_data)

        # Combine all sections
        html_content = f"""
        <!DOCTYPE html>
        <html lang="en">
        <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <title>Workflow Performance Report</title>
            <style>{css_styles}</style>
        </head>
        <body>
            <div class="report-container">
                {header_section}
                {summary_section}
                {insights_section}
                {charts_section}
                {comparison_section}

                <footer class="report-footer">
                    <p>Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} by Kailash Workflow Performance Reporter</p>
                </footer>
            </div>
        </body>
        </html>
        """

        return html_content

    def _generate_markdown_report(
        self,
        analysis: Dict[str, Any],
        insights: List[PerformanceInsight],
        comparison_data: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Generate Markdown report content."""
        run_info = analysis["run_info"]
        summary = analysis["summary"]

        lines = []
        lines.append("# 🚀 Workflow Performance Report")
        lines.append("")
        lines.append(f"**Run ID:** {run_info['run_id']}")
        lines.append(f"**Workflow:** {run_info['workflow_name']}")
        lines.append(f"**Started:** {run_info['started_at']}")
        lines.append(f"**Status:** {run_info['status']}")
        lines.append("")

        # Executive Summary
        lines.append("## 📊 Executive Summary")
        lines.append("")
        lines.append("| Metric | Value |")
        lines.append("|--------|-------|")
        lines.append(f"| Total Tasks | {summary.total_tasks} |")
        lines.append(f"| Completed Tasks | {summary.completed_tasks} |")
        lines.append(f"| Failed Tasks | {summary.failed_tasks} |")
        lines.append(f"| Total Duration | {summary.total_duration:.2f}s |")
        lines.append(f"| Average CPU Usage | {summary.avg_cpu_usage:.1f}% |")
        lines.append(f"| Peak Memory Usage | {summary.peak_memory_usage:.0f}MB |")
        lines.append(f"| Throughput | {summary.throughput:.2f} tasks/min |")
        lines.append(f"| Efficiency Score | {summary.efficiency_score:.0f}/100 |")
        lines.append("")

        # Insights
        if insights:
            lines.append("## 💡 Performance Insights")
            lines.append("")
            for insight in insights:
                icon = {"bottleneck": "🔍", "optimization": "⚡", "warning": "⚠️"}.get(
                    insight.category, "📋"
                )
                lines.append(f"### {icon} {insight.title} ({insight.severity.upper()})")
                lines.append("")
                lines.append(f"**Description:** {insight.description}")
                lines.append("")
                lines.append(f"**Recommendation:** {insight.recommendation}")
                lines.append("")

        # Task Analysis
        task_analysis = analysis.get("task_analysis", {})
        if task_analysis.get("by_node_type"):
            lines.append("## 📋 Task Performance by Node Type")
            lines.append("")
            lines.append(
                "| Node Type | Count | Completed | Avg Duration | Success Rate |"
            )
            lines.append(
                "|-----------|-------|-----------|--------------|--------------|"
            )

            for node_type, stats in task_analysis["by_node_type"].items():
                lines.append(
                    f"| {node_type} | {stats['count']} | {stats['completed']} | "
                    f"{stats['avg_duration']:.2f}s | {stats['success_rate']:.1f}% |"
                )
            lines.append("")

        # Bottlenecks
        bottlenecks = analysis.get("bottlenecks", [])
        if bottlenecks:
            lines.append("## 🔍 Performance Bottlenecks")
            lines.append("")
            for bottleneck in bottlenecks[:5]:  # Top 5 bottlenecks
                lines.append(
                    f"- **{bottleneck['node_id']}** ({bottleneck['node_type']}): "
                    f"{bottleneck['type']} = {bottleneck['value']:.2f} "
                    f"(threshold: {bottleneck['threshold']:.2f}) - {bottleneck['severity']} severity"
                )
            lines.append("")

        # Error Analysis
        error_analysis = analysis.get("error_analysis", {})
        if error_analysis.get("error_summary", {}).get("total_errors", 0) > 0:
            lines.append("## ⚠️ Error Analysis")
            lines.append("")
            error_summary = error_analysis["error_summary"]
            lines.append(f"- **Total Errors:** {error_summary['total_errors']}")
            lines.append(f"- **Error Rate:** {error_summary['error_rate']:.1f}%")
            lines.append(
                f"- **Critical Failures:** {error_summary['critical_failures']}"
            )
            lines.append("")

        # Comparison
        if comparison_data and comparison_data.get("runs"):
            lines.append("## 📈 Performance Comparison")
            lines.append("")
            trends = comparison_data.get("trends", {})
            lines.append("**Trends vs Previous Run:**")
            lines.append(f"- Duration Change: {trends.get('duration_change', 0):.1f}%")
            lines.append(
                f"- Efficiency Change: {trends.get('efficiency_change', 0):.1f} points"
            )
            lines.append(
                f"- Throughput Change: {trends.get('throughput_change', 0):.1f}%"
            )
            lines.append("")

        lines.append("---")
        lines.append(
            f"*Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} by Kailash Performance Reporter*"
        )

        return "\n".join(lines)

    def _generate_json_report(
        self,
        analysis: Dict[str, Any],
        insights: List[PerformanceInsight],
        comparison_data: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Generate JSON report content."""
        report_data = {
            "metadata": {
                "generated_at": datetime.now().isoformat(),
                "generator": "Kailash Workflow Performance Reporter",
                "version": "1.0",
            },
            "run_info": analysis["run_info"],
            "summary": {
                "total_tasks": analysis["summary"].total_tasks,
                "completed_tasks": analysis["summary"].completed_tasks,
                "failed_tasks": analysis["summary"].failed_tasks,
                "total_duration": analysis["summary"].total_duration,
                "avg_cpu_usage": analysis["summary"].avg_cpu_usage,
                "peak_memory_usage": analysis["summary"].peak_memory_usage,
                "throughput": analysis["summary"].throughput,
                "efficiency_score": analysis["summary"].efficiency_score,
            },
            "insights": [
                {
                    "category": insight.category,
                    "severity": insight.severity,
                    "title": insight.title,
                    "description": insight.description,
                    "recommendation": insight.recommendation,
                    "metrics": insight.metrics,
                }
                for insight in insights
            ],
            "detailed_analysis": {
                "task_analysis": analysis.get("task_analysis", {}),
                "bottlenecks": analysis.get("bottlenecks", []),
                "resource_analysis": analysis.get("resource_analysis", {}),
                "error_analysis": analysis.get("error_analysis", {}),
            },
        }

        if comparison_data:
            report_data["comparison"] = comparison_data

        return json.dumps(report_data, indent=2, default=str)

    def _generate_comparison_html(self, comparison_data: Dict[str, Any]) -> str:
        """Generate HTML for run comparison section."""
        runs = comparison_data.get("runs", [])
        trends = comparison_data.get("trends", {})

        if not runs:
            return ""

        # Build comparison table
        table_rows = ""
        for run in runs:
            table_rows += f"""
                <tr>
                    <td>{run['run_id'][:8]}...</td>
                    <td>{run['total_duration']:.1f}s</td>
                    <td>{run['efficiency_score']:.0f}/100</td>
                    <td>{run['throughput']:.2f}</td>
                    <td>{run['success_rate']:.1f}%</td>
                </tr>
            """

        # Trend indicators
        duration_trend = "📈" if trends.get("duration_change", 0) > 0 else "📉"
        efficiency_trend = "📈" if trends.get("efficiency_change", 0) > 0 else "📉"
        throughput_trend = "📈" if trends.get("throughput_change", 0) > 0 else "📉"

        return f"""
        <section class="comparison-section">
            <h2>📈 Performance Comparison</h2>

            <div class="trends-summary">
                <h3>Trends vs Previous Run</h3>
                <div class="trends-grid">
                    <div class="trend-item">
                        <span class="trend-icon">{duration_trend}</span>
                        <span class="trend-label">Duration</span>
                        <span class="trend-value">{trends.get('duration_change', 0):+.1f}%</span>
                    </div>
                    <div class="trend-item">
                        <span class="trend-icon">{efficiency_trend}</span>
                        <span class="trend-label">Efficiency</span>
                        <span class="trend-value">{trends.get('efficiency_change', 0):+.1f}</span>
                    </div>
                    <div class="trend-item">
                        <span class="trend-icon">{throughput_trend}</span>
                        <span class="trend-label">Throughput</span>
                        <span class="trend-value">{trends.get('throughput_change', 0):+.1f}%</span>
                    </div>
                </div>
            </div>

            <table class="comparison-table">
                <thead>
                    <tr>
                        <th>Run ID</th>
                        <th>Duration</th>
                        <th>Efficiency</th>
                        <th>Throughput</th>
                        <th>Success Rate</th>
                    </tr>
                </thead>
                <tbody>
                    {table_rows}
                </tbody>
            </table>
        </section>
        """

    def _get_report_css(self) -> str:
        """Get CSS styles for HTML reports."""
        return """
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            line-height: 1.6;
            color: #333;
            background-color: #f8f9fa;
        }

        .report-container {
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
        }

        .report-header {
            background: white;
            padding: 30px;
            border-radius: 12px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            margin-bottom: 30px;
        }

        .report-header h1 {
            color: #2c3e50;
            margin-bottom: 20px;
            font-size: 2.5em;
        }

        .run-info {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 15px;
        }

        .info-item {
            display: flex;
            flex-direction: column;
        }

        .info-item .label {
            font-weight: bold;
            color: #7f8c8d;
            font-size: 0.9em;
        }

        .info-item .value {
            font-size: 1.1em;
            color: #2c3e50;
        }

        .status-completed { color: #27ae60; }
        .status-failed { color: #e74c3c; }
        .status-running { color: #3498db; }

        section {
            background: white;
            margin-bottom: 30px;
            padding: 30px;
            border-radius: 12px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }

        section h2 {
            color: #2c3e50;
            margin-bottom: 25px;
            font-size: 1.8em;
            border-bottom: 2px solid #ecf0f1;
            padding-bottom: 10px;
        }

        .summary-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
            gap: 20px;
        }

        .summary-card {
            text-align: center;
            padding: 20px;
            background: #f8f9fa;
            border-radius: 8px;
            border: 1px solid #ecf0f1;
        }

        .metric-value {
            font-size: 2em;
            font-weight: bold;
            color: #3498db;
            display: block;
        }

        .efficiency-score {
            color: #27ae60;
        }

        .metric-label {
            color: #7f8c8d;
            font-size: 0.9em;
            margin-top: 5px;
        }

        .insights-container {
            display: flex;
            flex-direction: column;
            gap: 20px;
        }

        .insight-item {
            border-left: 4px solid #3498db;
            padding: 20px;
            background: #f8f9fa;
            border-radius: 0 8px 8px 0;
        }

        .insight-item.severity-high {
            border-left-color: #e74c3c;
        }

        .insight-item.severity-medium {
            border-left-color: #f39c12;
        }

        .insight-item.severity-low {
            border-left-color: #27ae60;
        }

        .insight-header {
            display: flex;
            align-items: center;
            gap: 10px;
            margin-bottom: 15px;
        }

        .insight-icon {
            font-size: 1.2em;
        }

        .insight-header h4 {
            flex: 1;
            color: #2c3e50;
            margin: 0;
        }

        .severity-badge {
            padding: 4px 8px;
            border-radius: 4px;
            font-size: 0.8em;
            font-weight: bold;
            text-transform: uppercase;
            color: white;
        }

        .severity-high {
            background: #e74c3c;
        }

        .severity-medium {
            background: #f39c12;
        }

        .severity-low {
            background: #27ae60;
        }

        .description {
            margin-bottom: 10px;
            color: #555;
        }

        .recommendation {
            color: #2c3e50;
        }

        .charts-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
            gap: 20px;
        }

        .chart-item {
            text-align: center;
        }

        .chart-item h4 {
            margin-bottom: 15px;
            color: #2c3e50;
        }

        .chart-image {
            max-width: 100%;
            height: auto;
            border: 1px solid #ecf0f1;
            border-radius: 8px;
        }

        .trends-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
            gap: 15px;
            margin-bottom: 20px;
        }

        .trend-item {
            text-align: center;
            padding: 15px;
            background: #f8f9fa;
            border-radius: 8px;
        }

        .trend-icon {
            font-size: 1.5em;
            display: block;
            margin-bottom: 5px;
        }

        .trend-label {
            display: block;
            color: #7f8c8d;
            font-size: 0.9em;
        }

        .trend-value {
            font-weight: bold;
            color: #2c3e50;
        }

        .comparison-table {
            width: 100%;
            border-collapse: collapse;
            margin-top: 20px;
        }

        .comparison-table th,
        .comparison-table td {
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid #ecf0f1;
        }

        .comparison-table th {
            background: #f8f9fa;
            font-weight: bold;
            color: #2c3e50;
        }

        .report-footer {
            text-align: center;
            color: #7f8c8d;
            font-size: 0.9em;
            padding: 20px;
            border-top: 1px solid #ecf0f1;
        }
        """
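For orientation, here is a minimal usage sketch of the module's entry point, based only on the docstrings above. The bare `TaskManager()` construction, the `"run-123"` run ID, and the output path are illustrative assumptions, not part of the published package.

# Illustrative sketch; not part of the kailash-0.1.0 wheel.
from kailash.tracking.manager import TaskManager
from kailash.visualization.reports import (
    ReportConfig,
    ReportFormat,
    WorkflowPerformanceReporter,
)

# Assumes a TaskManager that already holds tracking data for the run;
# default construction here is a guess — wire it up as your deployment requires.
task_manager = TaskManager()

# Skip chart rendering for a faster, text-only report.
config = ReportConfig(include_charts=False)
reporter = WorkflowPerformanceReporter(task_manager, config=config)

# "run-123" is a placeholder run ID recorded by the TaskManager.
report_path = reporter.generate_report(
    "run-123",
    output_path="outputs/report.md",
    format=ReportFormat.MARKDOWN,
)
print(f"Report written to {report_path}")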