kailash 0.1.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +31 -0
- kailash/__main__.py +11 -0
- kailash/cli/__init__.py +5 -0
- kailash/cli/commands.py +563 -0
- kailash/manifest.py +778 -0
- kailash/nodes/__init__.py +23 -0
- kailash/nodes/ai/__init__.py +26 -0
- kailash/nodes/ai/agents.py +417 -0
- kailash/nodes/ai/models.py +488 -0
- kailash/nodes/api/__init__.py +52 -0
- kailash/nodes/api/auth.py +567 -0
- kailash/nodes/api/graphql.py +480 -0
- kailash/nodes/api/http.py +598 -0
- kailash/nodes/api/rate_limiting.py +572 -0
- kailash/nodes/api/rest.py +665 -0
- kailash/nodes/base.py +1032 -0
- kailash/nodes/base_async.py +128 -0
- kailash/nodes/code/__init__.py +32 -0
- kailash/nodes/code/python.py +1021 -0
- kailash/nodes/data/__init__.py +125 -0
- kailash/nodes/data/readers.py +496 -0
- kailash/nodes/data/sharepoint_graph.py +623 -0
- kailash/nodes/data/sql.py +380 -0
- kailash/nodes/data/streaming.py +1168 -0
- kailash/nodes/data/vector_db.py +964 -0
- kailash/nodes/data/writers.py +529 -0
- kailash/nodes/logic/__init__.py +6 -0
- kailash/nodes/logic/async_operations.py +702 -0
- kailash/nodes/logic/operations.py +551 -0
- kailash/nodes/transform/__init__.py +5 -0
- kailash/nodes/transform/processors.py +379 -0
- kailash/runtime/__init__.py +6 -0
- kailash/runtime/async_local.py +356 -0
- kailash/runtime/docker.py +697 -0
- kailash/runtime/local.py +434 -0
- kailash/runtime/parallel.py +557 -0
- kailash/runtime/runner.py +110 -0
- kailash/runtime/testing.py +347 -0
- kailash/sdk_exceptions.py +307 -0
- kailash/tracking/__init__.py +7 -0
- kailash/tracking/manager.py +885 -0
- kailash/tracking/metrics_collector.py +342 -0
- kailash/tracking/models.py +535 -0
- kailash/tracking/storage/__init__.py +0 -0
- kailash/tracking/storage/base.py +113 -0
- kailash/tracking/storage/database.py +619 -0
- kailash/tracking/storage/filesystem.py +543 -0
- kailash/utils/__init__.py +0 -0
- kailash/utils/export.py +924 -0
- kailash/utils/templates.py +680 -0
- kailash/visualization/__init__.py +62 -0
- kailash/visualization/api.py +732 -0
- kailash/visualization/dashboard.py +951 -0
- kailash/visualization/performance.py +808 -0
- kailash/visualization/reports.py +1471 -0
- kailash/workflow/__init__.py +15 -0
- kailash/workflow/builder.py +245 -0
- kailash/workflow/graph.py +827 -0
- kailash/workflow/mermaid_visualizer.py +628 -0
- kailash/workflow/mock_registry.py +63 -0
- kailash/workflow/runner.py +302 -0
- kailash/workflow/state.py +238 -0
- kailash/workflow/visualization.py +588 -0
- kailash-0.1.0.dist-info/METADATA +710 -0
- kailash-0.1.0.dist-info/RECORD +69 -0
- kailash-0.1.0.dist-info/WHEEL +5 -0
- kailash-0.1.0.dist-info/entry_points.txt +2 -0
- kailash-0.1.0.dist-info/licenses/LICENSE +21 -0
- kailash-0.1.0.dist-info/top_level.txt +1 -0
kailash/visualization/performance.py
@@ -0,0 +1,808 @@
"""Performance visualization for task tracking metrics.

This module provides visualization capabilities for performance metrics collected
during workflow execution, integrating with the TaskManager to create comprehensive
performance reports and graphs.

Design Purpose:
- Visualize real-time performance data from task executions
- Support various chart types for different metrics
- Generate both static images and interactive visualizations
- Integrate with existing workflow visualization framework

Upstream Dependencies:
- TaskManager provides task run data with metrics
- MetricsCollector provides performance data format
- WorkflowVisualizer provides base visualization infrastructure

Downstream Consumers:
- Examples use this for performance reporting
- Export utilities include performance visualizations
- Web dashboards can embed generated charts
"""

import logging
from pathlib import Path
from typing import Any, Dict, List, Optional

import matplotlib.pyplot as plt
import numpy as np

from kailash.tracking.manager import TaskManager
from kailash.tracking.models import TaskRun, TaskStatus

logger = logging.getLogger(__name__)


class PerformanceVisualizer:
    """Creates performance visualizations from task execution metrics.

    This class provides methods to generate various performance charts and
    reports from task execution data collected by the TaskManager.
    """

    def __init__(self, task_manager: TaskManager):
        """Initialize performance visualizer.

        Args:
            task_manager: TaskManager instance with execution data
        """
        self.task_manager = task_manager
        self.logger = logger

    def create_run_performance_summary(
        self, run_id: str, output_dir: Optional[Path] = None
    ) -> Dict[str, Path]:
        """Create comprehensive performance summary for a workflow run.

        Args:
            run_id: Run ID to visualize
            output_dir: Directory to save visualizations

        Returns:
            Dictionary mapping chart names to file paths
        """
        if output_dir is None:
            # Use relative path that works from project root or create in current directory
            output_dir = Path.cwd() / "outputs" / "performance"
        output_dir.mkdir(parents=True, exist_ok=True)

        # Get run data
        run = self.task_manager.get_run(run_id)
        if not run:
            raise ValueError(f"Run {run_id} not found")

        tasks = self.task_manager.get_run_tasks(run_id)
        if not tasks:
            self.logger.warning(f"No tasks found for run {run_id}")
            return {}

        outputs = {}

        # Generate different visualizations
        outputs["execution_timeline"] = self._create_execution_timeline(
            tasks, output_dir / f"timeline_{run_id}.png"
        )

        outputs["resource_usage"] = self._create_resource_usage_chart(
            tasks, output_dir / f"resources_{run_id}.png"
        )

        outputs["performance_comparison"] = self._create_node_performance_comparison(
            tasks, output_dir / f"comparison_{run_id}.png"
        )

        outputs["io_analysis"] = self._create_io_analysis(
            tasks, output_dir / f"io_analysis_{run_id}.png"
        )

        outputs["performance_heatmap"] = self._create_performance_heatmap(
            tasks, output_dir / f"heatmap_{run_id}.png"
        )

        # Generate markdown report
        outputs["report"] = self._create_performance_report(
            run, tasks, output_dir / f"report_{run_id}.md"
        )

        return outputs

    def _create_execution_timeline(
        self, tasks: List[TaskRun], output_path: Path
    ) -> Path:
        """Create Gantt-style execution timeline."""
        fig, ax = plt.subplots(figsize=(12, max(6, len(tasks) * 0.5)))

        # Sort tasks by start time
        tasks_with_times = []
        for task in tasks:
            if task.started_at and task.ended_at:
                tasks_with_times.append(task)

        if not tasks_with_times:
            ax.text(
                0.5,
                0.5,
                "No timing data available",
                ha="center",
                va="center",
                transform=ax.transAxes,
            )
            plt.savefig(output_path)
            plt.close()
            return output_path

        tasks_with_times.sort(key=lambda t: t.started_at)

        # Calculate timeline bounds
        min_time = min(t.started_at for t in tasks_with_times)
        max_time = max(t.ended_at for t in tasks_with_times)

        # Create timeline bars
        y_positions = []
        labels = []
        colors = []

        for i, task in enumerate(tasks_with_times):
            start_offset = (task.started_at - min_time).total_seconds()
            duration = (task.ended_at - task.started_at).total_seconds()

            # Color based on status
            color_map = {
                TaskStatus.COMPLETED: "green",
                TaskStatus.FAILED: "red",
                TaskStatus.CANCELLED: "orange",
                TaskStatus.RUNNING: "blue",
            }
            color = color_map.get(task.status, "gray")

            ax.barh(
                i,
                duration,
                left=start_offset,
                height=0.8,
                color=color,
                alpha=0.7,
                edgecolor="black",
                linewidth=1,
            )

            # Add metrics annotations if available
            if task.metrics and task.metrics.cpu_usage:
                ax.text(
                    start_offset + duration / 2,
                    i,
                    f"CPU: {task.metrics.cpu_usage:.1f}%",
                    ha="center",
                    va="center",
                    fontsize=8,
                )

            y_positions.append(i)
            labels.append(f"{task.node_id}\n({task.node_type})")
            colors.append(color)

        ax.set_yticks(y_positions)
        ax.set_yticklabels(labels)
        ax.set_xlabel("Time (seconds)")
        ax.set_title("Task Execution Timeline")
        ax.grid(True, axis="x", alpha=0.3)

        # Add legend
        from matplotlib.patches import Patch

        legend_elements = [
            Patch(facecolor="green", alpha=0.7, label="Completed"),
            Patch(facecolor="red", alpha=0.7, label="Failed"),
            Patch(facecolor="blue", alpha=0.7, label="Running"),
            Patch(facecolor="orange", alpha=0.7, label="Cancelled"),
        ]
        ax.legend(handles=legend_elements, loc="upper right")

        plt.tight_layout()
        plt.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close()

        return output_path

    def _create_resource_usage_chart(
        self, tasks: List[TaskRun], output_path: Path
    ) -> Path:
        """Create resource usage comparison chart."""
        fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 12))

        # Collect metrics data
        node_names = []
        cpu_usage = []
        memory_usage = []
        memory_delta = []
        durations = []

        for task in tasks:
            if task.metrics:
                node_names.append(f"{task.node_id}\n{task.node_type}")
                cpu_usage.append(task.metrics.cpu_usage or 0)
                memory_usage.append(task.metrics.memory_usage_mb or 0)

                # Get memory delta from custom metrics
                custom = task.metrics.custom_metrics or {}
                memory_delta.append(custom.get("memory_delta_mb", 0))
                durations.append(task.metrics.duration or 0)

        if not node_names:
            for ax in [ax1, ax2, ax3]:
                ax.text(
                    0.5,
                    0.5,
                    "No metrics data available",
                    ha="center",
                    va="center",
                    transform=ax.transAxes,
                )
            plt.savefig(output_path)
            plt.close()
            return output_path

        x = np.arange(len(node_names))

        # CPU usage chart
        bars1 = ax1.bar(x, cpu_usage, color="skyblue", edgecolor="black")
        ax1.set_ylabel("CPU Usage (%)")
        ax1.set_title("CPU Usage by Node")
        ax1.set_xticks(x)
        ax1.set_xticklabels(node_names, rotation=45, ha="right")
        ax1.grid(True, axis="y", alpha=0.3)

        # Add value labels
        for bar, value in zip(bars1, cpu_usage):
            if value > 0:
                ax1.text(
                    bar.get_x() + bar.get_width() / 2,
                    bar.get_height() + 1,
                    f"{value:.1f}%",
                    ha="center",
                    va="bottom",
                    fontsize=8,
                )

        # Memory usage chart
        bars2 = ax2.bar(x, memory_usage, color="lightgreen", edgecolor="black")
        bars2_delta = ax2.bar(
            x,
            memory_delta,
            bottom=memory_usage,
            color="darkgreen",
            alpha=0.5,
            edgecolor="black",
        )
        ax2.set_ylabel("Memory (MB)")
        ax2.set_title("Memory Usage by Node")
        ax2.set_xticks(x)
        ax2.set_xticklabels(node_names, rotation=45, ha="right")
        ax2.grid(True, axis="y", alpha=0.3)
        ax2.legend(["Peak Memory", "Memory Delta"])

        # Duration chart
        bars3 = ax3.bar(x, durations, color="lightcoral", edgecolor="black")
        ax3.set_ylabel("Duration (seconds)")
        ax3.set_title("Execution Time by Node")
        ax3.set_xticks(x)
        ax3.set_xticklabels(node_names, rotation=45, ha="right")
        ax3.grid(True, axis="y", alpha=0.3)

        # Add value labels
        for bar, value in zip(bars3, durations):
            if value > 0:
                ax3.text(
                    bar.get_x() + bar.get_width() / 2,
                    bar.get_height() + 0.01,
                    f"{value:.2f}s",
                    ha="center",
                    va="bottom",
                    fontsize=8,
                )

        plt.suptitle("Resource Usage Analysis", fontsize=14)
        plt.tight_layout()
        plt.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close()

        return output_path

    def _create_node_performance_comparison(
        self, tasks: List[TaskRun], output_path: Path
    ) -> Path:
        """Create performance comparison radar chart."""
        # Group tasks by node type
        node_type_metrics = {}

        for task in tasks:
            if task.metrics and task.status == TaskStatus.COMPLETED:
                node_type = task.node_type
                if node_type not in node_type_metrics:
                    node_type_metrics[node_type] = {
                        "cpu": [],
                        "memory": [],
                        "duration": [],
                        "io_read": [],
                        "io_write": [],
                    }

                metrics = node_type_metrics[node_type]
                metrics["cpu"].append(task.metrics.cpu_usage or 0)
                metrics["memory"].append(task.metrics.memory_usage_mb or 0)
                metrics["duration"].append(task.metrics.duration or 0)

                custom = task.metrics.custom_metrics or {}
                metrics["io_read"].append(
                    custom.get("io_read_bytes", 0) / 1024 / 1024
                )  # MB
                metrics["io_write"].append(
                    custom.get("io_write_bytes", 0) / 1024 / 1024
                )  # MB

        if not node_type_metrics:
            fig, ax = plt.subplots(figsize=(8, 6))
            ax.text(
                0.5,
                0.5,
                "No performance data available",
                ha="center",
                va="center",
                transform=ax.transAxes,
            )
            plt.savefig(output_path)
            plt.close()
            return output_path

        # Calculate averages
        avg_metrics = {}
        for node_type, metrics in node_type_metrics.items():
            avg_metrics[node_type] = {
                "CPU %": np.mean(metrics["cpu"]) if metrics["cpu"] else 0,
                "Memory MB": np.mean(metrics["memory"]) if metrics["memory"] else 0,
                "Duration s": (
                    np.mean(metrics["duration"]) if metrics["duration"] else 0
                ),
                "I/O Read MB": np.mean(metrics["io_read"]) if metrics["io_read"] else 0,
                "I/O Write MB": (
                    np.mean(metrics["io_write"]) if metrics["io_write"] else 0
                ),
            }

        # Create radar chart
        fig, ax = plt.subplots(figsize=(10, 8), subplot_kw=dict(projection="polar"))

        categories = list(next(iter(avg_metrics.values())).keys())
        num_vars = len(categories)

        angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
        angles += angles[:1]  # Complete the circle

        colors = plt.cm.tab10(np.linspace(0, 1, len(avg_metrics)))

        for (node_type, metrics), color in zip(avg_metrics.items(), colors):
            values = list(metrics.values())

            # Normalize values to 0-100 scale for better visualization
            max_vals = {
                "CPU %": 100,
                "Memory MB": max(max(m["Memory MB"] for m in avg_metrics.values()), 1),
                "Duration s": max(
                    max(m["Duration s"] for m in avg_metrics.values()), 1
                ),
                "I/O Read MB": max(
                    max(m["I/O Read MB"] for m in avg_metrics.values()), 1
                ),
                "I/O Write MB": max(
                    max(m["I/O Write MB"] for m in avg_metrics.values()), 1
                ),
            }

            normalized_values = []
            for cat, val in zip(categories, values):
                normalized_values.append((val / max_vals[cat]) * 100)

            normalized_values += normalized_values[:1]

            ax.plot(
                angles,
                normalized_values,
                "o-",
                linewidth=2,
                label=node_type,
                color=color,
            )
            ax.fill(angles, normalized_values, alpha=0.25, color=color)

        ax.set_xticks(angles[:-1])
        ax.set_xticklabels(categories)
        ax.set_ylim(0, 100)
        ax.set_ylabel("Relative Performance (0-100)", labelpad=30)
        ax.set_title(
            "Node Type Performance Comparison\n(Normalized to 0-100 scale)", pad=20
        )
        ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.0))
        ax.grid(True)

        plt.tight_layout()
        plt.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close()

        return output_path

    def _create_io_analysis(self, tasks: List[TaskRun], output_path: Path) -> Path:
        """Create I/O operations analysis chart."""
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))

        # Collect I/O data
        node_names = []
        io_read_bytes = []
        io_write_bytes = []
        io_read_count = []
        io_write_count = []

        for task in tasks:
            if task.metrics and task.metrics.custom_metrics:
                custom = task.metrics.custom_metrics
                if any(
                    custom.get(k, 0) > 0
                    for k in [
                        "io_read_bytes",
                        "io_write_bytes",
                        "io_read_count",
                        "io_write_count",
                    ]
                ):
                    node_names.append(f"{task.node_id}")
                    io_read_bytes.append(
                        custom.get("io_read_bytes", 0) / 1024 / 1024
                    )  # MB
                    io_write_bytes.append(
                        custom.get("io_write_bytes", 0) / 1024 / 1024
                    )  # MB
                    io_read_count.append(custom.get("io_read_count", 0))
                    io_write_count.append(custom.get("io_write_count", 0))

        if not node_names:
            for ax in [ax1, ax2]:
                ax.text(
                    0.5,
                    0.5,
                    "No I/O data available",
                    ha="center",
                    va="center",
                    transform=ax.transAxes,
                )
            plt.savefig(output_path)
            plt.close()
            return output_path

        x = np.arange(len(node_names))
        width = 0.35

        # I/O bytes chart
        bars1 = ax1.bar(
            x - width / 2,
            io_read_bytes,
            width,
            label="Read",
            color="lightblue",
            edgecolor="black",
        )
        bars2 = ax1.bar(
            x + width / 2,
            io_write_bytes,
            width,
            label="Write",
            color="lightcoral",
            edgecolor="black",
        )

        ax1.set_ylabel("Data (MB)")
        ax1.set_title("I/O Data Transfer by Node")
        ax1.set_xticks(x)
        ax1.set_xticklabels(node_names, rotation=45, ha="right")
        ax1.legend()
        ax1.grid(True, axis="y", alpha=0.3)

        # I/O operations count chart
        bars3 = ax2.bar(
            x - width / 2,
            io_read_count,
            width,
            label="Read Ops",
            color="lightblue",
            edgecolor="black",
        )
        bars4 = ax2.bar(
            x + width / 2,
            io_write_count,
            width,
            label="Write Ops",
            color="lightcoral",
            edgecolor="black",
        )

        ax2.set_ylabel("Operation Count")
        ax2.set_title("I/O Operations Count by Node")
        ax2.set_xticks(x)
        ax2.set_xticklabels(node_names, rotation=45, ha="right")
        ax2.legend()
        ax2.grid(True, axis="y", alpha=0.3)

        plt.suptitle("I/O Operations Analysis", fontsize=14)
        plt.tight_layout()
        plt.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close()

        return output_path

    def _create_performance_heatmap(
        self, tasks: List[TaskRun], output_path: Path
    ) -> Path:
        """Create performance metrics heatmap."""
        # Prepare data matrix
        metrics_data = []
        node_labels = []

        metric_names = [
            "Duration (s)",
            "CPU %",
            "Memory (MB)",
            "I/O Read (MB)",
            "I/O Write (MB)",
        ]

        for task in tasks:
            if task.metrics:
                node_labels.append(f"{task.node_id}")
                custom = task.metrics.custom_metrics or {}

                row = [
                    task.metrics.duration or 0,
                    task.metrics.cpu_usage or 0,
                    task.metrics.memory_usage_mb or 0,
                    custom.get("io_read_bytes", 0) / 1024 / 1024,
                    custom.get("io_write_bytes", 0) / 1024 / 1024,
                ]
                metrics_data.append(row)

        if not metrics_data:
            fig, ax = plt.subplots(figsize=(8, 6))
            ax.text(
                0.5,
                0.5,
                "No metrics data available",
                ha="center",
                va="center",
                transform=ax.transAxes,
            )
            plt.savefig(output_path)
            plt.close()
            return output_path

        # Convert to numpy array and normalize
        data = np.array(metrics_data).T

        # Normalize each metric to 0-1 scale
        normalized_data = np.zeros_like(data)
        for i in range(data.shape[0]):
            row_max = data[i].max()
            if row_max > 0:
                normalized_data[i] = data[i] / row_max

        # Create heatmap
        fig, ax = plt.subplots(figsize=(max(10, len(node_labels) * 0.8), 8))

        im = ax.imshow(normalized_data, cmap="YlOrRd", aspect="auto")

        # Set ticks and labels
        ax.set_xticks(np.arange(len(node_labels)))
        ax.set_yticks(np.arange(len(metric_names)))
        ax.set_xticklabels(node_labels, rotation=45, ha="right")
        ax.set_yticklabels(metric_names)

        # Add text annotations
        for i in range(len(metric_names)):
            for j in range(len(node_labels)):
                value = data[i, j]
                color = "white" if normalized_data[i, j] > 0.5 else "black"

                # Format based on metric type
                if i == 0:  # Duration
                    text = f"{value:.2f}"
                elif i == 1:  # CPU %
                    text = f"{value:.1f}%"
                elif i in [2, 3, 4]:  # Memory, I/O
                    text = f"{value:.1f}"

                ax.text(j, i, text, ha="center", va="center", color=color, fontsize=8)

        ax.set_title("Performance Metrics Heatmap\n(Normalized by metric type)", pad=20)

        # Add colorbar
        cbar = plt.colorbar(im, ax=ax)
        cbar.set_label("Normalized Value (0-1)", rotation=270, labelpad=20)

        plt.tight_layout()
        plt.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close()

        return output_path

    def _create_performance_report(
        self, run: Any, tasks: List[TaskRun], output_path: Path
    ) -> Path:
        """Create markdown performance report."""
        lines = []
        lines.append(f"# Performance Report for Run {run.run_id}")
        lines.append(f"\n**Workflow:** {run.workflow_name}")
        lines.append(f"**Started:** {run.started_at}")
        lines.append(f"**Status:** {run.status}")
        lines.append(f"**Total Tasks:** {len(tasks)}")

        # Calculate summary statistics
        completed_tasks = [
            t for t in tasks if t.status == TaskStatus.COMPLETED and t.metrics
        ]

        if completed_tasks:
            total_duration = sum(t.metrics.duration or 0 for t in completed_tasks)
            avg_cpu = np.mean([t.metrics.cpu_usage or 0 for t in completed_tasks])
            max_memory = max((t.metrics.memory_usage_mb or 0) for t in completed_tasks)

            lines.append("\n## Summary Statistics")
            lines.append(f"- **Total Execution Time:** {total_duration:.2f} seconds")
            lines.append(f"- **Average CPU Usage:** {avg_cpu:.1f}%")
            lines.append(f"- **Peak Memory Usage:** {max_memory:.1f} MB")

        # Task details table
        lines.append("\n## Task Performance Details")
        lines.append("| Node ID | Type | Status | Duration (s) | CPU % | Memory (MB) |")
        lines.append("|---------|------|--------|-------------|-------|-------------|")

        for task in tasks:
            duration = (
                f"{task.metrics.duration:.2f}"
                if task.metrics and task.metrics.duration
                else "N/A"
            )
            cpu = (
                f"{task.metrics.cpu_usage:.1f}"
                if task.metrics and task.metrics.cpu_usage
                else "N/A"
            )
            memory = (
                f"{task.metrics.memory_usage_mb:.1f}"
                if task.metrics and task.metrics.memory_usage_mb
                else "N/A"
            )

            lines.append(
                f"| {task.node_id} | {task.node_type} | {task.status} | "
                f"{duration} | {cpu} | {memory} |"
            )

        # Performance insights
        lines.append("\n## Performance Insights")

        if completed_tasks:
            # Find bottlenecks
            slowest = max(completed_tasks, key=lambda t: t.metrics.duration or 0)
            lines.append("\n### Bottlenecks")
            lines.append(
                f"- **Slowest Node:** {slowest.node_id} ({slowest.metrics.duration:.2f}s)"
            )

            highest_cpu = max(completed_tasks, key=lambda t: t.metrics.cpu_usage or 0)
            if highest_cpu.metrics.cpu_usage > 80:
                lines.append(
                    f"- **High CPU Usage:** {highest_cpu.node_id} ({highest_cpu.metrics.cpu_usage:.1f}%)"
                )

            highest_memory = max(
                completed_tasks, key=lambda t: t.metrics.memory_usage_mb or 0
            )
            lines.append(
                f"- **Highest Memory:** {highest_memory.node_id} ({highest_memory.metrics.memory_usage_mb:.1f} MB)"
            )

        # Write report
        with open(output_path, "w") as f:
            f.write("\n".join(lines))

        return output_path

    def compare_runs(
        self, run_ids: List[str], output_path: Optional[Path] = None
    ) -> Path:
        """Compare performance across multiple runs."""
        if output_path is None:
            output_path = Path.cwd() / "outputs" / "performance" / "comparison.png"
        output_path.parent.mkdir(parents=True, exist_ok=True)

        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        axes = axes.flatten()

        # Collect metrics for each run
        run_metrics = {}
        for run_id in run_ids:
            tasks = self.task_manager.get_run_tasks(run_id)
            completed = [
                t for t in tasks if t.status == TaskStatus.COMPLETED and t.metrics
            ]

            if completed:
                run_metrics[run_id] = {
                    "total_duration": sum(t.metrics.duration or 0 for t in completed),
                    "avg_cpu": np.mean([t.metrics.cpu_usage or 0 for t in completed]),
                    "max_memory": max(
                        (t.metrics.memory_usage_mb or 0) for t in completed
                    ),
                    "task_count": len(completed),
                }

        if not run_metrics:
            for ax in axes:
                ax.text(
                    0.5,
                    0.5,
                    "No metrics data available",
                    ha="center",
                    va="center",
                    transform=ax.transAxes,
                )
            plt.savefig(output_path)
            plt.close()
            return output_path

        # Create comparison charts
        run_labels = list(run_metrics.keys())
        x = np.arange(len(run_labels))

        # Total duration
        durations = [run_metrics[r]["total_duration"] for r in run_labels]
        axes[0].bar(x, durations, color="lightblue", edgecolor="black")
        axes[0].set_ylabel("Total Duration (s)")
        axes[0].set_title("Total Execution Time")
        axes[0].set_xticks(x)
        axes[0].set_xticklabels(run_labels, rotation=45, ha="right")
        axes[0].grid(True, axis="y", alpha=0.3)

        # Average CPU
        cpu_avgs = [run_metrics[r]["avg_cpu"] for r in run_labels]
        axes[1].bar(x, cpu_avgs, color="lightgreen", edgecolor="black")
        axes[1].set_ylabel("Average CPU %")
        axes[1].set_title("Average CPU Usage")
        axes[1].set_xticks(x)
        axes[1].set_xticklabels(run_labels, rotation=45, ha="right")
        axes[1].grid(True, axis="y", alpha=0.3)

        # Max memory
        max_memories = [run_metrics[r]["max_memory"] for r in run_labels]
        axes[2].bar(x, max_memories, color="lightcoral", edgecolor="black")
        axes[2].set_ylabel("Peak Memory (MB)")
        axes[2].set_title("Peak Memory Usage")
        axes[2].set_xticks(x)
        axes[2].set_xticklabels(run_labels, rotation=45, ha="right")
        axes[2].grid(True, axis="y", alpha=0.3)

        # Task efficiency (duration per task)
        efficiencies = [
            run_metrics[r]["total_duration"] / run_metrics[r]["task_count"]
            for r in run_labels
        ]
        axes[3].bar(x, efficiencies, color="lightyellow", edgecolor="black")
        axes[3].set_ylabel("Avg Duration per Task (s)")
        axes[3].set_title("Task Efficiency")
        axes[3].set_xticks(x)
        axes[3].set_xticklabels(run_labels, rotation=45, ha="right")
        axes[3].grid(True, axis="y", alpha=0.3)

        plt.suptitle("Performance Comparison Across Runs", fontsize=16)
        plt.tight_layout()
        plt.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close()

        return output_path
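
For orientation, the sketch below shows how the API added in this file might be exercised. It is not part of the package diff; the run IDs, the TaskManager construction, and the output directory are illustrative assumptions, and only the PerformanceVisualizer calls themselves come from the code above.

# Hypothetical usage sketch (not part of the released package).
# Assumes a TaskManager already populated with data for a finished run;
# how TaskManager is constructed and persisted is an assumption here.
from pathlib import Path

from kailash.tracking.manager import TaskManager
from kailash.visualization.performance import PerformanceVisualizer

task_manager = TaskManager()  # assumed default construction
viz = PerformanceVisualizer(task_manager)

# Renders the timeline, resource-usage, comparison, I/O, and heatmap charts
# plus a markdown report, returning a dict of chart name -> file path.
charts = viz.create_run_performance_summary(
    run_id="example-run-id",  # placeholder run ID
    output_dir=Path("outputs/performance"),
)
for name, path in charts.items():
    print(f"{name}: {path}")

# Compare aggregate metrics across several runs (IDs are placeholders).
viz.compare_runs(["example-run-id", "another-run-id"])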