kailash 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. kailash/__init__.py +31 -0
  2. kailash/__main__.py +11 -0
  3. kailash/cli/__init__.py +5 -0
  4. kailash/cli/commands.py +563 -0
  5. kailash/manifest.py +778 -0
  6. kailash/nodes/__init__.py +23 -0
  7. kailash/nodes/ai/__init__.py +26 -0
  8. kailash/nodes/ai/agents.py +417 -0
  9. kailash/nodes/ai/models.py +488 -0
  10. kailash/nodes/api/__init__.py +52 -0
  11. kailash/nodes/api/auth.py +567 -0
  12. kailash/nodes/api/graphql.py +480 -0
  13. kailash/nodes/api/http.py +598 -0
  14. kailash/nodes/api/rate_limiting.py +572 -0
  15. kailash/nodes/api/rest.py +665 -0
  16. kailash/nodes/base.py +1032 -0
  17. kailash/nodes/base_async.py +128 -0
  18. kailash/nodes/code/__init__.py +32 -0
  19. kailash/nodes/code/python.py +1021 -0
  20. kailash/nodes/data/__init__.py +125 -0
  21. kailash/nodes/data/readers.py +496 -0
  22. kailash/nodes/data/sharepoint_graph.py +623 -0
  23. kailash/nodes/data/sql.py +380 -0
  24. kailash/nodes/data/streaming.py +1168 -0
  25. kailash/nodes/data/vector_db.py +964 -0
  26. kailash/nodes/data/writers.py +529 -0
  27. kailash/nodes/logic/__init__.py +6 -0
  28. kailash/nodes/logic/async_operations.py +702 -0
  29. kailash/nodes/logic/operations.py +551 -0
  30. kailash/nodes/transform/__init__.py +5 -0
  31. kailash/nodes/transform/processors.py +379 -0
  32. kailash/runtime/__init__.py +6 -0
  33. kailash/runtime/async_local.py +356 -0
  34. kailash/runtime/docker.py +697 -0
  35. kailash/runtime/local.py +434 -0
  36. kailash/runtime/parallel.py +557 -0
  37. kailash/runtime/runner.py +110 -0
  38. kailash/runtime/testing.py +347 -0
  39. kailash/sdk_exceptions.py +307 -0
  40. kailash/tracking/__init__.py +7 -0
  41. kailash/tracking/manager.py +885 -0
  42. kailash/tracking/metrics_collector.py +342 -0
  43. kailash/tracking/models.py +535 -0
  44. kailash/tracking/storage/__init__.py +0 -0
  45. kailash/tracking/storage/base.py +113 -0
  46. kailash/tracking/storage/database.py +619 -0
  47. kailash/tracking/storage/filesystem.py +543 -0
  48. kailash/utils/__init__.py +0 -0
  49. kailash/utils/export.py +924 -0
  50. kailash/utils/templates.py +680 -0
  51. kailash/visualization/__init__.py +62 -0
  52. kailash/visualization/api.py +732 -0
  53. kailash/visualization/dashboard.py +951 -0
  54. kailash/visualization/performance.py +808 -0
  55. kailash/visualization/reports.py +1471 -0
  56. kailash/workflow/__init__.py +15 -0
  57. kailash/workflow/builder.py +245 -0
  58. kailash/workflow/graph.py +827 -0
  59. kailash/workflow/mermaid_visualizer.py +628 -0
  60. kailash/workflow/mock_registry.py +63 -0
  61. kailash/workflow/runner.py +302 -0
  62. kailash/workflow/state.py +238 -0
  63. kailash/workflow/visualization.py +588 -0
  64. kailash-0.1.0.dist-info/METADATA +710 -0
  65. kailash-0.1.0.dist-info/RECORD +69 -0
  66. kailash-0.1.0.dist-info/WHEEL +5 -0
  67. kailash-0.1.0.dist-info/entry_points.txt +2 -0
  68. kailash-0.1.0.dist-info/licenses/LICENSE +21 -0
  69. kailash-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,808 @@
1
+ """Performance visualization for task tracking metrics.
2
+
3
+ This module provides visualization capabilities for performance metrics collected
4
+ during workflow execution, integrating with the TaskManager to create comprehensive
5
+ performance reports and graphs.
6
+
7
+ Design Purpose:
8
+ - Visualize real-time performance data from task executions
9
+ - Support various chart types for different metrics
10
+ - Generate both static images and interactive visualizations
11
+ - Integrate with existing workflow visualization framework
12
+
13
+ Upstream Dependencies:
14
+ - TaskManager provides task run data with metrics
15
+ - MetricsCollector provides performance data format
16
+ - WorkflowVisualizer provides base visualization infrastructure
17
+
18
+ Downstream Consumers:
19
+ - Examples use this for performance reporting
20
+ - Export utilities include performance visualizations
21
+ - Web dashboards can embed generated charts
22
+ """
23
+
24
+ import logging
25
+ from pathlib import Path
26
+ from typing import Any, Dict, List, Optional
27
+
28
+ import matplotlib.pyplot as plt
29
+ import numpy as np
30
+
31
+ from kailash.tracking.manager import TaskManager
32
+ from kailash.tracking.models import TaskRun, TaskStatus
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
class PerformanceVisualizer:
    """Generate charts and reports from recorded task-execution metrics.

    Wraps a :class:`TaskManager` and turns the task runs it stores into
    timeline, resource-usage, node-comparison, I/O and heatmap
    visualizations plus a markdown summary report.
    """

    def __init__(self, task_manager: TaskManager):
        """Create a visualizer bound to *task_manager*.

        Args:
            task_manager: TaskManager holding the execution data to plot.
        """
        self.task_manager = task_manager
        # Module-level logger is shared; kept as an attribute for callers
        # that expect `self.logger`.
        self.logger = logger
52
+
53
+ def create_run_performance_summary(
54
+ self, run_id: str, output_dir: Optional[Path] = None
55
+ ) -> Dict[str, Path]:
56
+ """Create comprehensive performance summary for a workflow run.
57
+
58
+ Args:
59
+ run_id: Run ID to visualize
60
+ output_dir: Directory to save visualizations
61
+
62
+ Returns:
63
+ Dictionary mapping chart names to file paths
64
+ """
65
+ if output_dir is None:
66
+ # Use relative path that works from project root or create in current directory
67
+ output_dir = Path.cwd() / "outputs" / "performance"
68
+ output_dir.mkdir(parents=True, exist_ok=True)
69
+
70
+ # Get run data
71
+ run = self.task_manager.get_run(run_id)
72
+ if not run:
73
+ raise ValueError(f"Run {run_id} not found")
74
+
75
+ tasks = self.task_manager.get_run_tasks(run_id)
76
+ if not tasks:
77
+ self.logger.warning(f"No tasks found for run {run_id}")
78
+ return {}
79
+
80
+ outputs = {}
81
+
82
+ # Generate different visualizations
83
+ outputs["execution_timeline"] = self._create_execution_timeline(
84
+ tasks, output_dir / f"timeline_{run_id}.png"
85
+ )
86
+
87
+ outputs["resource_usage"] = self._create_resource_usage_chart(
88
+ tasks, output_dir / f"resources_{run_id}.png"
89
+ )
90
+
91
+ outputs["performance_comparison"] = self._create_node_performance_comparison(
92
+ tasks, output_dir / f"comparison_{run_id}.png"
93
+ )
94
+
95
+ outputs["io_analysis"] = self._create_io_analysis(
96
+ tasks, output_dir / f"io_analysis_{run_id}.png"
97
+ )
98
+
99
+ outputs["performance_heatmap"] = self._create_performance_heatmap(
100
+ tasks, output_dir / f"heatmap_{run_id}.png"
101
+ )
102
+
103
+ # Generate markdown report
104
+ outputs["report"] = self._create_performance_report(
105
+ run, tasks, output_dir / f"report_{run_id}.md"
106
+ )
107
+
108
+ return outputs
109
+
110
+ def _create_execution_timeline(
111
+ self, tasks: List[TaskRun], output_path: Path
112
+ ) -> Path:
113
+ """Create Gantt-style execution timeline."""
114
+ fig, ax = plt.subplots(figsize=(12, max(6, len(tasks) * 0.5)))
115
+
116
+ # Sort tasks by start time
117
+ tasks_with_times = []
118
+ for task in tasks:
119
+ if task.started_at and task.ended_at:
120
+ tasks_with_times.append(task)
121
+
122
+ if not tasks_with_times:
123
+ ax.text(
124
+ 0.5,
125
+ 0.5,
126
+ "No timing data available",
127
+ ha="center",
128
+ va="center",
129
+ transform=ax.transAxes,
130
+ )
131
+ plt.savefig(output_path)
132
+ plt.close()
133
+ return output_path
134
+
135
+ tasks_with_times.sort(key=lambda t: t.started_at)
136
+
137
+ # Calculate timeline bounds
138
+ min_time = min(t.started_at for t in tasks_with_times)
139
+ max_time = max(t.ended_at for t in tasks_with_times)
140
+
141
+ # Create timeline bars
142
+ y_positions = []
143
+ labels = []
144
+ colors = []
145
+
146
+ for i, task in enumerate(tasks_with_times):
147
+ start_offset = (task.started_at - min_time).total_seconds()
148
+ duration = (task.ended_at - task.started_at).total_seconds()
149
+
150
+ # Color based on status
151
+ color_map = {
152
+ TaskStatus.COMPLETED: "green",
153
+ TaskStatus.FAILED: "red",
154
+ TaskStatus.CANCELLED: "orange",
155
+ TaskStatus.RUNNING: "blue",
156
+ }
157
+ color = color_map.get(task.status, "gray")
158
+
159
+ ax.barh(
160
+ i,
161
+ duration,
162
+ left=start_offset,
163
+ height=0.8,
164
+ color=color,
165
+ alpha=0.7,
166
+ edgecolor="black",
167
+ linewidth=1,
168
+ )
169
+
170
+ # Add metrics annotations if available
171
+ if task.metrics and task.metrics.cpu_usage:
172
+ ax.text(
173
+ start_offset + duration / 2,
174
+ i,
175
+ f"CPU: {task.metrics.cpu_usage:.1f}%",
176
+ ha="center",
177
+ va="center",
178
+ fontsize=8,
179
+ )
180
+
181
+ y_positions.append(i)
182
+ labels.append(f"{task.node_id}\n({task.node_type})")
183
+ colors.append(color)
184
+
185
+ ax.set_yticks(y_positions)
186
+ ax.set_yticklabels(labels)
187
+ ax.set_xlabel("Time (seconds)")
188
+ ax.set_title("Task Execution Timeline")
189
+ ax.grid(True, axis="x", alpha=0.3)
190
+
191
+ # Add legend
192
+ from matplotlib.patches import Patch
193
+
194
+ legend_elements = [
195
+ Patch(facecolor="green", alpha=0.7, label="Completed"),
196
+ Patch(facecolor="red", alpha=0.7, label="Failed"),
197
+ Patch(facecolor="blue", alpha=0.7, label="Running"),
198
+ Patch(facecolor="orange", alpha=0.7, label="Cancelled"),
199
+ ]
200
+ ax.legend(handles=legend_elements, loc="upper right")
201
+
202
+ plt.tight_layout()
203
+ plt.savefig(output_path, dpi=150, bbox_inches="tight")
204
+ plt.close()
205
+
206
+ return output_path
207
+
208
+ def _create_resource_usage_chart(
209
+ self, tasks: List[TaskRun], output_path: Path
210
+ ) -> Path:
211
+ """Create resource usage comparison chart."""
212
+ fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 12))
213
+
214
+ # Collect metrics data
215
+ node_names = []
216
+ cpu_usage = []
217
+ memory_usage = []
218
+ memory_delta = []
219
+ durations = []
220
+
221
+ for task in tasks:
222
+ if task.metrics:
223
+ node_names.append(f"{task.node_id}\n{task.node_type}")
224
+ cpu_usage.append(task.metrics.cpu_usage or 0)
225
+ memory_usage.append(task.metrics.memory_usage_mb or 0)
226
+
227
+ # Get memory delta from custom metrics
228
+ custom = task.metrics.custom_metrics or {}
229
+ memory_delta.append(custom.get("memory_delta_mb", 0))
230
+ durations.append(task.metrics.duration or 0)
231
+
232
+ if not node_names:
233
+ for ax in [ax1, ax2, ax3]:
234
+ ax.text(
235
+ 0.5,
236
+ 0.5,
237
+ "No metrics data available",
238
+ ha="center",
239
+ va="center",
240
+ transform=ax.transAxes,
241
+ )
242
+ plt.savefig(output_path)
243
+ plt.close()
244
+ return output_path
245
+
246
+ x = np.arange(len(node_names))
247
+
248
+ # CPU usage chart
249
+ bars1 = ax1.bar(x, cpu_usage, color="skyblue", edgecolor="black")
250
+ ax1.set_ylabel("CPU Usage (%)")
251
+ ax1.set_title("CPU Usage by Node")
252
+ ax1.set_xticks(x)
253
+ ax1.set_xticklabels(node_names, rotation=45, ha="right")
254
+ ax1.grid(True, axis="y", alpha=0.3)
255
+
256
+ # Add value labels
257
+ for bar, value in zip(bars1, cpu_usage):
258
+ if value > 0:
259
+ ax1.text(
260
+ bar.get_x() + bar.get_width() / 2,
261
+ bar.get_height() + 1,
262
+ f"{value:.1f}%",
263
+ ha="center",
264
+ va="bottom",
265
+ fontsize=8,
266
+ )
267
+
268
+ # Memory usage chart
269
+ bars2 = ax2.bar(x, memory_usage, color="lightgreen", edgecolor="black")
270
+ bars2_delta = ax2.bar(
271
+ x,
272
+ memory_delta,
273
+ bottom=memory_usage,
274
+ color="darkgreen",
275
+ alpha=0.5,
276
+ edgecolor="black",
277
+ )
278
+ ax2.set_ylabel("Memory (MB)")
279
+ ax2.set_title("Memory Usage by Node")
280
+ ax2.set_xticks(x)
281
+ ax2.set_xticklabels(node_names, rotation=45, ha="right")
282
+ ax2.grid(True, axis="y", alpha=0.3)
283
+ ax2.legend(["Peak Memory", "Memory Delta"])
284
+
285
+ # Duration chart
286
+ bars3 = ax3.bar(x, durations, color="lightcoral", edgecolor="black")
287
+ ax3.set_ylabel("Duration (seconds)")
288
+ ax3.set_title("Execution Time by Node")
289
+ ax3.set_xticks(x)
290
+ ax3.set_xticklabels(node_names, rotation=45, ha="right")
291
+ ax3.grid(True, axis="y", alpha=0.3)
292
+
293
+ # Add value labels
294
+ for bar, value in zip(bars3, durations):
295
+ if value > 0:
296
+ ax3.text(
297
+ bar.get_x() + bar.get_width() / 2,
298
+ bar.get_height() + 0.01,
299
+ f"{value:.2f}s",
300
+ ha="center",
301
+ va="bottom",
302
+ fontsize=8,
303
+ )
304
+
305
+ plt.suptitle("Resource Usage Analysis", fontsize=14)
306
+ plt.tight_layout()
307
+ plt.savefig(output_path, dpi=150, bbox_inches="tight")
308
+ plt.close()
309
+
310
+ return output_path
311
+
312
+ def _create_node_performance_comparison(
313
+ self, tasks: List[TaskRun], output_path: Path
314
+ ) -> Path:
315
+ """Create performance comparison radar chart."""
316
+ # Group tasks by node type
317
+ node_type_metrics = {}
318
+
319
+ for task in tasks:
320
+ if task.metrics and task.status == TaskStatus.COMPLETED:
321
+ node_type = task.node_type
322
+ if node_type not in node_type_metrics:
323
+ node_type_metrics[node_type] = {
324
+ "cpu": [],
325
+ "memory": [],
326
+ "duration": [],
327
+ "io_read": [],
328
+ "io_write": [],
329
+ }
330
+
331
+ metrics = node_type_metrics[node_type]
332
+ metrics["cpu"].append(task.metrics.cpu_usage or 0)
333
+ metrics["memory"].append(task.metrics.memory_usage_mb or 0)
334
+ metrics["duration"].append(task.metrics.duration or 0)
335
+
336
+ custom = task.metrics.custom_metrics or {}
337
+ metrics["io_read"].append(
338
+ custom.get("io_read_bytes", 0) / 1024 / 1024
339
+ ) # MB
340
+ metrics["io_write"].append(
341
+ custom.get("io_write_bytes", 0) / 1024 / 1024
342
+ ) # MB
343
+
344
+ if not node_type_metrics:
345
+ fig, ax = plt.subplots(figsize=(8, 6))
346
+ ax.text(
347
+ 0.5,
348
+ 0.5,
349
+ "No performance data available",
350
+ ha="center",
351
+ va="center",
352
+ transform=ax.transAxes,
353
+ )
354
+ plt.savefig(output_path)
355
+ plt.close()
356
+ return output_path
357
+
358
+ # Calculate averages
359
+ avg_metrics = {}
360
+ for node_type, metrics in node_type_metrics.items():
361
+ avg_metrics[node_type] = {
362
+ "CPU %": np.mean(metrics["cpu"]) if metrics["cpu"] else 0,
363
+ "Memory MB": np.mean(metrics["memory"]) if metrics["memory"] else 0,
364
+ "Duration s": (
365
+ np.mean(metrics["duration"]) if metrics["duration"] else 0
366
+ ),
367
+ "I/O Read MB": np.mean(metrics["io_read"]) if metrics["io_read"] else 0,
368
+ "I/O Write MB": (
369
+ np.mean(metrics["io_write"]) if metrics["io_write"] else 0
370
+ ),
371
+ }
372
+
373
+ # Create radar chart
374
+ fig, ax = plt.subplots(figsize=(10, 8), subplot_kw=dict(projection="polar"))
375
+
376
+ categories = list(next(iter(avg_metrics.values())).keys())
377
+ num_vars = len(categories)
378
+
379
+ angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
380
+ angles += angles[:1] # Complete the circle
381
+
382
+ colors = plt.cm.tab10(np.linspace(0, 1, len(avg_metrics)))
383
+
384
+ for (node_type, metrics), color in zip(avg_metrics.items(), colors):
385
+ values = list(metrics.values())
386
+
387
+ # Normalize values to 0-100 scale for better visualization
388
+ max_vals = {
389
+ "CPU %": 100,
390
+ "Memory MB": max(max(m["Memory MB"] for m in avg_metrics.values()), 1),
391
+ "Duration s": max(
392
+ max(m["Duration s"] for m in avg_metrics.values()), 1
393
+ ),
394
+ "I/O Read MB": max(
395
+ max(m["I/O Read MB"] for m in avg_metrics.values()), 1
396
+ ),
397
+ "I/O Write MB": max(
398
+ max(m["I/O Write MB"] for m in avg_metrics.values()), 1
399
+ ),
400
+ }
401
+
402
+ normalized_values = []
403
+ for cat, val in zip(categories, values):
404
+ normalized_values.append((val / max_vals[cat]) * 100)
405
+
406
+ normalized_values += normalized_values[:1]
407
+
408
+ ax.plot(
409
+ angles,
410
+ normalized_values,
411
+ "o-",
412
+ linewidth=2,
413
+ label=node_type,
414
+ color=color,
415
+ )
416
+ ax.fill(angles, normalized_values, alpha=0.25, color=color)
417
+
418
+ ax.set_xticks(angles[:-1])
419
+ ax.set_xticklabels(categories)
420
+ ax.set_ylim(0, 100)
421
+ ax.set_ylabel("Relative Performance (0-100)", labelpad=30)
422
+ ax.set_title(
423
+ "Node Type Performance Comparison\n(Normalized to 0-100 scale)", pad=20
424
+ )
425
+ ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.0))
426
+ ax.grid(True)
427
+
428
+ plt.tight_layout()
429
+ plt.savefig(output_path, dpi=150, bbox_inches="tight")
430
+ plt.close()
431
+
432
+ return output_path
433
+
434
+ def _create_io_analysis(self, tasks: List[TaskRun], output_path: Path) -> Path:
435
+ """Create I/O operations analysis chart."""
436
+ fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
437
+
438
+ # Collect I/O data
439
+ node_names = []
440
+ io_read_bytes = []
441
+ io_write_bytes = []
442
+ io_read_count = []
443
+ io_write_count = []
444
+
445
+ for task in tasks:
446
+ if task.metrics and task.metrics.custom_metrics:
447
+ custom = task.metrics.custom_metrics
448
+ if any(
449
+ custom.get(k, 0) > 0
450
+ for k in [
451
+ "io_read_bytes",
452
+ "io_write_bytes",
453
+ "io_read_count",
454
+ "io_write_count",
455
+ ]
456
+ ):
457
+ node_names.append(f"{task.node_id}")
458
+ io_read_bytes.append(
459
+ custom.get("io_read_bytes", 0) / 1024 / 1024
460
+ ) # MB
461
+ io_write_bytes.append(
462
+ custom.get("io_write_bytes", 0) / 1024 / 1024
463
+ ) # MB
464
+ io_read_count.append(custom.get("io_read_count", 0))
465
+ io_write_count.append(custom.get("io_write_count", 0))
466
+
467
+ if not node_names:
468
+ for ax in [ax1, ax2]:
469
+ ax.text(
470
+ 0.5,
471
+ 0.5,
472
+ "No I/O data available",
473
+ ha="center",
474
+ va="center",
475
+ transform=ax.transAxes,
476
+ )
477
+ plt.savefig(output_path)
478
+ plt.close()
479
+ return output_path
480
+
481
+ x = np.arange(len(node_names))
482
+ width = 0.35
483
+
484
+ # I/O bytes chart
485
+ bars1 = ax1.bar(
486
+ x - width / 2,
487
+ io_read_bytes,
488
+ width,
489
+ label="Read",
490
+ color="lightblue",
491
+ edgecolor="black",
492
+ )
493
+ bars2 = ax1.bar(
494
+ x + width / 2,
495
+ io_write_bytes,
496
+ width,
497
+ label="Write",
498
+ color="lightcoral",
499
+ edgecolor="black",
500
+ )
501
+
502
+ ax1.set_ylabel("Data (MB)")
503
+ ax1.set_title("I/O Data Transfer by Node")
504
+ ax1.set_xticks(x)
505
+ ax1.set_xticklabels(node_names, rotation=45, ha="right")
506
+ ax1.legend()
507
+ ax1.grid(True, axis="y", alpha=0.3)
508
+
509
+ # I/O operations count chart
510
+ bars3 = ax2.bar(
511
+ x - width / 2,
512
+ io_read_count,
513
+ width,
514
+ label="Read Ops",
515
+ color="lightblue",
516
+ edgecolor="black",
517
+ )
518
+ bars4 = ax2.bar(
519
+ x + width / 2,
520
+ io_write_count,
521
+ width,
522
+ label="Write Ops",
523
+ color="lightcoral",
524
+ edgecolor="black",
525
+ )
526
+
527
+ ax2.set_ylabel("Operation Count")
528
+ ax2.set_title("I/O Operations Count by Node")
529
+ ax2.set_xticks(x)
530
+ ax2.set_xticklabels(node_names, rotation=45, ha="right")
531
+ ax2.legend()
532
+ ax2.grid(True, axis="y", alpha=0.3)
533
+
534
+ plt.suptitle("I/O Operations Analysis", fontsize=14)
535
+ plt.tight_layout()
536
+ plt.savefig(output_path, dpi=150, bbox_inches="tight")
537
+ plt.close()
538
+
539
+ return output_path
540
+
541
+ def _create_performance_heatmap(
542
+ self, tasks: List[TaskRun], output_path: Path
543
+ ) -> Path:
544
+ """Create performance metrics heatmap."""
545
+ # Prepare data matrix
546
+ metrics_data = []
547
+ node_labels = []
548
+
549
+ metric_names = [
550
+ "Duration (s)",
551
+ "CPU %",
552
+ "Memory (MB)",
553
+ "I/O Read (MB)",
554
+ "I/O Write (MB)",
555
+ ]
556
+
557
+ for task in tasks:
558
+ if task.metrics:
559
+ node_labels.append(f"{task.node_id}")
560
+ custom = task.metrics.custom_metrics or {}
561
+
562
+ row = [
563
+ task.metrics.duration or 0,
564
+ task.metrics.cpu_usage or 0,
565
+ task.metrics.memory_usage_mb or 0,
566
+ custom.get("io_read_bytes", 0) / 1024 / 1024,
567
+ custom.get("io_write_bytes", 0) / 1024 / 1024,
568
+ ]
569
+ metrics_data.append(row)
570
+
571
+ if not metrics_data:
572
+ fig, ax = plt.subplots(figsize=(8, 6))
573
+ ax.text(
574
+ 0.5,
575
+ 0.5,
576
+ "No metrics data available",
577
+ ha="center",
578
+ va="center",
579
+ transform=ax.transAxes,
580
+ )
581
+ plt.savefig(output_path)
582
+ plt.close()
583
+ return output_path
584
+
585
+ # Convert to numpy array and normalize
586
+ data = np.array(metrics_data).T
587
+
588
+ # Normalize each metric to 0-1 scale
589
+ normalized_data = np.zeros_like(data)
590
+ for i in range(data.shape[0]):
591
+ row_max = data[i].max()
592
+ if row_max > 0:
593
+ normalized_data[i] = data[i] / row_max
594
+
595
+ # Create heatmap
596
+ fig, ax = plt.subplots(figsize=(max(10, len(node_labels) * 0.8), 8))
597
+
598
+ im = ax.imshow(normalized_data, cmap="YlOrRd", aspect="auto")
599
+
600
+ # Set ticks and labels
601
+ ax.set_xticks(np.arange(len(node_labels)))
602
+ ax.set_yticks(np.arange(len(metric_names)))
603
+ ax.set_xticklabels(node_labels, rotation=45, ha="right")
604
+ ax.set_yticklabels(metric_names)
605
+
606
+ # Add text annotations
607
+ for i in range(len(metric_names)):
608
+ for j in range(len(node_labels)):
609
+ value = data[i, j]
610
+ color = "white" if normalized_data[i, j] > 0.5 else "black"
611
+
612
+ # Format based on metric type
613
+ if i == 0: # Duration
614
+ text = f"{value:.2f}"
615
+ elif i == 1: # CPU %
616
+ text = f"{value:.1f}%"
617
+ elif i in [2, 3, 4]: # Memory, I/O
618
+ text = f"{value:.1f}"
619
+
620
+ ax.text(j, i, text, ha="center", va="center", color=color, fontsize=8)
621
+
622
+ ax.set_title("Performance Metrics Heatmap\n(Normalized by metric type)", pad=20)
623
+
624
+ # Add colorbar
625
+ cbar = plt.colorbar(im, ax=ax)
626
+ cbar.set_label("Normalized Value (0-1)", rotation=270, labelpad=20)
627
+
628
+ plt.tight_layout()
629
+ plt.savefig(output_path, dpi=150, bbox_inches="tight")
630
+ plt.close()
631
+
632
+ return output_path
633
+
634
+ def _create_performance_report(
635
+ self, run: Any, tasks: List[TaskRun], output_path: Path
636
+ ) -> Path:
637
+ """Create markdown performance report."""
638
+ lines = []
639
+ lines.append(f"# Performance Report for Run {run.run_id}")
640
+ lines.append(f"\n**Workflow:** {run.workflow_name}")
641
+ lines.append(f"**Started:** {run.started_at}")
642
+ lines.append(f"**Status:** {run.status}")
643
+ lines.append(f"**Total Tasks:** {len(tasks)}")
644
+
645
+ # Calculate summary statistics
646
+ completed_tasks = [
647
+ t for t in tasks if t.status == TaskStatus.COMPLETED and t.metrics
648
+ ]
649
+
650
+ if completed_tasks:
651
+ total_duration = sum(t.metrics.duration or 0 for t in completed_tasks)
652
+ avg_cpu = np.mean([t.metrics.cpu_usage or 0 for t in completed_tasks])
653
+ max_memory = max((t.metrics.memory_usage_mb or 0) for t in completed_tasks)
654
+
655
+ lines.append("\n## Summary Statistics")
656
+ lines.append(f"- **Total Execution Time:** {total_duration:.2f} seconds")
657
+ lines.append(f"- **Average CPU Usage:** {avg_cpu:.1f}%")
658
+ lines.append(f"- **Peak Memory Usage:** {max_memory:.1f} MB")
659
+
660
+ # Task details table
661
+ lines.append("\n## Task Performance Details")
662
+ lines.append("| Node ID | Type | Status | Duration (s) | CPU % | Memory (MB) |")
663
+ lines.append("|---------|------|--------|-------------|-------|-------------|")
664
+
665
+ for task in tasks:
666
+ duration = (
667
+ f"{task.metrics.duration:.2f}"
668
+ if task.metrics and task.metrics.duration
669
+ else "N/A"
670
+ )
671
+ cpu = (
672
+ f"{task.metrics.cpu_usage:.1f}"
673
+ if task.metrics and task.metrics.cpu_usage
674
+ else "N/A"
675
+ )
676
+ memory = (
677
+ f"{task.metrics.memory_usage_mb:.1f}"
678
+ if task.metrics and task.metrics.memory_usage_mb
679
+ else "N/A"
680
+ )
681
+
682
+ lines.append(
683
+ f"| {task.node_id} | {task.node_type} | {task.status} | "
684
+ f"{duration} | {cpu} | {memory} |"
685
+ )
686
+
687
+ # Performance insights
688
+ lines.append("\n## Performance Insights")
689
+
690
+ if completed_tasks:
691
+ # Find bottlenecks
692
+ slowest = max(completed_tasks, key=lambda t: t.metrics.duration or 0)
693
+ lines.append("\n### Bottlenecks")
694
+ lines.append(
695
+ f"- **Slowest Node:** {slowest.node_id} ({slowest.metrics.duration:.2f}s)"
696
+ )
697
+
698
+ highest_cpu = max(completed_tasks, key=lambda t: t.metrics.cpu_usage or 0)
699
+ if highest_cpu.metrics.cpu_usage > 80:
700
+ lines.append(
701
+ f"- **High CPU Usage:** {highest_cpu.node_id} ({highest_cpu.metrics.cpu_usage:.1f}%)"
702
+ )
703
+
704
+ highest_memory = max(
705
+ completed_tasks, key=lambda t: t.metrics.memory_usage_mb or 0
706
+ )
707
+ lines.append(
708
+ f"- **Highest Memory:** {highest_memory.node_id} ({highest_memory.metrics.memory_usage_mb:.1f} MB)"
709
+ )
710
+
711
+ # Write report
712
+ with open(output_path, "w") as f:
713
+ f.write("\n".join(lines))
714
+
715
+ return output_path
716
+
717
+ def compare_runs(
718
+ self, run_ids: List[str], output_path: Optional[Path] = None
719
+ ) -> Path:
720
+ """Compare performance across multiple runs."""
721
+ if output_path is None:
722
+ output_path = Path.cwd() / "outputs" / "performance" / "comparison.png"
723
+ output_path.parent.mkdir(parents=True, exist_ok=True)
724
+
725
+ fig, axes = plt.subplots(2, 2, figsize=(15, 12))
726
+ axes = axes.flatten()
727
+
728
+ # Collect metrics for each run
729
+ run_metrics = {}
730
+ for run_id in run_ids:
731
+ tasks = self.task_manager.get_run_tasks(run_id)
732
+ completed = [
733
+ t for t in tasks if t.status == TaskStatus.COMPLETED and t.metrics
734
+ ]
735
+
736
+ if completed:
737
+ run_metrics[run_id] = {
738
+ "total_duration": sum(t.metrics.duration or 0 for t in completed),
739
+ "avg_cpu": np.mean([t.metrics.cpu_usage or 0 for t in completed]),
740
+ "max_memory": max(
741
+ (t.metrics.memory_usage_mb or 0) for t in completed
742
+ ),
743
+ "task_count": len(completed),
744
+ }
745
+
746
+ if not run_metrics:
747
+ for ax in axes:
748
+ ax.text(
749
+ 0.5,
750
+ 0.5,
751
+ "No metrics data available",
752
+ ha="center",
753
+ va="center",
754
+ transform=ax.transAxes,
755
+ )
756
+ plt.savefig(output_path)
757
+ plt.close()
758
+ return output_path
759
+
760
+ # Create comparison charts
761
+ run_labels = list(run_metrics.keys())
762
+ x = np.arange(len(run_labels))
763
+
764
+ # Total duration
765
+ durations = [run_metrics[r]["total_duration"] for r in run_labels]
766
+ axes[0].bar(x, durations, color="lightblue", edgecolor="black")
767
+ axes[0].set_ylabel("Total Duration (s)")
768
+ axes[0].set_title("Total Execution Time")
769
+ axes[0].set_xticks(x)
770
+ axes[0].set_xticklabels(run_labels, rotation=45, ha="right")
771
+ axes[0].grid(True, axis="y", alpha=0.3)
772
+
773
+ # Average CPU
774
+ cpu_avgs = [run_metrics[r]["avg_cpu"] for r in run_labels]
775
+ axes[1].bar(x, cpu_avgs, color="lightgreen", edgecolor="black")
776
+ axes[1].set_ylabel("Average CPU %")
777
+ axes[1].set_title("Average CPU Usage")
778
+ axes[1].set_xticks(x)
779
+ axes[1].set_xticklabels(run_labels, rotation=45, ha="right")
780
+ axes[1].grid(True, axis="y", alpha=0.3)
781
+
782
+ # Max memory
783
+ max_memories = [run_metrics[r]["max_memory"] for r in run_labels]
784
+ axes[2].bar(x, max_memories, color="lightcoral", edgecolor="black")
785
+ axes[2].set_ylabel("Peak Memory (MB)")
786
+ axes[2].set_title("Peak Memory Usage")
787
+ axes[2].set_xticks(x)
788
+ axes[2].set_xticklabels(run_labels, rotation=45, ha="right")
789
+ axes[2].grid(True, axis="y", alpha=0.3)
790
+
791
+ # Task efficiency (duration per task)
792
+ efficiencies = [
793
+ run_metrics[r]["total_duration"] / run_metrics[r]["task_count"]
794
+ for r in run_labels
795
+ ]
796
+ axes[3].bar(x, efficiencies, color="lightyellow", edgecolor="black")
797
+ axes[3].set_ylabel("Avg Duration per Task (s)")
798
+ axes[3].set_title("Task Efficiency")
799
+ axes[3].set_xticks(x)
800
+ axes[3].set_xticklabels(run_labels, rotation=45, ha="right")
801
+ axes[3].grid(True, axis="y", alpha=0.3)
802
+
803
+ plt.suptitle("Performance Comparison Across Runs", fontsize=16)
804
+ plt.tight_layout()
805
+ plt.savefig(output_path, dpi=150, bbox_inches="tight")
806
+ plt.close()
807
+
808
+ return output_path