kailash 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +31 -0
- kailash/__main__.py +11 -0
- kailash/cli/__init__.py +5 -0
- kailash/cli/commands.py +563 -0
- kailash/manifest.py +778 -0
- kailash/nodes/__init__.py +23 -0
- kailash/nodes/ai/__init__.py +26 -0
- kailash/nodes/ai/agents.py +417 -0
- kailash/nodes/ai/models.py +488 -0
- kailash/nodes/api/__init__.py +52 -0
- kailash/nodes/api/auth.py +567 -0
- kailash/nodes/api/graphql.py +480 -0
- kailash/nodes/api/http.py +598 -0
- kailash/nodes/api/rate_limiting.py +572 -0
- kailash/nodes/api/rest.py +665 -0
- kailash/nodes/base.py +1032 -0
- kailash/nodes/base_async.py +128 -0
- kailash/nodes/code/__init__.py +32 -0
- kailash/nodes/code/python.py +1021 -0
- kailash/nodes/data/__init__.py +125 -0
- kailash/nodes/data/readers.py +496 -0
- kailash/nodes/data/sharepoint_graph.py +623 -0
- kailash/nodes/data/sql.py +380 -0
- kailash/nodes/data/streaming.py +1168 -0
- kailash/nodes/data/vector_db.py +964 -0
- kailash/nodes/data/writers.py +529 -0
- kailash/nodes/logic/__init__.py +6 -0
- kailash/nodes/logic/async_operations.py +702 -0
- kailash/nodes/logic/operations.py +551 -0
- kailash/nodes/transform/__init__.py +5 -0
- kailash/nodes/transform/processors.py +379 -0
- kailash/runtime/__init__.py +6 -0
- kailash/runtime/async_local.py +356 -0
- kailash/runtime/docker.py +697 -0
- kailash/runtime/local.py +434 -0
- kailash/runtime/parallel.py +557 -0
- kailash/runtime/runner.py +110 -0
- kailash/runtime/testing.py +347 -0
- kailash/sdk_exceptions.py +307 -0
- kailash/tracking/__init__.py +7 -0
- kailash/tracking/manager.py +885 -0
- kailash/tracking/metrics_collector.py +342 -0
- kailash/tracking/models.py +535 -0
- kailash/tracking/storage/__init__.py +0 -0
- kailash/tracking/storage/base.py +113 -0
- kailash/tracking/storage/database.py +619 -0
- kailash/tracking/storage/filesystem.py +543 -0
- kailash/utils/__init__.py +0 -0
- kailash/utils/export.py +924 -0
- kailash/utils/templates.py +680 -0
- kailash/visualization/__init__.py +62 -0
- kailash/visualization/api.py +732 -0
- kailash/visualization/dashboard.py +951 -0
- kailash/visualization/performance.py +808 -0
- kailash/visualization/reports.py +1471 -0
- kailash/workflow/__init__.py +15 -0
- kailash/workflow/builder.py +245 -0
- kailash/workflow/graph.py +827 -0
- kailash/workflow/mermaid_visualizer.py +628 -0
- kailash/workflow/mock_registry.py +63 -0
- kailash/workflow/runner.py +302 -0
- kailash/workflow/state.py +238 -0
- kailash/workflow/visualization.py +588 -0
- kailash-0.1.0.dist-info/METADATA +710 -0
- kailash-0.1.0.dist-info/RECORD +69 -0
- kailash-0.1.0.dist-info/WHEEL +5 -0
- kailash-0.1.0.dist-info/entry_points.txt +2 -0
- kailash-0.1.0.dist-info/licenses/LICENSE +21 -0
- kailash-0.1.0.dist-info/top_level.txt +1 -0
kailash/visualization/api.py
@@ -0,0 +1,732 @@
"""API endpoints for real-time dashboard data access.

This module provides REST API endpoints for accessing real-time workflow
performance data, metrics, and dashboard components for web-based interfaces.

Design Purpose:
- Provide RESTful API access to live performance metrics
- Enable real-time dashboard updates via HTTP endpoints
- Support WebSocket connections for streaming data
- Integrate with web dashboard frameworks and monitoring tools

Upstream Dependencies:
- RealTimeDashboard provides live monitoring capabilities
- TaskManager provides workflow execution data
- WorkflowPerformanceReporter provides detailed analysis
- MetricsCollector provides performance metrics

Downstream Consumers:
- Web dashboard frontends consume these APIs
- Monitoring tools integrate via REST endpoints
- CI/CD systems access performance data
- Third-party analytics platforms
"""

import asyncio
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

try:
    from fastapi import (
        BackgroundTasks,
        FastAPI,
        HTTPException,
        WebSocket,
        WebSocketDisconnect,
    )
    from fastapi.middleware.cors import CORSMiddleware
    from fastapi.responses import FileResponse
    from pydantic import BaseModel

    FASTAPI_AVAILABLE = True
except ImportError:
    FASTAPI_AVAILABLE = False

from kailash.tracking.manager import TaskManager
from kailash.tracking.models import TaskStatus
from kailash.visualization.dashboard import DashboardConfig, RealTimeDashboard
from kailash.visualization.reports import ReportFormat, WorkflowPerformanceReporter

logger = logging.getLogger(__name__)
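Because the FastAPI imports are wrapped in a try/except, the module can still be imported when the optional web dependencies are missing, and FASTAPI_AVAILABLE records which mode is in effect. A minimal sketch of how calling code might pick between the two interfaces defined later in this module; the task_manager variable is assumed to be an already-constructed TaskManager:

```python
# Sketch only: choose the full server when FastAPI is installed, otherwise
# fall back to the dependency-free interface. `task_manager` is assumed to be
# an existing kailash.tracking.manager.TaskManager instance.
from kailash.visualization.api import (
    FASTAPI_AVAILABLE,
    DashboardAPIServer,
    SimpleDashboardAPI,
)

if FASTAPI_AVAILABLE:
    api = DashboardAPIServer(task_manager)
    api.start_server(host="127.0.0.1", port=8000)
else:
    api = SimpleDashboardAPI(task_manager)
    print(api.get_runs(limit=5))
```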
# Pydantic models for API requests/responses
if FASTAPI_AVAILABLE:

    class RunRequest(BaseModel):
        """Request model for starting monitoring."""

        run_id: Optional[str] = None
        config: Optional[Dict[str, Any]] = None

    class MetricsResponse(BaseModel):
        """Response model for metrics data."""

        timestamp: datetime
        active_tasks: int
        completed_tasks: int
        failed_tasks: int
        total_cpu_usage: float
        total_memory_usage: float
        throughput: float
        avg_task_duration: float

    class TaskResponse(BaseModel):
        """Response model for task data."""

        node_id: str
        node_type: str
        status: str
        started_at: Optional[datetime]
        ended_at: Optional[datetime]
        duration: Optional[float]
        cpu_usage: Optional[float]
        memory_usage_mb: Optional[float]
        error_message: Optional[str]

    class RunResponse(BaseModel):
        """Response model for run information."""

        run_id: str
        workflow_name: str
        status: str
        started_at: Optional[datetime]
        ended_at: Optional[datetime]
        total_tasks: int
        completed_tasks: int
        failed_tasks: int

    class ReportRequest(BaseModel):
        """Request model for generating reports."""

        run_id: str
        format: str = "html"
        include_charts: bool = True
        compare_runs: Optional[List[str]] = None
        detail_level: str = "detailed"
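The ReportRequest model doubles as the wire format for the report endpoint defined further down. A hypothetical JSON payload, expressed as a Python dict; the run IDs are placeholders, not real runs:

```python
# Illustrative body for POST /api/v1/reports/generate (run IDs are placeholders).
report_request = {
    "run_id": "run-1234",
    "format": "html",  # must name a ReportFormat value; "html" is the default
    "include_charts": True,
    "compare_runs": ["run-1200", "run-1201"],
    "detail_level": "detailed",
}
```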
class DashboardAPIServer:
    """FastAPI server for dashboard API endpoints.

    This class provides a complete REST API server for accessing real-time
    workflow performance data and dashboard components.

    Usage:
        api_server = DashboardAPIServer(task_manager)
        api_server.start_server(host="0.0.0.0", port=8000)
    """

    def __init__(
        self,
        task_manager: TaskManager,
        dashboard_config: Optional[DashboardConfig] = None,
    ):
        """Initialize API server.

        Args:
            task_manager: TaskManager instance for data access
            dashboard_config: Configuration for dashboard components
        """
        if not FASTAPI_AVAILABLE:
            raise ImportError(
                "FastAPI is required for API server functionality. "
                "Install with: pip install fastapi uvicorn"
            )

        self.task_manager = task_manager
        self.dashboard_config = dashboard_config or DashboardConfig()

        # Initialize dashboard and reporter
        self.dashboard = RealTimeDashboard(task_manager, self.dashboard_config)
        self.reporter = WorkflowPerformanceReporter(task_manager)

        # WebSocket connections for real-time updates
        self._websocket_connections: List[WebSocket] = []
        self._broadcast_task: Optional[asyncio.Task] = None

        # Create FastAPI app
        self.app = FastAPI(
            title="Kailash Dashboard API",
            description="Real-time workflow performance monitoring API",
            version="1.0.0",
        )

        # Add CORS middleware
        self.app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],  # Configure appropriately for production
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )

        # Register routes
        self._register_routes()

        self.logger = logger

    def _register_routes(self):
        """Register all API routes."""

        @self.app.get("/health")
        async def health_check():
            """Health check endpoint."""
            return {"status": "healthy", "timestamp": datetime.now()}

        @self.app.get("/api/v1/runs", response_model=List[RunResponse])
        async def list_runs(limit: int = 10, offset: int = 0):
            """Get list of workflow runs."""
            try:
                all_runs = self.task_manager.list_runs()
                # Apply manual pagination
                runs = all_runs[offset : offset + limit]

                run_responses = []
                for run in runs:
                    tasks = self.task_manager.get_run_tasks(run.run_id)
                    completed_count = sum(
                        1 for t in tasks if t.status == TaskStatus.COMPLETED
                    )
                    failed_count = sum(
                        1 for t in tasks if t.status == TaskStatus.FAILED
                    )

                    run_responses.append(
                        RunResponse(
                            run_id=run.run_id,
                            workflow_name=run.workflow_name,
                            status=run.status,
                            started_at=run.started_at,
                            ended_at=run.ended_at,
                            total_tasks=len(tasks),
                            completed_tasks=completed_count,
                            failed_tasks=failed_count,
                        )
                    )

                return run_responses
            except Exception as e:
                self.logger.error(f"Failed to list runs: {e}")
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.get("/api/v1/runs/{run_id}", response_model=RunResponse)
        async def get_run(run_id: str):
            """Get details for a specific run."""
            try:
                run = self.task_manager.get_run(run_id)
                if not run:
                    raise HTTPException(status_code=404, detail="Run not found")

                tasks = self.task_manager.get_run_tasks(run_id)
                completed_count = sum(
                    1 for t in tasks if t.status == TaskStatus.COMPLETED
                )
                failed_count = sum(1 for t in tasks if t.status == TaskStatus.FAILED)

                return RunResponse(
                    run_id=run.run_id,
                    workflow_name=run.workflow_name,
                    status=run.status,
                    started_at=run.started_at,
                    ended_at=run.ended_at,
                    total_tasks=len(tasks),
                    completed_tasks=completed_count,
                    failed_tasks=failed_count,
                )
            except HTTPException:
                raise
            except Exception as e:
                self.logger.error(f"Failed to get run {run_id}: {e}")
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.get("/api/v1/runs/{run_id}/tasks", response_model=List[TaskResponse])
        async def get_run_tasks(run_id: str):
            """Get tasks for a specific run."""
            try:
                run = self.task_manager.get_run(run_id)
                if not run:
                    raise HTTPException(status_code=404, detail="Run not found")

                tasks = self.task_manager.get_run_tasks(run_id)

                task_responses = []
                for task in tasks:
                    task_responses.append(
                        TaskResponse(
                            node_id=task.node_id,
                            node_type=task.node_type,
                            status=task.status,
                            started_at=task.started_at,
                            ended_at=task.ended_at,
                            duration=task.metrics.duration if task.metrics else None,
                            cpu_usage=task.metrics.cpu_usage if task.metrics else None,
                            memory_usage_mb=(
                                task.metrics.memory_usage_mb if task.metrics else None
                            ),
                            error_message=task.error,
                        )
                    )

                return task_responses
            except HTTPException:
                raise
            except Exception as e:
                self.logger.error(f"Failed to get tasks for run {run_id}: {e}")
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.post("/api/v1/monitoring/start")
        async def start_monitoring(request: RunRequest):
            """Start real-time monitoring for a run."""
            try:
                # Update config if provided
                if request.config:
                    for key, value in request.config.items():
                        if hasattr(self.dashboard.config, key):
                            setattr(self.dashboard.config, key, value)

                # Start monitoring
                self.dashboard.start_monitoring(request.run_id)

                # Start WebSocket broadcasting if not already running
                if not self._broadcast_task:
                    self._broadcast_task = asyncio.create_task(
                        self._broadcast_metrics()
                    )

                return {"status": "started", "run_id": request.run_id}
            except Exception as e:
                self.logger.error(f"Failed to start monitoring: {e}")
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.post("/api/v1/monitoring/stop")
        async def stop_monitoring():
            """Stop real-time monitoring."""
            try:
                self.dashboard.stop_monitoring()

                # Stop WebSocket broadcasting
                if self._broadcast_task:
                    self._broadcast_task.cancel()
                    self._broadcast_task = None

                return {"status": "stopped"}
            except Exception as e:
                self.logger.error(f"Failed to stop monitoring: {e}")
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.get("/api/v1/monitoring/status")
        async def get_monitoring_status():
            """Get current monitoring status."""
            return {
                "monitoring": self.dashboard._monitoring,
                "run_id": self.dashboard._current_run_id,
                "metrics_count": len(self.dashboard._metrics_history),
                "websocket_connections": len(self._websocket_connections),
            }

        @self.app.get(
            "/api/v1/metrics/current", response_model=Optional[MetricsResponse]
        )
        async def get_current_metrics():
            """Get current live metrics."""
            try:
                metrics = self.dashboard.get_current_metrics()
                if not metrics:
                    return None

                return MetricsResponse(
                    timestamp=metrics.timestamp,
                    active_tasks=metrics.active_tasks,
                    completed_tasks=metrics.completed_tasks,
                    failed_tasks=metrics.failed_tasks,
                    total_cpu_usage=metrics.total_cpu_usage,
                    total_memory_usage=metrics.total_memory_usage,
                    throughput=metrics.throughput,
                    avg_task_duration=metrics.avg_task_duration,
                )
            except Exception as e:
                self.logger.error(f"Failed to get current metrics: {e}")
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.get("/api/v1/metrics/history", response_model=List[MetricsResponse])
        async def get_metrics_history(minutes: int = 30):
            """Get metrics history for specified time period."""
            try:
                history = self.dashboard.get_metrics_history(minutes=minutes)

                return [
                    MetricsResponse(
                        timestamp=m.timestamp,
                        active_tasks=m.active_tasks,
                        completed_tasks=m.completed_tasks,
                        failed_tasks=m.failed_tasks,
                        total_cpu_usage=m.total_cpu_usage,
                        total_memory_usage=m.total_memory_usage,
                        throughput=m.throughput,
                        avg_task_duration=m.avg_task_duration,
                    )
                    for m in history
                ]
            except Exception as e:
                self.logger.error(f"Failed to get metrics history: {e}")
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.post("/api/v1/reports/generate")
        async def generate_report(
            request: ReportRequest, background_tasks: BackgroundTasks
        ):
            """Generate performance report."""
            try:
                # Validate format
                try:
                    report_format = ReportFormat(request.format.lower())
                except ValueError:
                    raise HTTPException(
                        status_code=400,
                        detail=f"Invalid format. Supported: {[f.value for f in ReportFormat]}",
                    )

                # Generate report in background
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                filename = (
                    f"report_{request.run_id[:8]}_{timestamp}.{report_format.value}"
                )
                output_path = Path.cwd() / "outputs" / "reports" / filename

                background_tasks.add_task(
                    self._generate_report_background,
                    request.run_id,
                    output_path,
                    report_format,
                    request.compare_runs,
                )

                return {
                    "status": "generating",
                    "filename": filename,
                    "download_url": f"/api/v1/reports/download/{filename}",
                }
            except HTTPException:
                raise
            except Exception as e:
                self.logger.error(f"Failed to generate report: {e}")
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.get("/api/v1/reports/download/{filename}")
        async def download_report(filename: str):
            """Download generated report file."""
            try:
                file_path = Path.cwd() / "outputs" / "reports" / filename
                if not file_path.exists():
                    raise HTTPException(status_code=404, detail="Report file not found")

                return FileResponse(
                    path=file_path,
                    filename=filename,
                    media_type="application/octet-stream",
                )
            except HTTPException:
                raise
            except Exception as e:
                self.logger.error(f"Failed to download report {filename}: {e}")
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.get("/api/v1/dashboard/live")
        async def get_live_dashboard():
            """Generate live dashboard HTML."""
            try:
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                filename = f"dashboard_{timestamp}.html"
                output_path = Path.cwd() / "outputs" / "dashboards" / filename

                self.dashboard.generate_live_report(output_path, include_charts=True)

                return FileResponse(
                    path=output_path, filename=filename, media_type="text/html"
                )
            except Exception as e:
                self.logger.error(f"Failed to generate live dashboard: {e}")
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.websocket("/api/v1/metrics/stream")
        async def websocket_metrics_stream(websocket: WebSocket):
            """WebSocket endpoint for real-time metrics streaming."""
            await websocket.accept()
            self._websocket_connections.append(websocket)

            try:
                while True:
                    # Keep connection alive
                    await websocket.receive_text()
            except WebSocketDisconnect:
                self._websocket_connections.remove(websocket)
                self.logger.info("WebSocket client disconnected")
            except Exception as e:
                self.logger.error(f"WebSocket error: {e}")
                if websocket in self._websocket_connections:
                    self._websocket_connections.remove(websocket)
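Taken together, these routes can be driven with any HTTP and WebSocket client. Below is a minimal client-side sketch, assuming a server already running on localhost:8000 and the third-party requests and websockets packages (neither is required by this module); metrics frames only arrive after monitoring has been started. The remaining DashboardAPIServer helpers continue after the sketch.

```python
# Sketch only: poll the REST endpoints and listen to the metrics stream.
# Assumes a DashboardAPIServer is already serving on localhost:8000.
import asyncio
import json

import requests    # third-party HTTP client, assumed installed
import websockets  # third-party WebSocket client, assumed installed

BASE = "http://localhost:8000"

# Start monitoring so the broadcast task begins pushing frames.
requests.post(f"{BASE}/api/v1/monitoring/start", json={})

# REST: list recent runs and fetch the latest metrics snapshot.
runs = requests.get(f"{BASE}/api/v1/runs", params={"limit": 5}).json()
current = requests.get(f"{BASE}/api/v1/metrics/current").json()
print(runs, current)


async def follow_metrics() -> None:
    """Receive a few of the JSON frames pushed by _broadcast_metrics."""
    async with websockets.connect("ws://localhost:8000/api/v1/metrics/stream") as ws:
        for _ in range(10):
            frame = json.loads(await ws.recv())
            print(frame["timestamp"], frame["throughput"])


asyncio.run(follow_metrics())
```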
    async def _generate_report_background(
        self,
        run_id: str,
        output_path: Path,
        report_format: ReportFormat,
        compare_runs: Optional[List[str]] = None,
    ):
        """Generate report in background task."""
        try:
            output_path.parent.mkdir(parents=True, exist_ok=True)

            self.reporter.generate_report(
                run_id=run_id,
                output_path=output_path,
                format=report_format,
                compare_runs=compare_runs,
            )

            self.logger.info(f"Generated background report: {output_path}")
        except Exception as e:
            self.logger.error(f"Background report generation failed: {e}")

    async def _broadcast_metrics(self):
        """Broadcast live metrics to WebSocket connections."""
        while self.dashboard._monitoring:
            try:
                if self._websocket_connections:
                    current_metrics = self.dashboard.get_current_metrics()
                    if current_metrics:
                        metrics_data = {
                            "timestamp": current_metrics.timestamp.isoformat(),
                            "active_tasks": current_metrics.active_tasks,
                            "completed_tasks": current_metrics.completed_tasks,
                            "failed_tasks": current_metrics.failed_tasks,
                            "total_cpu_usage": current_metrics.total_cpu_usage,
                            "total_memory_usage": current_metrics.total_memory_usage,
                            "throughput": current_metrics.throughput,
                            "avg_task_duration": current_metrics.avg_task_duration,
                        }

                        # Send to all connected clients
                        disconnected = []
                        for websocket in self._websocket_connections:
                            try:
                                await websocket.send_text(json.dumps(metrics_data))
                            except Exception as e:
                                self.logger.warning(
                                    f"Failed to send to WebSocket client: {e}"
                                )
                                disconnected.append(websocket)

                        # Remove disconnected clients
                        for ws in disconnected:
                            if ws in self._websocket_connections:
                                self._websocket_connections.remove(ws)

                await asyncio.sleep(self.dashboard.config.update_interval)

            except asyncio.CancelledError:
                break
            except Exception as e:
                self.logger.error(f"Metrics broadcast error: {e}")
                await asyncio.sleep(1.0)

    def start_server(self, host: str = "0.0.0.0", port: int = 8000, **kwargs):
        """Start the API server.

        Args:
            host: Host to bind to
            port: Port to bind to
            **kwargs: Additional uvicorn server options
        """
        try:
            import uvicorn

            self.logger.info(f"Starting dashboard API server on {host}:{port}")
            uvicorn.run(self.app, host=host, port=port, **kwargs)
        except ImportError:
            raise ImportError(
                "uvicorn is required to run the API server. "
                "Install with: pip install uvicorn"
            )
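Because report generation is queued as a FastAPI background task, the download URL returned by /api/v1/reports/generate may return 404 until _generate_report_background has written the file. A client-side sketch of the full round trip, again assuming a local server, the requests package, and a placeholder run ID:

```python
# Sketch only: request a report, then poll the download URL until the
# background task has written the file. "run-1234" is a placeholder run ID.
import time

import requests  # third-party HTTP client, assumed installed

BASE = "http://localhost:8000"

resp = requests.post(
    f"{BASE}/api/v1/reports/generate",
    json={"run_id": "run-1234", "format": "html"},
).json()

report_url = BASE + resp["download_url"]
for _ in range(30):  # retry while the report is still being generated
    download = requests.get(report_url)
    if download.status_code == 200:
        with open(resp["filename"], "wb") as f:
            f.write(download.content)
        break
    time.sleep(1)
```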
class SimpleDashboardAPI:
    """Simplified API interface for dashboard functionality without FastAPI.

    This class provides dashboard API functionality using standard Python
    libraries for environments where FastAPI is not available or desired.
    """

    def __init__(
        self,
        task_manager: TaskManager,
        dashboard_config: Optional[DashboardConfig] = None,
    ):
        """Initialize simple API interface.

        Args:
            task_manager: TaskManager instance for data access
            dashboard_config: Configuration for dashboard components
        """
        self.task_manager = task_manager
        self.dashboard_config = dashboard_config or DashboardConfig()
        self.dashboard = RealTimeDashboard(task_manager, self.dashboard_config)
        self.reporter = WorkflowPerformanceReporter(task_manager)
        self.logger = logger

    def get_runs(self, limit: int = 10, offset: int = 0) -> List[Dict[str, Any]]:
        """Get list of workflow runs."""
        all_runs = self.task_manager.list_runs()
        runs = all_runs[offset : offset + limit]

        result = []
        for run in runs:
            tasks = self.task_manager.get_run_tasks(run.run_id)
            completed_count = sum(1 for t in tasks if t.status == TaskStatus.COMPLETED)
            failed_count = sum(1 for t in tasks if t.status == TaskStatus.FAILED)

            result.append(
                {
                    "run_id": run.run_id,
                    "workflow_name": run.workflow_name,
                    "status": run.status,
                    "started_at": run.started_at,
                    "ended_at": run.ended_at,
                    "total_tasks": len(tasks),
                    "completed_tasks": completed_count,
                    "failed_tasks": failed_count,
                }
            )

        return result

    def get_run_details(self, run_id: str) -> Optional[Dict[str, Any]]:
        """Get details for a specific run."""
        run = self.task_manager.get_run(run_id)
        if not run:
            return None

        tasks = self.task_manager.get_run_tasks(run_id)
        completed_count = sum(1 for t in tasks if t.status == TaskStatus.COMPLETED)
        failed_count = sum(1 for t in tasks if t.status == TaskStatus.FAILED)

        return {
            "run_id": run.run_id,
            "workflow_name": run.workflow_name,
            "status": run.status,
            "started_at": run.started_at,
            "ended_at": run.ended_at,
            "total_tasks": len(tasks),
            "completed_tasks": completed_count,
            "failed_tasks": failed_count,
            "tasks": [
                {
                    "node_id": task.node_id,
                    "node_type": task.node_type,
                    "status": task.status,
                    "started_at": task.started_at,
                    "ended_at": task.ended_at,
                    "duration": task.metrics.duration if task.metrics else None,
                    "cpu_usage": task.metrics.cpu_usage if task.metrics else None,
                    "memory_usage_mb": (
                        task.metrics.memory_usage_mb if task.metrics else None
                    ),
                    "error_message": task.error,
                }
                for task in tasks
            ],
        }

    def start_monitoring(self, run_id: Optional[str] = None) -> Dict[str, Any]:
        """Start real-time monitoring."""
        self.dashboard.start_monitoring(run_id)
        return {"status": "started", "run_id": run_id}

    def stop_monitoring(self) -> Dict[str, Any]:
        """Stop real-time monitoring."""
        self.dashboard.stop_monitoring()
        return {"status": "stopped"}

    def get_current_metrics(self) -> Optional[Dict[str, Any]]:
        """Get current live metrics."""
        metrics = self.dashboard.get_current_metrics()
        if not metrics:
            return None

        return {
            "timestamp": metrics.timestamp.isoformat(),
            "active_tasks": metrics.active_tasks,
            "completed_tasks": metrics.completed_tasks,
            "failed_tasks": metrics.failed_tasks,
            "total_cpu_usage": metrics.total_cpu_usage,
            "total_memory_usage": metrics.total_memory_usage,
            "throughput": metrics.throughput,
            "avg_task_duration": metrics.avg_task_duration,
        }

    def get_metrics_history(self, minutes: int = 30) -> List[Dict[str, Any]]:
        """Get metrics history."""
        history = self.dashboard.get_metrics_history(minutes=minutes)

        return [
            {
                "timestamp": m.timestamp.isoformat(),
                "active_tasks": m.active_tasks,
                "completed_tasks": m.completed_tasks,
                "failed_tasks": m.failed_tasks,
                "total_cpu_usage": m.total_cpu_usage,
                "total_memory_usage": m.total_memory_usage,
                "throughput": m.throughput,
                "avg_task_duration": m.avg_task_duration,
            }
            for m in history
        ]

    def generate_report(
        self,
        run_id: str,
        format: str = "html",
        output_path: Optional[Union[str, Path]] = None,
        compare_runs: Optional[List[str]] = None,
    ) -> Path:
        """Generate performance report."""
        try:
            report_format = ReportFormat(format.lower())
        except ValueError:
            raise ValueError(
                f"Invalid format. Supported: {[f.value for f in ReportFormat]}"
            )

        return self.reporter.generate_report(
            run_id=run_id,
            output_path=output_path,
            format=report_format,
            compare_runs=compare_runs,
        )

    def generate_dashboard(
        self, output_path: Optional[Union[str, Path]] = None
    ) -> Path:
        """Generate live dashboard HTML."""
        if output_path is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = Path.cwd() / "outputs" / f"dashboard_{timestamp}.html"

        return self.dashboard.generate_live_report(output_path, include_charts=True)

    def export_metrics_json(
        self, output_path: Optional[Union[str, Path]] = None
    ) -> Path:
        """Export current metrics as JSON."""
        if output_path is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = Path.cwd() / "outputs" / f"metrics_{timestamp}.json"

        from kailash.visualization.dashboard import DashboardExporter

        exporter = DashboardExporter(self.dashboard)
        return exporter.export_metrics_json(output_path)
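When FastAPI is not installed, SimpleDashboardAPI exposes the same data as plain dictionaries and file paths. A minimal sketch, assuming an already-constructed TaskManager instance named task_manager:

```python
# Sketch only: the FastAPI-free interface returns plain dicts and Path objects.
# `task_manager` is assumed to be an existing TaskManager wired to your
# tracking storage backend.
from kailash.visualization.api import SimpleDashboardAPI

api = SimpleDashboardAPI(task_manager)

api.start_monitoring()                        # begin collecting live metrics
print(api.get_runs(limit=5))                  # recent runs as a list of dicts
print(api.get_current_metrics())              # latest snapshot, or None
dashboard_path = api.generate_dashboard()     # writes an HTML dashboard
metrics_path = api.export_metrics_json()      # writes a JSON metrics export
api.stop_monitoring()
```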