loom-core 1.0.1-py3-none-any.whl → 1.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loom/common/config.py +1 -1
- loom/web/__init__.py +5 -0
- loom/web/api/__init__.py +4 -0
- loom/web/api/events.py +315 -0
- loom/web/api/graphs.py +236 -0
- loom/web/api/logs.py +342 -0
- loom/web/api/stats.py +283 -0
- loom/web/api/tasks.py +333 -0
- loom/web/api/workflows.py +524 -0
- loom/web/main.py +306 -0
- loom/web/schemas.py +656 -0
- {loom_core-1.0.1.dist-info → loom_core-1.0.3.dist-info}/METADATA +1 -1
- {loom_core-1.0.1.dist-info → loom_core-1.0.3.dist-info}/RECORD +17 -7
- {loom_core-1.0.1.dist-info → loom_core-1.0.3.dist-info}/WHEEL +0 -0
- {loom_core-1.0.1.dist-info → loom_core-1.0.3.dist-info}/entry_points.txt +0 -0
- {loom_core-1.0.1.dist-info → loom_core-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {loom_core-1.0.1.dist-info → loom_core-1.0.3.dist-info}/top_level.txt +0 -0
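
Version 1.0.3 introduces a `loom.web` package: per-resource routers under `loom/web/api/` plus what appears to be an application entry point in `loom/web/main.py` and shared response models in `loom/web/schemas.py`. The two routers reproduced below (`logs.py` and `stats.py`) each expose an `APIRouter` named `router`. The snippet below is only a hypothetical sketch of how such routers are typically mounted; the prefixes and the factory name are assumptions, since `main.py` is not reproduced in this diff.

```python
# Hypothetical wiring sketch; the prefixes and factory name are assumptions,
# not taken from loom/web/main.py (which is not shown in this diff).
from fastapi import FastAPI

from loom.web.api import logs, stats


def create_app() -> FastAPI:
    app = FastAPI(title="loom-core web API")
    app.include_router(logs.router, prefix="/logs", tags=["logs"])
    app.include_router(stats.router, prefix="/stats", tags=["stats"])
    return app
```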
loom/web/api/logs.py (ADDED, +342 lines)

```python
"""Log API Endpoints

Provides REST endpoints for querying workflow logs across the system.
"""

import math
from typing import Any, Optional

from fastapi import APIRouter, Depends, HTTPException

from ...database.db import Database
from ..schemas import (
    ErrorResponse,
    LogEntry,
    LogLevel,
    LogListParams,
    PaginatedResponse,
    PaginationMeta,
)

router = APIRouter()


async def get_db():
    """Database dependency"""
    async with Database[Any, Any]() as db:
        yield db


@router.get(
    "/",
    response_model=PaginatedResponse[LogEntry],
    summary="List logs",
    description="""
    Retrieve a paginated list of log entries across all workflows with optional filtering.

    **Filtering Options:**
    - `workflow_id`: Filter by specific workflow
    - `level`: Filter by log level (DEBUG, INFO, WARNING, ERROR)
    - `since`: Filter logs after specified timestamp

    **Sorting Options:**
    - `sort_by`: Field to sort by (created_at, level)
    - `sort_order`: Sort direction (asc/desc, default desc for recent-first)

    **Pagination:**
    - `page`: Page number (1-based)
    - `per_page`: Items per page (1-1000, default 100)

    **Use Cases:**
    - System-wide log monitoring
    - Error investigation and troubleshooting
    - Workflow execution debugging
    - Log aggregation and analysis
    """,
    responses={
        400: {"model": ErrorResponse, "description": "Invalid request parameters"},
        500: {"model": ErrorResponse, "description": "Internal server error"},
    },
)
async def list_logs(params: LogListParams = Depends(), db: Database = Depends(get_db)):
    """List logs with pagination and filtering"""
    try:
        # Build WHERE clause
        where_conditions = []
        query_params = []

        if params.workflow_id:
            where_conditions.append("l.workflow_id = ?")
            query_params.append(params.workflow_id)

        if params.level:
            where_conditions.append("l.level = ?")
            query_params.append(params.level.value)

        if params.since:
            where_conditions.append("l.created_at >= ?")
            query_params.append(params.since.isoformat())

        where_clause = (
            f"WHERE {' AND '.join(where_conditions)}" if where_conditions else ""
        )

        # Get total count
        count_sql = f"""
            SELECT COUNT(*) as total
            FROM logs l
            JOIN workflows w ON l.workflow_id = w.id
            {where_clause}
        """
        count_result = await db.fetchone(count_sql, tuple(query_params))
        total = count_result["total"] if count_result else 0

        # Calculate pagination
        pages = math.ceil(total / params.per_page) if total > 0 else 1
        offset = (params.page - 1) * params.per_page

        # Build ORDER BY clause
        order_clause = f"ORDER BY l.{params.sort_by} {params.sort_order.upper()}"

        # Get logs for current page with workflow info
        logs_sql = f"""
            SELECT
                l.id,
                l.workflow_id,
                w.name as workflow_name,
                l.level,
                l.message,
                l.created_at
            FROM logs l
            JOIN workflows w ON l.workflow_id = w.id
            {where_clause}
            {order_clause}
            LIMIT {params.per_page} OFFSET {offset}
        """

        logs = await db.query(logs_sql, tuple(query_params))

        # Convert to response models
        log_entries = [
            LogEntry(
                id=log["id"],
                workflow_id=log["workflow_id"],
                workflow_name=log["workflow_name"],
                level=LogLevel(log["level"]),
                message=log["message"],
                created_at=log["created_at"],
            )
            for log in logs
        ]

        # Build pagination metadata
        meta = PaginationMeta(
            page=params.page,
            per_page=params.per_page,
            total=total,
            pages=pages,
            has_prev=params.page > 1,
            has_next=params.page < pages,
        )

        return PaginatedResponse(data=log_entries, meta=meta)

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to list logs: {str(e)}")


@router.get(
    "/errors",
    response_model=PaginatedResponse[LogEntry],
    summary="List error logs",
    description="""
    Retrieve error-level log entries across all workflows.

    **Optimized endpoint for:**
    - Error monitoring and alerting
    - Troubleshooting failed workflows
    - System health monitoring

    **Filtering Options:**
    - `workflow_id`: Filter by specific workflow
    - `since`: Filter logs after specified timestamp

    **Sorting:**
    - Default sort by created_at descending (most recent first)
    - Shows critical errors in chronological order
    """,
    responses={500: {"model": ErrorResponse, "description": "Internal server error"}},
)
async def list_error_logs(
    page: int = 1,
    per_page: int = 100,
    workflow_id: Optional[str] = None,
    since: Optional[str] = None,
    db: Database = Depends(get_db),
):
    """List error-level logs"""
    try:
        # Build WHERE clause for ERROR level logs
        where_conditions = ["l.level = 'ERROR'"]
        query_params = []

        if workflow_id:
            where_conditions.append("l.workflow_id = ?")
            query_params.append(workflow_id)

        if since:
            where_conditions.append("l.created_at >= ?")
            query_params.append(since)

        where_clause = f"WHERE {' AND '.join(where_conditions)}"

        # Get total count
        count_sql = f"""
            SELECT COUNT(*) as total
            FROM logs l
            JOIN workflows w ON l.workflow_id = w.id
            {where_clause}
        """
        count_result = await db.fetchone(count_sql, tuple(query_params))
        total = count_result["total"] if count_result else 0

        # Calculate pagination
        pages = math.ceil(total / per_page) if total > 0 else 1
        offset = (page - 1) * per_page

        # Get error logs ordered by created_at desc (newest first)
        logs_sql = f"""
            SELECT
                l.id,
                l.workflow_id,
                w.name as workflow_name,
                l.level,
                l.message,
                l.created_at
            FROM logs l
            JOIN workflows w ON l.workflow_id = w.id
            {where_clause}
            ORDER BY l.created_at DESC
            LIMIT {per_page} OFFSET {offset}
        """

        logs = await db.query(logs_sql, tuple(query_params))

        # Convert to response models
        log_entries = [
            LogEntry(
                id=log["id"],
                workflow_id=log["workflow_id"],
                workflow_name=log["workflow_name"],
                level=LogLevel(log["level"]),
                message=log["message"],
                created_at=log["created_at"],
            )
            for log in logs
        ]

        # Build pagination metadata
        meta = PaginationMeta(
            page=page,
            per_page=per_page,
            total=total,
            pages=pages,
            has_prev=page > 1,
            has_next=page < pages,
        )

        return PaginatedResponse(data=log_entries, meta=meta)

    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to list error logs: {str(e)}"
        )


@router.get(
    "/recent",
    response_model=PaginatedResponse[LogEntry],
    summary="List recent logs",
    description="""
    Retrieve the most recent log entries across all workflows.

    **Optimized endpoint for:**
    - Real-time log monitoring
    - Live activity feeds
    - Recent system activity overview

    **Fixed Parameters:**
    - Always sorted by created_at descending (newest first)
    - No filtering (shows all log levels and workflows)
    - Optimized for quick access to latest activity

    **Response:**
    - Shows system-wide recent activity
    - Useful for dashboard "latest activity" sections
    """,
    responses={500: {"model": ErrorResponse, "description": "Internal server error"}},
)
async def list_recent_logs(
    page: int = 1, per_page: int = 50, db: Database = Depends(get_db)
):
    """List recent logs across all workflows"""
    try:
        # Get total count
        count_sql = """
            SELECT COUNT(*) as total
            FROM logs l
            JOIN workflows w ON l.workflow_id = w.id
        """
        count_result = await db.fetchone(count_sql, ())
        total = count_result["total"] if count_result else 0

        # Calculate pagination
        pages = math.ceil(total / per_page) if total > 0 else 1
        offset = (page - 1) * per_page

        # Get recent logs ordered by created_at desc (newest first)
        logs_sql = """
            SELECT
                l.id,
                l.workflow_id,
                w.name as workflow_name,
                l.level,
                l.message,
                l.created_at
            FROM logs l
            JOIN workflows w ON l.workflow_id = w.id
            ORDER BY l.created_at DESC
            LIMIT ? OFFSET ?
        """

        logs = await db.query(logs_sql, (per_page, offset))

        # Convert to response models
        log_entries = [
            LogEntry(
                id=log["id"],
                workflow_id=log["workflow_id"],
                workflow_name=log["workflow_name"],
                level=LogLevel(log["level"]),
                message=log["message"],
                created_at=log["created_at"],
            )
            for log in logs
        ]

        # Build pagination metadata
        meta = PaginationMeta(
            page=page,
            per_page=per_page,
            total=total,
            pages=pages,
            has_prev=page > 1,
            has_next=page < pages,
        )

        return PaginatedResponse(data=log_entries, meta=meta)

    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to list recent logs: {str(e)}"
        )
```
loom/web/api/stats.py (ADDED, +283 lines)

```python
"""Statistics API Endpoints

Provides REST endpoints for system statistics and metrics.
"""

from typing import Any

from fastapi import APIRouter, Depends, HTTPException

from ...database.db import Database
from ..schemas import (
    ErrorResponse,
    SystemStats,
    TaskStats,
    WorkflowStats,
)

router = APIRouter()


async def get_db():
    """Database dependency"""
    async with Database[Any, Any]() as db:
        yield db


@router.get(
    "/",
    response_model=SystemStats,
    summary="Get system statistics",
    description="""
    Retrieve comprehensive system statistics including workflow and task counts.

    **Returns:**
    - **Workflow Statistics**: Total, running, completed, failed, and canceled counts
    - **Task Statistics**: Total, pending, running, completed, and failed counts
    - **Event Count**: Total number of events across all workflows
    - **Log Count**: Total number of log entries across all workflows

    **Use Cases:**
    - System health monitoring dashboard
    - Capacity planning and resource utilization
    - Performance metrics and reporting
    - Overall system status overview
    """,
    responses={500: {"model": ErrorResponse, "description": "Internal server error"}},
)
async def get_system_stats(db: Database = Depends(get_db)):
    """Get comprehensive system statistics"""
    try:
        # Get workflow statistics
        workflow_stats_sql = """
            SELECT
                COUNT(*) as total,
                COUNT(CASE WHEN status = 'RUNNING' THEN 1 END) as running,
                COUNT(CASE WHEN status = 'COMPLETED' THEN 1 END) as completed,
                COUNT(CASE WHEN status = 'FAILED' THEN 1 END) as failed,
                COUNT(CASE WHEN status = 'CANCELED' THEN 1 END) as canceled
            FROM workflows
        """
        workflow_result = await db.fetchone(workflow_stats_sql, ())

        workflow_stats = WorkflowStats(
            total=workflow_result["total"] if workflow_result else 0,
            running=workflow_result["running"] if workflow_result else 0,
            completed=workflow_result["completed"] if workflow_result else 0,
            failed=workflow_result["failed"] if workflow_result else 0,
            canceled=workflow_result["canceled"] if workflow_result else 0,
        )

        # Get task statistics
        task_stats_sql = """
            SELECT
                COUNT(*) as total,
                COUNT(CASE WHEN status = 'PENDING' THEN 1 END) as pending,
                COUNT(CASE WHEN status = 'RUNNING' THEN 1 END) as running,
                COUNT(CASE WHEN status = 'COMPLETED' THEN 1 END) as completed,
                COUNT(CASE WHEN status = 'FAILED' THEN 1 END) as failed
            FROM tasks
        """
        task_result = await db.fetchone(task_stats_sql, ())

        task_stats = TaskStats(
            total=task_result["total"] if task_result else 0,
            pending=task_result["pending"] if task_result else 0,
            running=task_result["running"] if task_result else 0,
            completed=task_result["completed"] if task_result else 0,
            failed=task_result["failed"] if task_result else 0,
        )

        # Get event count
        event_count_sql = "SELECT COUNT(*) as total FROM events"
        event_result = await db.fetchone(event_count_sql, ())

        # Get log count
        log_count_sql = "SELECT COUNT(*) as total FROM logs"
        log_result = await db.fetchone(log_count_sql, ())

        return SystemStats(
            workflows=workflow_stats,
            tasks=task_stats,
            events=event_result["total"] if event_result else 0,
            logs=log_result["total"] if log_result else 0,
        )

    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to get system stats: {str(e)}"
        )


@router.get(
    "/workflows",
    response_model=WorkflowStats,
    summary="Get workflow statistics",
    description="""
    Retrieve workflow execution statistics by status.

    **Returns:**
    - Total number of workflows
    - Count by status: RUNNING, COMPLETED, FAILED, CANCELED

    **Use Cases:**
    - Workflow success/failure rate monitoring
    - Execution pipeline health checks
    - Performance trend analysis
    """,
    responses={500: {"model": ErrorResponse, "description": "Internal server error"}},
)
async def get_workflow_stats(db: Database = Depends(get_db)):
    """Get workflow statistics"""
    try:
        workflow_stats_sql = """
            SELECT
                COUNT(*) as total,
                COUNT(CASE WHEN status = 'RUNNING' THEN 1 END) as running,
                COUNT(CASE WHEN status = 'COMPLETED' THEN 1 END) as completed,
                COUNT(CASE WHEN status = 'FAILED' THEN 1 END) as failed,
                COUNT(CASE WHEN status = 'CANCELED' THEN 1 END) as canceled
            FROM workflows
        """
        result = await db.fetchone(workflow_stats_sql, ())

        return WorkflowStats(
            total=result["total"] if result else 0,
            running=result["running"] if result else 0,
            completed=result["completed"] if result else 0,
            failed=result["failed"] if result else 0,
            canceled=result["canceled"] if result else 0,
        )

    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to get workflow stats: {str(e)}"
        )


@router.get(
    "/tasks",
    response_model=TaskStats,
    summary="Get task statistics",
    description="""
    Retrieve task execution statistics by status.

    **Returns:**
    - Total number of tasks
    - Count by status: PENDING, RUNNING, COMPLETED, FAILED

    **Use Cases:**
    - Task queue monitoring and load balancing
    - Worker capacity planning
    - Task execution performance analysis
    """,
    responses={500: {"model": ErrorResponse, "description": "Internal server error"}},
)
async def get_task_stats(db: Database = Depends(get_db)):
    """Get task statistics"""
    try:
        task_stats_sql = """
            SELECT
                COUNT(*) as total,
                COUNT(CASE WHEN status = 'PENDING' THEN 1 END) as pending,
                COUNT(CASE WHEN status = 'RUNNING' THEN 1 END) as running,
                COUNT(CASE WHEN status = 'COMPLETED' THEN 1 END) as completed,
                COUNT(CASE WHEN status = 'FAILED' THEN 1 END) as failed
            FROM tasks
        """
        result = await db.fetchone(task_stats_sql, ())

        return TaskStats(
            total=result["total"] if result else 0,
            pending=result["pending"] if result else 0,
            running=result["running"] if result else 0,
            completed=result["completed"] if result else 0,
            failed=result["failed"] if result else 0,
        )

    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to get task stats: {str(e)}"
        )


@router.get(
    "/health",
    summary="Get system health indicators",
    description="""
    Retrieve key health indicators for system monitoring.

    **Returns:**
    - Active workflow count (RUNNING status)
    - Pending task count (ready for execution)
    - Recent error count (last hour)
    - System status assessment

    **Health Assessment:**
    - `healthy`: Normal operation (< 10% failed workflows)
    - `degraded`: Some issues (10-25% failed workflows)
    - `unhealthy`: Major issues (> 25% failed workflows)

    **Use Cases:**
    - Health check endpoints for monitoring systems
    - Dashboard status indicators
    - Alerting system integration
    """,
    responses={500: {"model": ErrorResponse, "description": "Internal server error"}},
)
async def get_health_indicators(db: Database = Depends(get_db)):
    """Get system health indicators"""
    try:
        from datetime import datetime, timedelta

        # Get active workflow count
        active_sql = "SELECT COUNT(*) as count FROM workflows WHERE status = 'RUNNING'"
        active_result = await db.fetchone(active_sql, ())
        active_workflows = active_result["count"] if active_result else 0

        # Get pending task count (ready to execute)
        now = datetime.now().isoformat()
        pending_sql = "SELECT COUNT(*) as count FROM tasks WHERE status = 'PENDING' AND run_at <= ?"
        pending_result = await db.fetchone(pending_sql, (now,))
        pending_tasks = pending_result["count"] if pending_result else 0

        # Get recent error count (last hour)
        one_hour_ago = (datetime.now() - timedelta(hours=1)).isoformat()
        error_sql = "SELECT COUNT(*) as count FROM logs WHERE level = 'ERROR' AND created_at >= ?"
        error_result = await db.fetchone(error_sql, (one_hour_ago,))
        recent_errors = error_result["count"] if error_result else 0

        # Calculate system health status
        total_sql = "SELECT COUNT(*) as count FROM workflows"
        failed_sql = "SELECT COUNT(*) as count FROM workflows WHERE status = 'FAILED'"

        total_result = await db.fetchone(total_sql, ())
        failed_result = await db.fetchone(failed_sql, ())

        total_workflows = total_result["count"] if total_result else 0
        failed_workflows = failed_result["count"] if failed_result else 0

        if total_workflows == 0:
            health_status = "healthy"
        else:
            failure_rate = failed_workflows / total_workflows
            if failure_rate <= 0.1:
                health_status = "healthy"
            elif failure_rate <= 0.25:
                health_status = "degraded"
            else:
                health_status = "unhealthy"

        return {
            "status": health_status,
            "active_workflows": active_workflows,
            "pending_tasks": pending_tasks,
            "recent_errors": recent_errors,
            "failure_rate": round(failed_workflows / max(total_workflows, 1) * 100, 2),
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to get health indicators: {str(e)}"
        )
```