kailash 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. kailash/__init__.py +31 -0
  2. kailash/__main__.py +11 -0
  3. kailash/cli/__init__.py +5 -0
  4. kailash/cli/commands.py +563 -0
  5. kailash/manifest.py +778 -0
  6. kailash/nodes/__init__.py +23 -0
  7. kailash/nodes/ai/__init__.py +26 -0
  8. kailash/nodes/ai/agents.py +417 -0
  9. kailash/nodes/ai/models.py +488 -0
  10. kailash/nodes/api/__init__.py +52 -0
  11. kailash/nodes/api/auth.py +567 -0
  12. kailash/nodes/api/graphql.py +480 -0
  13. kailash/nodes/api/http.py +598 -0
  14. kailash/nodes/api/rate_limiting.py +572 -0
  15. kailash/nodes/api/rest.py +665 -0
  16. kailash/nodes/base.py +1032 -0
  17. kailash/nodes/base_async.py +128 -0
  18. kailash/nodes/code/__init__.py +32 -0
  19. kailash/nodes/code/python.py +1021 -0
  20. kailash/nodes/data/__init__.py +125 -0
  21. kailash/nodes/data/readers.py +496 -0
  22. kailash/nodes/data/sharepoint_graph.py +623 -0
  23. kailash/nodes/data/sql.py +380 -0
  24. kailash/nodes/data/streaming.py +1168 -0
  25. kailash/nodes/data/vector_db.py +964 -0
  26. kailash/nodes/data/writers.py +529 -0
  27. kailash/nodes/logic/__init__.py +6 -0
  28. kailash/nodes/logic/async_operations.py +702 -0
  29. kailash/nodes/logic/operations.py +551 -0
  30. kailash/nodes/transform/__init__.py +5 -0
  31. kailash/nodes/transform/processors.py +379 -0
  32. kailash/runtime/__init__.py +6 -0
  33. kailash/runtime/async_local.py +356 -0
  34. kailash/runtime/docker.py +697 -0
  35. kailash/runtime/local.py +434 -0
  36. kailash/runtime/parallel.py +557 -0
  37. kailash/runtime/runner.py +110 -0
  38. kailash/runtime/testing.py +347 -0
  39. kailash/sdk_exceptions.py +307 -0
  40. kailash/tracking/__init__.py +7 -0
  41. kailash/tracking/manager.py +885 -0
  42. kailash/tracking/metrics_collector.py +342 -0
  43. kailash/tracking/models.py +535 -0
  44. kailash/tracking/storage/__init__.py +0 -0
  45. kailash/tracking/storage/base.py +113 -0
  46. kailash/tracking/storage/database.py +619 -0
  47. kailash/tracking/storage/filesystem.py +543 -0
  48. kailash/utils/__init__.py +0 -0
  49. kailash/utils/export.py +924 -0
  50. kailash/utils/templates.py +680 -0
  51. kailash/visualization/__init__.py +62 -0
  52. kailash/visualization/api.py +732 -0
  53. kailash/visualization/dashboard.py +951 -0
  54. kailash/visualization/performance.py +808 -0
  55. kailash/visualization/reports.py +1471 -0
  56. kailash/workflow/__init__.py +15 -0
  57. kailash/workflow/builder.py +245 -0
  58. kailash/workflow/graph.py +827 -0
  59. kailash/workflow/mermaid_visualizer.py +628 -0
  60. kailash/workflow/mock_registry.py +63 -0
  61. kailash/workflow/runner.py +302 -0
  62. kailash/workflow/state.py +238 -0
  63. kailash/workflow/visualization.py +588 -0
  64. kailash-0.1.0.dist-info/METADATA +710 -0
  65. kailash-0.1.0.dist-info/RECORD +69 -0
  66. kailash-0.1.0.dist-info/WHEEL +5 -0
  67. kailash-0.1.0.dist-info/entry_points.txt +2 -0
  68. kailash-0.1.0.dist-info/licenses/LICENSE +21 -0
  69. kailash-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,543 @@
1
+ """Filesystem-based storage backend for task tracking."""
2
+
3
+ import json
4
+ import os
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+ from typing import Any, List, Optional
8
+ from uuid import uuid4
9
+
10
+ from kailash.sdk_exceptions import KailashStorageError
11
+
12
+ from ..models import TaskMetrics, TaskRun, TaskStatus, WorkflowRun
13
+ from .base import StorageBackend
14
+
15
+
16
+ class FileSystemStorage(StorageBackend):
17
+ """Filesystem-based storage backend."""
18
+
19
+ def __init__(self, base_path: Optional[str] = None):
20
+ """Initialize filesystem storage.
21
+
22
+ Args:
23
+ base_path: Base directory for storage. Defaults to ~/.kailash/tracking
24
+ """
25
+ if base_path is None:
26
+ base_path = os.path.expanduser("~/.kailash/tracking")
27
+
28
+ self.base_path = Path(base_path)
29
+ self.runs_dir = self.base_path / "runs"
30
+ self.tasks_dir = self.base_path / "tasks"
31
+ self.metrics_dir = self.base_path / "metrics"
32
+
33
+ # Create directories
34
+ self.runs_dir.mkdir(parents=True, exist_ok=True)
35
+ self.tasks_dir.mkdir(parents=True, exist_ok=True)
36
+ self.metrics_dir.mkdir(parents=True, exist_ok=True)
37
+
38
+ # Initialize index if it doesn't exist
39
+ index_path = self._get_index_file()
40
+ if not index_path.exists():
41
+ with open(index_path, "w") as f:
42
+ json.dump({"tasks": {}, "runs": {}}, f, indent=2)
43
+
44
+ def save_run(self, run: WorkflowRun) -> None:
45
+ """Save a workflow run."""
46
+ run_path = self.runs_dir / f"{run.run_id}.json"
47
+ with open(run_path, "w") as f:
48
+ json.dump(run.to_dict(), f, indent=2)
49
+
50
+ def load_run(self, run_id: str) -> Optional[WorkflowRun]:
51
+ """Load a workflow run by ID."""
52
+ run_path = self.runs_dir / f"{run_id}.json"
53
+ if not run_path.exists():
54
+ return None
55
+
56
+ with open(run_path, "r") as f:
57
+ data = json.load(f)
58
+
59
+ return WorkflowRun.model_validate(data)
60
+
61
+ def list_runs(
62
+ self, workflow_name: Optional[str] = None, status: Optional[str] = None
63
+ ) -> List[WorkflowRun]:
64
+ """List workflow runs."""
65
+ runs = []
66
+
67
+ for run_file in self.runs_dir.glob("*.json"):
68
+ try:
69
+ with open(run_file, "r") as f:
70
+ data = json.load(f)
71
+
72
+ run = WorkflowRun.model_validate(data)
73
+
74
+ # Apply filters
75
+ if workflow_name and run.workflow_name != workflow_name:
76
+ continue
77
+ if status and run.status != status:
78
+ continue
79
+
80
+ runs.append(run)
81
+ except Exception:
82
+ # Skip corrupted files
83
+ continue
84
+
85
+ # Sort by started_at (newest first)
86
+ runs.sort(key=lambda r: r.started_at, reverse=True)
87
+ return runs
88
+
89
+ def save_task(self, task: TaskRun) -> None:
90
+ """Save a task.
91
+
92
+ Args:
93
+ task: TaskRun to save
94
+
95
+ Raises:
96
+ KailashStorageError: If task cannot be saved
97
+ """
98
+ try:
99
+ # For compatibility with tests, save tasks directly in tasks dir if no run_id specified
100
+ if not task.run_id or task.run_id == "test-run-id":
101
+ task_path = self.tasks_dir / f"{task.task_id}.json"
102
+ else:
103
+ # Create run-specific directory
104
+ run_tasks_dir = self.tasks_dir / task.run_id
105
+ run_tasks_dir.mkdir(exist_ok=True)
106
+
107
+ # Save task data
108
+ task_path = run_tasks_dir / f"{task.task_id}.json"
109
+ with open(task_path, "w") as f:
110
+ json.dump(task.to_dict(), f, indent=2)
111
+
112
+ # Save metrics if present
113
+ if hasattr(task, "metrics") and task.metrics:
114
+ metrics_path = self.metrics_dir / f"{task.task_id}.json"
115
+ with open(metrics_path, "w") as f:
116
+ json.dump(task.metrics.model_dump(), f, indent=2)
117
+
118
+ # Update index
119
+ self._update_index(task)
120
+ except Exception as e:
121
+ raise KailashStorageError(f"Failed to save task: {e}") from e
122
+
123
+ def get_task(self, task_id: str) -> Optional[TaskRun]:
124
+ """Load a task by ID.
125
+
126
+ Args:
127
+ task_id: Task ID to load
128
+
129
+ Returns:
130
+ TaskRun or None if not found
131
+
132
+ Raises:
133
+ KailashStorageError: If task cannot be loaded
134
+ """
135
+ try:
136
+ # First check direct path for tests
137
+ task_path = self.tasks_dir / f"{task_id}.json"
138
+ if task_path.exists():
139
+ with open(task_path, "r") as tf:
140
+ task_data = json.load(tf)
141
+ task = TaskRun.model_validate(task_data)
142
+
143
+ # Load metrics if available
144
+ metrics_path = self.metrics_dir / f"{task_id}.json"
145
+ if metrics_path.exists():
146
+ with open(metrics_path, "r") as mf:
147
+ metrics_data = json.load(mf)
148
+ task.metrics = TaskMetrics.model_validate(metrics_data)
149
+
150
+ return task
151
+
152
+ # Then check index for run_id
153
+ index_path = self._get_index_file()
154
+ if index_path.exists():
155
+ with open(index_path, "r") as f:
156
+ index = json.load(f)
157
+ if task_id in index.get("tasks", {}):
158
+ run_id = index["tasks"][task_id]["run_id"]
159
+ run_task_path = self.tasks_dir / run_id / f"{task_id}.json"
160
+ if run_task_path.exists():
161
+ with open(run_task_path, "r") as tf:
162
+ task_data = json.load(tf)
163
+ task = TaskRun.model_validate(task_data)
164
+
165
+ # Load metrics if available
166
+ metrics_path = self.metrics_dir / f"{task_id}.json"
167
+ if metrics_path.exists():
168
+ with open(metrics_path, "r") as mf:
169
+ metrics_data = json.load(mf)
170
+ task.metrics = TaskMetrics.model_validate(metrics_data)
171
+
172
+ return task
173
+
174
+ # Fallback to search if index lookup fails
175
+ return self.load_task(task_id)
176
+ except Exception as e:
177
+ if isinstance(e, KailashStorageError):
178
+ raise
179
+ raise KailashStorageError(f"Failed to get task: {e}") from e
180
+
181
+ def load_task(self, task_id: str) -> Optional[TaskRun]:
182
+ """Load a task by ID."""
183
+ # Search all run directories
184
+ for run_dir in self.tasks_dir.iterdir():
185
+ if not run_dir.is_dir():
186
+ continue
187
+
188
+ task_path = run_dir / f"{task_id}.json"
189
+ if task_path.exists():
190
+ with open(task_path, "r") as f:
191
+ data = json.load(f)
192
+ return TaskRun.model_validate(data)
193
+
194
+ return None
195
+
196
+ def list_tasks(
197
+ self,
198
+ run_id: str,
199
+ node_id: Optional[str] = None,
200
+ status: Optional[TaskStatus] = None,
201
+ ) -> List[TaskRun]:
202
+ """List tasks for a run."""
203
+ tasks = []
204
+ run_tasks_dir = self.tasks_dir / run_id
205
+
206
+ if not run_tasks_dir.exists():
207
+ return tasks
208
+
209
+ for task_file in run_tasks_dir.glob("*.json"):
210
+ try:
211
+ with open(task_file, "r") as f:
212
+ data = json.load(f)
213
+
214
+ task = TaskRun.model_validate(data)
215
+
216
+ # Apply filters
217
+ if node_id and task.node_id != node_id:
218
+ continue
219
+ if status and task.status != status:
220
+ continue
221
+
222
+ tasks.append(task)
223
+ except Exception:
224
+ # Skip corrupted files
225
+ continue
226
+
227
+ # Sort by started_at
228
+ tasks.sort(key=lambda t: t.started_at or t.task_id)
229
+ return tasks
230
+
231
+ def clear(self) -> None:
232
+ """Clear all stored data."""
233
+ # Remove all files
234
+ for run_file in self.runs_dir.glob("*.json"):
235
+ run_file.unlink()
236
+
237
+ for task_dir in self.tasks_dir.iterdir():
238
+ if task_dir.is_dir():
239
+ for task_file in task_dir.glob("*.json"):
240
+ task_file.unlink()
241
+ task_dir.rmdir()
242
+
243
+ def export_run(self, run_id: str, output_path: str) -> None:
244
+ """Export a run and its tasks."""
245
+ # Load run
246
+ run = self.load_run(run_id)
247
+ if not run:
248
+ raise ValueError(f"Run {run_id} not found")
249
+
250
+ # Load tasks
251
+ tasks = self.list_tasks(run_id)
252
+
253
+ # Create export data
254
+ export_data = {
255
+ "run": run.to_dict(),
256
+ "tasks": [task.to_dict() for task in tasks],
257
+ }
258
+
259
+ # Write export
260
+ with open(output_path, "w") as f:
261
+ json.dump(export_data, f, indent=2)
262
+
263
+ def import_run(self, input_path: str) -> str:
264
+ """Import a run and its tasks."""
265
+ with open(input_path, "r") as f:
266
+ import_data = json.load(f)
267
+
268
+ # Import run
269
+ run_data = import_data["run"]
270
+ run = WorkflowRun.model_validate(run_data)
271
+
272
+ # Generate new run ID to avoid conflicts
273
+ original_run_id = run.run_id
274
+ run.run_id = str(uuid4())
275
+
276
+ # Save run
277
+ self.save_run(run)
278
+
279
+ # Import tasks with updated run ID
280
+ for task_data in import_data.get("tasks", []):
281
+ task = TaskRun.model_validate(task_data)
282
+ task.run_id = run.run_id
283
+ self.save_task(task)
284
+
285
+ return run.run_id
286
+
287
+ def update_task(self, task: TaskRun) -> None:
288
+ """Update an existing task.
289
+
290
+ Args:
291
+ task: TaskRun to update
292
+
293
+ Raises:
294
+ KailashStorageError: If task cannot be updated
295
+ """
296
+ try:
297
+ # First check direct path for tests
298
+ task_path = self.tasks_dir / f"{task.task_id}.json"
299
+ run_task_path = None
300
+
301
+ if not task_path.exists():
302
+ # Check run directory path
303
+ run_tasks_dir = self.tasks_dir / task.run_id
304
+ run_task_path = run_tasks_dir / f"{task.task_id}.json"
305
+ if not run_task_path.exists():
306
+ # For tests, save it in the direct path
307
+ task_path = self.tasks_dir / f"{task.task_id}.json"
308
+
309
+ # Update task file
310
+ path_to_use = task_path if task_path.exists() else run_task_path
311
+ with open(path_to_use, "w") as f:
312
+ json.dump(task.to_dict(), f, indent=2)
313
+
314
+ # Update metrics if present
315
+ if hasattr(task, "metrics") and task.metrics:
316
+ metrics_path = self.metrics_dir / f"{task.task_id}.json"
317
+ with open(metrics_path, "w") as f:
318
+ json.dump(task.metrics.model_dump(), f, indent=2)
319
+
320
+ # Update index
321
+ self._update_index(task)
322
+ except Exception as e:
323
+ if isinstance(e, KailashStorageError):
324
+ raise
325
+ raise KailashStorageError(f"Failed to update task: {e}") from e
326
+
327
+ def delete_task(self, task_id: str) -> None:
328
+ """Delete a task.
329
+
330
+ Args:
331
+ task_id: Task ID to delete
332
+
333
+ Raises:
334
+ KailashStorageError: If task cannot be deleted
335
+ """
336
+ try:
337
+ # Try direct path first for tests
338
+ direct_task_path = self.tasks_dir / f"{task_id}.json"
339
+ if direct_task_path.exists():
340
+ direct_task_path.unlink()
341
+
342
+ # Find task file from index (for nested paths)
343
+ task = self.get_task(task_id)
344
+ if task:
345
+ # Delete task file from run directory
346
+ run_tasks_dir = self.tasks_dir / task.run_id
347
+ task_path = run_tasks_dir / f"{task_id}.json"
348
+ if task_path.exists():
349
+ task_path.unlink()
350
+
351
+ # Delete metrics file if exists
352
+ metrics_path = self.metrics_dir / f"{task_id}.json"
353
+ if metrics_path.exists():
354
+ metrics_path.unlink()
355
+
356
+ # Update index
357
+ index_path = self._get_index_file()
358
+ if index_path.exists():
359
+ with open(index_path, "r") as f:
360
+ index = json.load(f)
361
+
362
+ if task_id in index.get("tasks", {}):
363
+ del index["tasks"][task_id]
364
+
365
+ with open(index_path, "w") as f:
366
+ json.dump(index, f, indent=2)
367
+ except Exception as e:
368
+ if isinstance(e, KailashStorageError):
369
+ raise
370
+ raise KailashStorageError(f"Failed to delete task: {e}") from e
371
+
372
+ def get_all_tasks(self) -> List[TaskRun]:
373
+ """Get all tasks.
374
+
375
+ Returns:
376
+ List of all TaskRun objects
377
+
378
+ Raises:
379
+ KailashStorageError: If tasks cannot be retrieved
380
+ """
381
+ try:
382
+ tasks = []
383
+
384
+ # First load tasks in the main tasks directory (for tests)
385
+ for task_file in self.tasks_dir.glob("*.json"):
386
+ if task_file.is_file():
387
+ with open(task_file, "r") as f:
388
+ task_data = json.load(f)
389
+
390
+ task = TaskRun.model_validate(task_data)
391
+
392
+ # Load metrics if available
393
+ metrics_path = self.metrics_dir / f"{task.task_id}.json"
394
+ if metrics_path.exists():
395
+ with open(metrics_path, "r") as f:
396
+ metrics_data = json.load(f)
397
+ task.metrics = TaskMetrics.model_validate(metrics_data)
398
+
399
+ tasks.append(task)
400
+
401
+ # Then iterate through all run directories
402
+ for run_dir in self.tasks_dir.iterdir():
403
+ if not run_dir.is_dir() or run_dir.name in ["metrics", "index.json"]:
404
+ continue
405
+
406
+ # Load all tasks in the run directory
407
+ for task_file in run_dir.glob("*.json"):
408
+ with open(task_file, "r") as f:
409
+ task_data = json.load(f)
410
+
411
+ task = TaskRun.model_validate(task_data)
412
+
413
+ # Load metrics if available
414
+ metrics_path = self.metrics_dir / f"{task.task_id}.json"
415
+ if metrics_path.exists():
416
+ with open(metrics_path, "r") as f:
417
+ metrics_data = json.load(f)
418
+ task.metrics = TaskMetrics.model_validate(metrics_data)
419
+
420
+ tasks.append(task)
421
+
422
+ return tasks
423
+ except Exception as e:
424
+ raise KailashStorageError(f"Failed to get all tasks: {e}") from e
425
+
426
+ def get_tasks_by_run(self, run_id: str) -> List[TaskRun]:
427
+ """Get all tasks for a specific run.
428
+
429
+ Args:
430
+ run_id: The run ID to filter tasks by
431
+
432
+ Returns:
433
+ List of TaskRun objects for the specified run
434
+
435
+ Raises:
436
+ KailashStorageError: If tasks cannot be retrieved
437
+ """
438
+ return self.list_tasks(run_id)
439
+
440
+ def query_tasks(
441
+ self,
442
+ node_id: Optional[str] = None,
443
+ status: Optional[TaskStatus] = None,
444
+ started_after: Optional[datetime] = None,
445
+ completed_before: Optional[datetime] = None,
446
+ ) -> List[TaskRun]:
447
+ """Query tasks with filters.
448
+
449
+ Args:
450
+ node_id: Filter by node ID
451
+ status: Filter by status
452
+ started_after: Filter by start time (inclusive)
453
+ completed_before: Filter by completion time (exclusive)
454
+
455
+ Returns:
456
+ List of matching TaskRun objects
457
+
458
+ Raises:
459
+ KailashStorageError: If tasks cannot be queried
460
+ """
461
+ try:
462
+ # Get all tasks first
463
+ all_tasks = self.get_all_tasks()
464
+
465
+ # Apply filters
466
+ filtered_tasks = []
467
+ for task in all_tasks:
468
+ # Apply node_id filter
469
+ if node_id is not None and task.node_id != node_id:
470
+ continue
471
+
472
+ # Apply status filter
473
+ if status is not None and task.status != status:
474
+ continue
475
+
476
+ # Apply time filters
477
+ if started_after is not None and (
478
+ not task.started_at or task.started_at < started_after
479
+ ):
480
+ continue
481
+
482
+ if completed_before is not None and (
483
+ not task.ended_at or task.ended_at >= completed_before
484
+ ):
485
+ continue
486
+
487
+ filtered_tasks.append(task)
488
+
489
+ return filtered_tasks
490
+ except Exception as e:
491
+ raise KailashStorageError(f"Failed to query tasks: {e}") from e
492
+
493
+ def _get_index_file(self) -> Path:
494
+ """Get path to index file."""
495
+ return self.base_path / "index.json"
496
+
497
+ def _update_index(self, obj: Any) -> None:
498
+ """Update the index file with run or task information.
499
+
500
+ Args:
501
+ obj: WorkflowRun or TaskRun to index
502
+
503
+ Raises:
504
+ KailashStorageError: If index cannot be updated
505
+ """
506
+ try:
507
+ index_path = self._get_index_file()
508
+
509
+ # Load existing index
510
+ if index_path.exists():
511
+ try:
512
+ with open(index_path, "r") as f:
513
+ index = json.load(f)
514
+ except json.JSONDecodeError:
515
+ # Handle case where the file is empty or invalid
516
+ index = {"runs": {}, "tasks": {}}
517
+ else:
518
+ index = {"runs": {}, "tasks": {}}
519
+
520
+ # Update index based on object type
521
+ if isinstance(obj, WorkflowRun):
522
+ index["runs"][obj.run_id] = {
523
+ "workflow_name": obj.workflow_name,
524
+ "status": obj.status,
525
+ "started_at": obj.started_at.isoformat(),
526
+ "ended_at": obj.ended_at.isoformat() if obj.ended_at else None,
527
+ }
528
+ elif isinstance(obj, TaskRun):
529
+ index["tasks"][obj.task_id] = {
530
+ "run_id": obj.run_id,
531
+ "node_id": obj.node_id,
532
+ "status": obj.status,
533
+ "started_at": (
534
+ obj.started_at.isoformat() if obj.started_at else None
535
+ ),
536
+ "ended_at": obj.ended_at.isoformat() if obj.ended_at else None,
537
+ }
538
+
539
+ # Save index
540
+ with open(index_path, "w") as f:
541
+ json.dump(index, f, indent=2)
542
+ except Exception as e:
543
+ raise KailashStorageError(f"Failed to update index: {e}") from e
File without changes