kailash 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. kailash/__init__.py +1 -1
  2. kailash/client/__init__.py +12 -0
  3. kailash/client/enhanced_client.py +306 -0
  4. kailash/core/actors/__init__.py +16 -0
  5. kailash/core/actors/connection_actor.py +566 -0
  6. kailash/core/actors/supervisor.py +364 -0
  7. kailash/edge/__init__.py +16 -0
  8. kailash/edge/compliance.py +834 -0
  9. kailash/edge/discovery.py +659 -0
  10. kailash/edge/location.py +582 -0
  11. kailash/gateway/__init__.py +33 -0
  12. kailash/gateway/api.py +289 -0
  13. kailash/gateway/enhanced_gateway.py +357 -0
  14. kailash/gateway/resource_resolver.py +217 -0
  15. kailash/gateway/security.py +227 -0
  16. kailash/middleware/auth/models.py +2 -2
  17. kailash/middleware/database/base_models.py +1 -7
  18. kailash/middleware/gateway/__init__.py +22 -0
  19. kailash/middleware/gateway/checkpoint_manager.py +398 -0
  20. kailash/middleware/gateway/deduplicator.py +382 -0
  21. kailash/middleware/gateway/durable_gateway.py +417 -0
  22. kailash/middleware/gateway/durable_request.py +498 -0
  23. kailash/middleware/gateway/event_store.py +459 -0
  24. kailash/nodes/admin/permission_check.py +817 -33
  25. kailash/nodes/admin/role_management.py +1242 -108
  26. kailash/nodes/admin/schema_manager.py +438 -0
  27. kailash/nodes/admin/user_management.py +1124 -1582
  28. kailash/nodes/code/__init__.py +8 -1
  29. kailash/nodes/code/async_python.py +1035 -0
  30. kailash/nodes/code/python.py +1 -0
  31. kailash/nodes/data/async_sql.py +9 -3
  32. kailash/nodes/data/sql.py +20 -11
  33. kailash/nodes/data/workflow_connection_pool.py +643 -0
  34. kailash/nodes/rag/__init__.py +1 -4
  35. kailash/resources/__init__.py +40 -0
  36. kailash/resources/factory.py +533 -0
  37. kailash/resources/health.py +319 -0
  38. kailash/resources/reference.py +288 -0
  39. kailash/resources/registry.py +392 -0
  40. kailash/runtime/async_local.py +711 -302
  41. kailash/testing/__init__.py +34 -0
  42. kailash/testing/async_test_case.py +353 -0
  43. kailash/testing/async_utils.py +345 -0
  44. kailash/testing/fixtures.py +458 -0
  45. kailash/testing/mock_registry.py +495 -0
  46. kailash/workflow/__init__.py +8 -0
  47. kailash/workflow/async_builder.py +621 -0
  48. kailash/workflow/async_patterns.py +766 -0
  49. kailash/workflow/cyclic_runner.py +107 -16
  50. kailash/workflow/graph.py +7 -2
  51. kailash/workflow/resilience.py +11 -1
  52. {kailash-0.5.0.dist-info → kailash-0.6.0.dist-info}/METADATA +7 -4
  53. {kailash-0.5.0.dist-info → kailash-0.6.0.dist-info}/RECORD +57 -22
  54. {kailash-0.5.0.dist-info → kailash-0.6.0.dist-info}/WHEEL +0 -0
  55. {kailash-0.5.0.dist-info → kailash-0.6.0.dist-info}/entry_points.txt +0 -0
  56. {kailash-0.5.0.dist-info → kailash-0.6.0.dist-info}/licenses/LICENSE +0 -0
  57. {kailash-0.5.0.dist-info → kailash-0.6.0.dist-info}/top_level.txt +0 -0
kailash/middleware/gateway/checkpoint_manager.py
@@ -0,0 +1,398 @@
+ """Checkpoint management with tiered storage and compression.
+
+ This module provides:
+ - Checkpoint creation and restoration
+ - Tiered storage (memory/disk/cloud)
+ - Automatic compression
+ - Garbage collection
+ """
+
+ import asyncio
+ import datetime as dt
+ import gzip
+ import json
+ import logging
+ import os
+ import time
+ from collections import OrderedDict
+ from datetime import datetime, timedelta
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Protocol
+
+ from .durable_request import Checkpoint
+
+ logger = logging.getLogger(__name__)
+
+
+ class StorageBackend(Protocol):
+     """Protocol for checkpoint storage backends."""
+
+     async def save(self, key: str, data: bytes) -> None:
+         """Save data to storage."""
+         ...
+
+     async def load(self, key: str) -> Optional[bytes]:
+         """Load data from storage."""
+         ...
+
+     async def delete(self, key: str) -> None:
+         """Delete data from storage."""
+         ...
+
+     async def list_keys(self, prefix: str) -> List[str]:
+         """List keys with prefix."""
+         ...
+
+
+ class MemoryStorage:
+     """In-memory storage backend with LRU eviction."""
+
+     def __init__(self, max_size_mb: int = 100):
+         self.max_size_bytes = max_size_mb * 1024 * 1024
+         self.data: OrderedDict[str, bytes] = OrderedDict()
+         self.current_size = 0
+         self._lock = asyncio.Lock()
+
+     async def save(self, key: str, data: bytes) -> None:
+         """Save to memory with LRU eviction."""
+         async with self._lock:
+             # Remove if exists to update position
+             if key in self.data:
+                 self.current_size -= len(self.data[key])
+                 del self.data[key]
+
+             # Evict oldest entries if needed
+             while self.current_size + len(data) > self.max_size_bytes and self.data:
+                 evicted_key, evicted_data = self.data.popitem(last=False)
+                 self.current_size -= len(evicted_data)
+                 logger.debug(f"Evicted {evicted_key} from memory storage")
+
+             # Add new data
+             self.data[key] = data
+             self.current_size += len(data)
+
+     async def load(self, key: str) -> Optional[bytes]:
+         """Load from memory."""
+         async with self._lock:
+             if key in self.data:
+                 # Move to end (most recently used)
+                 self.data.move_to_end(key)
+                 return self.data[key]
+             return None
+
+     async def delete(self, key: str) -> None:
+         """Delete from memory."""
+         async with self._lock:
+             if key in self.data:
+                 self.current_size -= len(self.data[key])
+                 del self.data[key]
+
+     async def list_keys(self, prefix: str) -> List[str]:
+         """List keys with prefix."""
+         async with self._lock:
+             return [k for k in self.data.keys() if k.startswith(prefix)]
+
+
+ class DiskStorage:
+     """Disk-based storage backend."""
+
+     def __init__(self, base_path: str = "/tmp/kailash_checkpoints"):
+         self.base_path = Path(base_path)
+         self.base_path.mkdir(parents=True, exist_ok=True)
+         self._lock = asyncio.Lock()
+
+     def _get_path(self, key: str) -> Path:
+         """Get file path for key."""
+         # Use subdirectories to avoid too many files in one directory
+         parts = key.split("_")
+         if len(parts) >= 2:
+             subdir = self.base_path / parts[0]
+             subdir.mkdir(exist_ok=True)
+             return subdir / f"{key}.ckpt"
+         return self.base_path / f"{key}.ckpt"
+
+     async def save(self, key: str, data: bytes) -> None:
+         """Save to disk."""
+         path = self._get_path(key)
+
+         # Write atomically
+         temp_path = path.with_suffix(".tmp")
+         try:
+             await asyncio.get_event_loop().run_in_executor(
+                 None, lambda: temp_path.write_bytes(data)
+             )
+
+             # Atomic rename
+             await asyncio.get_event_loop().run_in_executor(
+                 None, lambda: temp_path.rename(path)
+             )
+         except Exception as e:
+             logger.error(f"Failed to save checkpoint to disk: {e}")
+             if temp_path.exists():
+                 temp_path.unlink()
+             raise
+
+     async def load(self, key: str) -> Optional[bytes]:
+         """Load from disk."""
+         path = self._get_path(key)
+
+         if not path.exists():
+             return None
+
+         try:
+             return await asyncio.get_event_loop().run_in_executor(None, path.read_bytes)
+         except Exception as e:
+             logger.error(f"Failed to load checkpoint from disk: {e}")
+             return None
+
+     async def delete(self, key: str) -> None:
+         """Delete from disk."""
+         path = self._get_path(key)
+
+         if path.exists():
+             await asyncio.get_event_loop().run_in_executor(None, path.unlink)
+
+     async def list_keys(self, prefix: str) -> List[str]:
+         """List keys with prefix."""
+         keys = []
+
+         for path in self.base_path.rglob("*.ckpt"):
+             key = path.stem
+             if key.startswith(prefix):
+                 keys.append(key)
+
+         return keys
+
+
+ class CheckpointManager:
+     """Manages checkpoints with tiered storage and compression."""
+
+     def __init__(
+         self,
+         memory_storage: Optional[MemoryStorage] = None,
+         disk_storage: Optional[DiskStorage] = None,
+         cloud_storage: Optional[StorageBackend] = None,
+         compression_enabled: bool = True,
+         compression_threshold_bytes: int = 1024,  # 1KB
+         retention_hours: int = 24,
+     ):
+         """Initialize checkpoint manager."""
+         self.memory_storage = memory_storage or MemoryStorage()
+         self.disk_storage = disk_storage or DiskStorage()
+         self.cloud_storage = cloud_storage  # Optional cloud backend
+         self.compression_enabled = compression_enabled
+         self.compression_threshold = compression_threshold_bytes
+         self.retention_hours = retention_hours
+
+         # Metrics
+         self.save_count = 0
+         self.load_count = 0
+         self.compression_ratio_sum = 0.0
+
+         # Start garbage collection task
+         self._gc_task = asyncio.create_task(self._garbage_collection_loop())
+
+     async def save_checkpoint(self, checkpoint: Checkpoint) -> None:
+         """Save checkpoint to storage."""
+         start_time = time.time()
+
+         # Serialize checkpoint
+         data = json.dumps(checkpoint.to_dict()).encode("utf-8")
+         original_size = len(data)
+
+         # Compress if enabled and beneficial
+         compression_ratio = 1.0  # Default to no compression
+         if self.compression_enabled and original_size > self.compression_threshold:
+             compressed = gzip.compress(data, compresslevel=6)
+             if len(compressed) < original_size:
+                 data = compressed
+                 compression_ratio = len(compressed) / original_size
+                 logger.debug(
+                     f"Compressed checkpoint {checkpoint.checkpoint_id}: "
+                     f"{original_size} -> {len(data)} bytes ({compression_ratio:.2f})"
+                 )
+
+         # Always update compression ratio sum
+         self.compression_ratio_sum += compression_ratio
+
+         # Save to tiered storage
+         key = checkpoint.checkpoint_id
+
+         # Always save to memory for fast access
+         await self.memory_storage.save(key, data)
+
+         # Save to disk for durability
+         await self.disk_storage.save(key, data)
+
+         # Save to cloud if available (async, don't wait)
+         if self.cloud_storage:
+             asyncio.create_task(self._save_to_cloud(key, data))
+
+         self.save_count += 1
+         duration_ms = (time.time() - start_time) * 1000
+
+         logger.info(
+             f"Saved checkpoint {checkpoint.checkpoint_id} "
+             f"({len(data)} bytes) in {duration_ms:.1f}ms"
+         )
+
+     async def load_checkpoint(self, checkpoint_id: str) -> Optional[Checkpoint]:
+         """Load checkpoint from storage."""
+         start_time = time.time()
+
+         # Try memory first (fastest)
+         data = await self.memory_storage.load(checkpoint_id)
+         source = "memory"
+
+         # Try disk if not in memory
+         if not data:
+             data = await self.disk_storage.load(checkpoint_id)
+             source = "disk"
+
+             # Promote to memory if found
+             if data:
+                 await self.memory_storage.save(checkpoint_id, data)
+
+         # Try cloud as last resort
+         if not data and self.cloud_storage:
+             data = await self.cloud_storage.load(checkpoint_id)
+             source = "cloud"
+
+             # Promote to memory and disk if found
+             if data:
+                 await self.memory_storage.save(checkpoint_id, data)
+                 await self.disk_storage.save(checkpoint_id, data)
+
+         if not data:
+             logger.warning(f"Checkpoint {checkpoint_id} not found")
+             return None
+
+         # Decompress if needed
+         try:
+             # Try to decompress first
+             decompressed = gzip.decompress(data)
+             data = decompressed
+         except:
+             # Not compressed or decompression failed
+             pass
+
+         # Deserialize
+         try:
+             checkpoint_dict = json.loads(data.decode("utf-8"))
+             checkpoint = Checkpoint.from_dict(checkpoint_dict)
+
+             self.load_count += 1
+             duration_ms = (time.time() - start_time) * 1000
+
+             logger.info(
+                 f"Loaded checkpoint {checkpoint_id} from {source} "
+                 f"in {duration_ms:.1f}ms"
+             )
+
+             return checkpoint
+
+         except Exception as e:
+             logger.error(f"Failed to deserialize checkpoint {checkpoint_id}: {e}")
+             return None
+
+     async def load_latest_checkpoint(self, request_id: str) -> Optional[Checkpoint]:
+         """Load the latest checkpoint for a request."""
+         # List all checkpoints for request
+         prefix = f"ckpt_{request_id}"
+
+         # Check all storage tiers
+         all_keys = set()
+         all_keys.update(await self.memory_storage.list_keys(prefix))
+         all_keys.update(await self.disk_storage.list_keys(prefix))
+         if self.cloud_storage:
+             all_keys.update(await self.cloud_storage.list_keys(prefix))
+
+         if not all_keys:
+             return None
+
+         # Load all checkpoints and find latest by sequence
+         checkpoints = []
+         for key in all_keys:
+             checkpoint = await self.load_checkpoint(key)
+             if checkpoint and checkpoint.request_id == request_id:
+                 checkpoints.append(checkpoint)
+
+         if not checkpoints:
+             return None
+
+         # Return checkpoint with highest sequence number
+         return max(checkpoints, key=lambda c: c.sequence)
+
+     async def delete_checkpoint(self, checkpoint_id: str) -> None:
+         """Delete checkpoint from all storage tiers."""
+         await self.memory_storage.delete(checkpoint_id)
+         await self.disk_storage.delete(checkpoint_id)
+         if self.cloud_storage:
+             await self.cloud_storage.delete(checkpoint_id)
+
+         logger.info(f"Deleted checkpoint {checkpoint_id}")
+
+     async def _save_to_cloud(self, key: str, data: bytes) -> None:
+         """Save to cloud storage asynchronously."""
+         try:
+             await self.cloud_storage.save(key, data)
+             logger.debug(f"Saved checkpoint {key} to cloud storage")
+         except Exception as e:
+             logger.error(f"Failed to save checkpoint {key} to cloud: {e}")
+
+     async def _garbage_collection_loop(self) -> None:
+         """Periodically clean up old checkpoints."""
+         while True:
+             try:
+                 await asyncio.sleep(3600)  # Run every hour
+                 await self._garbage_collection()
+             except asyncio.CancelledError:
+                 break
+             except Exception as e:
+                 logger.error(f"Garbage collection error: {e}")
+
+     async def _garbage_collection(self) -> None:
+         """Clean up old checkpoints."""
+         cutoff_time = datetime.now(dt.UTC) - timedelta(hours=self.retention_hours)
+         deleted_count = 0
+
+         # Get all checkpoint keys from disk (most complete list)
+         all_keys = await self.disk_storage.list_keys("ckpt_")
+
+         for key in all_keys:
+             checkpoint = await self.load_checkpoint(key)
+             if checkpoint:
+                 # Handle both timezone-aware and naive datetimes
+                 checkpoint_time = checkpoint.created_at
+                 if checkpoint_time.tzinfo is None:
+                     # Assume naive datetime is UTC
+                     checkpoint_time = checkpoint_time.replace(tzinfo=dt.UTC)
+
+                 if checkpoint_time < cutoff_time:
+                     await self.delete_checkpoint(key)
+                     deleted_count += 1
+
+         if deleted_count > 0:
+             logger.info(f"Garbage collection deleted {deleted_count} old checkpoints")
+
+     def get_stats(self) -> Dict[str, Any]:
+         """Get checkpoint manager statistics."""
+         avg_compression_ratio = (
+             self.compression_ratio_sum / self.save_count if self.save_count > 0 else 1.0
+         )
+
+         return {
+             "save_count": self.save_count,
+             "load_count": self.load_count,
+             "avg_compression_ratio": avg_compression_ratio,
+             "compression_enabled": self.compression_enabled,
+             "retention_hours": self.retention_hours,
+         }
+
+     async def close(self) -> None:
+         """Close checkpoint manager and cleanup."""
+         self._gc_task.cancel()
+         try:
+             await self._gc_task
+         except asyncio.CancelledError:
+             pass
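
For orientation, the sketch below shows one way the new checkpoint manager could be wired up once 0.6.0 is installed. It is not code from the package: FakeCloudStorage and the "req-123" request ID are invented stand-ins, and the sketch only uses names defined in the diff above (CheckpointManager, MemoryStorage, DiskStorage, load_latest_checkpoint, get_stats, close). Any object exposing async save/load/delete/list_keys satisfies the StorageBackend protocol, and the constructor starts a background garbage-collection task, so the manager must be created while an event loop is running.

import asyncio
from typing import Dict, List, Optional

from kailash.middleware.gateway.checkpoint_manager import (
    CheckpointManager,
    DiskStorage,
    MemoryStorage,
)


class FakeCloudStorage:
    """Hypothetical cloud tier: an in-memory dict standing in for a real object store."""

    def __init__(self) -> None:
        self._blobs: Dict[str, bytes] = {}

    async def save(self, key: str, data: bytes) -> None:
        self._blobs[key] = data

    async def load(self, key: str) -> Optional[bytes]:
        return self._blobs.get(key)

    async def delete(self, key: str) -> None:
        self._blobs.pop(key, None)

    async def list_keys(self, prefix: str) -> List[str]:
        return [k for k in self._blobs if k.startswith(prefix)]


async def main() -> None:
    # __init__ schedules the GC loop with asyncio.create_task, so construct the
    # manager inside a coroutine that is already running on the event loop.
    manager = CheckpointManager(
        memory_storage=MemoryStorage(max_size_mb=50),
        disk_storage=DiskStorage(base_path="/tmp/kailash_checkpoints"),
        cloud_storage=FakeCloudStorage(),
        retention_hours=12,
    )
    try:
        # "req-123" is a made-up request ID; load_latest_checkpoint scans all three
        # tiers for keys prefixed "ckpt_<request_id>" and returns the highest sequence.
        latest = await manager.load_latest_checkpoint("req-123")
        if latest is not None:
            print(f"Resuming from checkpoint sequence {latest.sequence}")
        print(manager.get_stats())
    finally:
        await manager.close()  # cancels the background GC task


asyncio.run(main())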