kailash 0.8.4__py3-none-any.whl → 0.8.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +1 -7
- kailash/cli/__init__.py +11 -1
- kailash/cli/validation_audit.py +570 -0
- kailash/core/actors/supervisor.py +1 -1
- kailash/core/resilience/circuit_breaker.py +71 -1
- kailash/core/resilience/health_monitor.py +172 -0
- kailash/edge/compliance.py +33 -0
- kailash/edge/consistency.py +609 -0
- kailash/edge/coordination/__init__.py +30 -0
- kailash/edge/coordination/global_ordering.py +355 -0
- kailash/edge/coordination/leader_election.py +217 -0
- kailash/edge/coordination/partition_detector.py +296 -0
- kailash/edge/coordination/raft.py +485 -0
- kailash/edge/discovery.py +63 -1
- kailash/edge/migration/__init__.py +19 -0
- kailash/edge/migration/edge_migrator.py +832 -0
- kailash/edge/monitoring/__init__.py +21 -0
- kailash/edge/monitoring/edge_monitor.py +736 -0
- kailash/edge/prediction/__init__.py +10 -0
- kailash/edge/prediction/predictive_warmer.py +591 -0
- kailash/edge/resource/__init__.py +102 -0
- kailash/edge/resource/cloud_integration.py +796 -0
- kailash/edge/resource/cost_optimizer.py +949 -0
- kailash/edge/resource/docker_integration.py +919 -0
- kailash/edge/resource/kubernetes_integration.py +893 -0
- kailash/edge/resource/platform_integration.py +913 -0
- kailash/edge/resource/predictive_scaler.py +959 -0
- kailash/edge/resource/resource_analyzer.py +824 -0
- kailash/edge/resource/resource_pools.py +610 -0
- kailash/integrations/dataflow_edge.py +261 -0
- kailash/mcp_server/registry_integration.py +1 -1
- kailash/monitoring/__init__.py +18 -0
- kailash/monitoring/alerts.py +646 -0
- kailash/monitoring/metrics.py +677 -0
- kailash/nodes/__init__.py +2 -0
- kailash/nodes/ai/semantic_memory.py +2 -2
- kailash/nodes/base.py +545 -0
- kailash/nodes/edge/__init__.py +36 -0
- kailash/nodes/edge/base.py +240 -0
- kailash/nodes/edge/cloud_node.py +710 -0
- kailash/nodes/edge/coordination.py +239 -0
- kailash/nodes/edge/docker_node.py +825 -0
- kailash/nodes/edge/edge_data.py +582 -0
- kailash/nodes/edge/edge_migration_node.py +392 -0
- kailash/nodes/edge/edge_monitoring_node.py +421 -0
- kailash/nodes/edge/edge_state.py +673 -0
- kailash/nodes/edge/edge_warming_node.py +393 -0
- kailash/nodes/edge/kubernetes_node.py +652 -0
- kailash/nodes/edge/platform_node.py +766 -0
- kailash/nodes/edge/resource_analyzer_node.py +378 -0
- kailash/nodes/edge/resource_optimizer_node.py +501 -0
- kailash/nodes/edge/resource_scaler_node.py +397 -0
- kailash/nodes/ports.py +676 -0
- kailash/runtime/local.py +344 -1
- kailash/runtime/validation/__init__.py +20 -0
- kailash/runtime/validation/connection_context.py +119 -0
- kailash/runtime/validation/enhanced_error_formatter.py +202 -0
- kailash/runtime/validation/error_categorizer.py +164 -0
- kailash/runtime/validation/metrics.py +380 -0
- kailash/runtime/validation/performance.py +615 -0
- kailash/runtime/validation/suggestion_engine.py +212 -0
- kailash/testing/fixtures.py +2 -2
- kailash/workflow/builder.py +230 -4
- kailash/workflow/contracts.py +418 -0
- kailash/workflow/edge_infrastructure.py +369 -0
- kailash/workflow/migration.py +3 -3
- kailash/workflow/type_inference.py +669 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/METADATA +43 -27
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/RECORD +73 -27
- kailash/nexus/__init__.py +0 -21
- kailash/nexus/cli/__init__.py +0 -5
- kailash/nexus/cli/__main__.py +0 -6
- kailash/nexus/cli/main.py +0 -176
- kailash/nexus/factory.py +0 -413
- kailash/nexus/gateway.py +0 -545
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/WHEEL +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/entry_points.txt +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,673 @@
|
|
1
|
+
"""Edge state management for distributed stateful operations."""
|
2
|
+
|
3
|
+
import asyncio
|
4
|
+
import hashlib
|
5
|
+
from datetime import UTC, datetime, timedelta
|
6
|
+
from enum import Enum
|
7
|
+
from typing import Any, Callable, Dict, List, Optional, Set
|
8
|
+
|
9
|
+
from kailash.edge.location import EdgeLocation
|
10
|
+
from kailash.nodes.base import NodeParameter, register_node
|
11
|
+
|
12
|
+
from .base import EdgeNode
|
13
|
+
|
14
|
+
|
15
|
+
class StateOperation(Enum):
    """Operations accepted by EdgeStateMachine.async_run (string-valued)."""

    GET = "get"  # read one key, or the whole state when no key is given
    SET = "set"  # write a value under a key
    UPDATE = "update"  # apply a caller-supplied callable to a key's value
    DELETE = "delete"  # remove a key
    INCREMENT = "increment"  # add a delta to a numeric value
    APPEND = "append"  # append a value to a list
    LOCK = "lock"  # acquire a named local lock with auto-release
    UNLOCK = "unlock"  # release a named local lock
|
26
|
+
|
27
|
+
|
28
|
+
@register_node()
class EdgeStateMachine(EdgeNode):
    """Distributed state machine with global uniqueness guarantees.

    Similar to Cloudflare Durable Objects - ensures single instance
    globally for a given state ID with automatic edge affinity.

    Ownership is tracked through class-level registries (``_global_instances``
    and ``_global_locks``): only the lease-holding primary serves requests,
    and other instances redirect callers to the current owner. Mutations bump
    a version counter and optionally persist/replicate (both simulated here;
    the registries are process-local, not truly distributed).
    """
|
35
|
+
|
36
|
+
def get_parameters(self) -> Dict[str, NodeParameter]:
|
37
|
+
"""Get node parameters."""
|
38
|
+
return {
|
39
|
+
"state_id": NodeParameter(
|
40
|
+
name="state_id",
|
41
|
+
type=str,
|
42
|
+
required=True,
|
43
|
+
description="Unique identifier for this state instance",
|
44
|
+
),
|
45
|
+
"operation": NodeParameter(
|
46
|
+
name="operation",
|
47
|
+
type=str,
|
48
|
+
default="get",
|
49
|
+
required=False,
|
50
|
+
description="State operation (get|set|update|delete|increment|append|lock|unlock)",
|
51
|
+
),
|
52
|
+
"key": NodeParameter(
|
53
|
+
name="key",
|
54
|
+
type=str,
|
55
|
+
required=False,
|
56
|
+
description="State key for operations",
|
57
|
+
),
|
58
|
+
"value": NodeParameter(
|
59
|
+
name="value",
|
60
|
+
type=object, # Can be any type
|
61
|
+
required=False,
|
62
|
+
description="Value to set/append",
|
63
|
+
),
|
64
|
+
"update_fn": NodeParameter(
|
65
|
+
name="update_fn",
|
66
|
+
type=object, # Will be validated as callable
|
67
|
+
required=False,
|
68
|
+
description="Update function for update operations",
|
69
|
+
),
|
70
|
+
"increment": NodeParameter(
|
71
|
+
name="increment",
|
72
|
+
type=int,
|
73
|
+
default=1,
|
74
|
+
required=False,
|
75
|
+
description="Amount to increment by",
|
76
|
+
),
|
77
|
+
"lock_name": NodeParameter(
|
78
|
+
name="lock_name",
|
79
|
+
type=str,
|
80
|
+
required=False,
|
81
|
+
description="Name of lock to acquire/release",
|
82
|
+
),
|
83
|
+
"timeout_ms": NodeParameter(
|
84
|
+
name="timeout_ms",
|
85
|
+
type=int,
|
86
|
+
default=30000,
|
87
|
+
required=False,
|
88
|
+
description="Lock timeout in milliseconds",
|
89
|
+
),
|
90
|
+
"lease_duration_ms": NodeParameter(
|
91
|
+
name="lease_duration_ms",
|
92
|
+
type=int,
|
93
|
+
default=30000,
|
94
|
+
required=False,
|
95
|
+
description="Lease duration for global lock (ms)",
|
96
|
+
),
|
97
|
+
"enable_persistence": NodeParameter(
|
98
|
+
name="enable_persistence",
|
99
|
+
type=bool,
|
100
|
+
default=True,
|
101
|
+
required=False,
|
102
|
+
description="Whether to persist state to durable storage",
|
103
|
+
),
|
104
|
+
"enable_replication": NodeParameter(
|
105
|
+
name="enable_replication",
|
106
|
+
type=bool,
|
107
|
+
default=True,
|
108
|
+
required=False,
|
109
|
+
description="Whether to replicate state for availability",
|
110
|
+
),
|
111
|
+
}
|
112
|
+
|
113
|
+
    # Class-level registry for global uniqueness:
    #   _global_instances maps state_id -> the primary EdgeStateMachine,
    #   _global_locks maps "state:<id>" -> lease info (owner, expiry, state_id).
    # NOTE(review): these are process-local; a real deployment would need a
    # distributed registry (see _find_primary_instance).
    _global_instances: Dict[str, "EdgeStateMachine"] = {}
    _global_locks: Dict[str, Dict[str, Any]] = {}
|
116
|
+
|
117
|
+
def __init__(self, **config):
|
118
|
+
"""Initialize edge state machine."""
|
119
|
+
self.state_id = config.get("state_id")
|
120
|
+
if not self.state_id:
|
121
|
+
raise ValueError("state_id is required for EdgeStateMachine")
|
122
|
+
|
123
|
+
super().__init__(**config)
|
124
|
+
|
125
|
+
# Instance state
|
126
|
+
self.state_data: Dict[str, Any] = {}
|
127
|
+
self.state_metadata: Dict[str, Any] = {
|
128
|
+
"created_at": datetime.now(UTC).isoformat(),
|
129
|
+
"version": 0,
|
130
|
+
"last_modified": datetime.now(UTC).isoformat(),
|
131
|
+
"access_count": 0,
|
132
|
+
}
|
133
|
+
|
134
|
+
# Locks and leases
|
135
|
+
self.local_locks: Set[str] = set()
|
136
|
+
self.lease_expiry: Optional[datetime] = None
|
137
|
+
|
138
|
+
# Replication tracking
|
139
|
+
self.replica_edges: List[EdgeLocation] = []
|
140
|
+
self.is_primary = False
|
141
|
+
self._background_tasks: List[asyncio.Task] = []
|
142
|
+
|
143
|
+
async def initialize(self):
|
144
|
+
"""Initialize with global uniqueness check."""
|
145
|
+
# Initialize parent edge infrastructure
|
146
|
+
await super().initialize()
|
147
|
+
|
148
|
+
# Ensure single global instance
|
149
|
+
await self._ensure_single_instance()
|
150
|
+
|
151
|
+
# Load persisted state if exists
|
152
|
+
if self.config.get("enable_persistence", True):
|
153
|
+
await self._load_persisted_state()
|
154
|
+
|
155
|
+
async def async_run(self, **kwargs) -> Dict[str, Any]:
|
156
|
+
"""Execute state operation."""
|
157
|
+
operation = StateOperation(kwargs.get("operation", "get"))
|
158
|
+
|
159
|
+
# Check if we're still the primary instance
|
160
|
+
if not await self._verify_primary_status():
|
161
|
+
# Redirect to current primary
|
162
|
+
primary_edge = await self._find_primary_instance()
|
163
|
+
return {
|
164
|
+
"success": False,
|
165
|
+
"redirect": True,
|
166
|
+
"primary_edge": primary_edge.name if primary_edge else None,
|
167
|
+
"message": "State instance has moved to different edge",
|
168
|
+
}
|
169
|
+
|
170
|
+
# Update access metadata
|
171
|
+
self.state_metadata["access_count"] += 1
|
172
|
+
self.state_metadata["last_accessed"] = datetime.now(UTC).isoformat()
|
173
|
+
|
174
|
+
# Handle operation
|
175
|
+
if operation == StateOperation.GET:
|
176
|
+
return await self._handle_get(kwargs)
|
177
|
+
elif operation == StateOperation.SET:
|
178
|
+
return await self._handle_set(kwargs)
|
179
|
+
elif operation == StateOperation.UPDATE:
|
180
|
+
return await self._handle_update(kwargs)
|
181
|
+
elif operation == StateOperation.DELETE:
|
182
|
+
return await self._handle_delete(kwargs)
|
183
|
+
elif operation == StateOperation.INCREMENT:
|
184
|
+
return await self._handle_increment(kwargs)
|
185
|
+
elif operation == StateOperation.APPEND:
|
186
|
+
return await self._handle_append(kwargs)
|
187
|
+
elif operation == StateOperation.LOCK:
|
188
|
+
return await self._handle_lock(kwargs)
|
189
|
+
elif operation == StateOperation.UNLOCK:
|
190
|
+
return await self._handle_unlock(kwargs)
|
191
|
+
else:
|
192
|
+
raise ValueError(f"Unknown operation: {operation}")
|
193
|
+
|
194
|
+
async def _ensure_single_instance(self):
|
195
|
+
"""Ensure only one instance exists globally for this state_id."""
|
196
|
+
# Try to acquire global lock
|
197
|
+
lock_acquired = await self._acquire_global_lock()
|
198
|
+
|
199
|
+
if not lock_acquired:
|
200
|
+
# Another instance exists
|
201
|
+
existing_edge = await self._find_primary_instance()
|
202
|
+
if existing_edge:
|
203
|
+
raise RuntimeError(
|
204
|
+
f"State instance {self.state_id} already exists "
|
205
|
+
f"on edge {existing_edge.name}"
|
206
|
+
)
|
207
|
+
|
208
|
+
# Register as global instance
|
209
|
+
EdgeStateMachine._global_instances[self.state_id] = self
|
210
|
+
self.is_primary = True
|
211
|
+
|
212
|
+
# Set edge affinity for this state
|
213
|
+
self._set_edge_affinity()
|
214
|
+
|
215
|
+
async def _acquire_global_lock(self) -> bool:
|
216
|
+
"""Acquire global lock for state_id."""
|
217
|
+
lock_key = f"state:{self.state_id}"
|
218
|
+
|
219
|
+
# Check if lock exists
|
220
|
+
if lock_key in EdgeStateMachine._global_locks:
|
221
|
+
lock_info = EdgeStateMachine._global_locks[lock_key]
|
222
|
+
|
223
|
+
# Check if lock expired
|
224
|
+
if datetime.now(UTC) < lock_info["expiry"]:
|
225
|
+
return False
|
226
|
+
|
227
|
+
# Acquire lock
|
228
|
+
lease_duration_ms = self.config.get("lease_duration_ms", 30000)
|
229
|
+
expiry = datetime.now(UTC) + timedelta(milliseconds=lease_duration_ms)
|
230
|
+
|
231
|
+
EdgeStateMachine._global_locks[lock_key] = {
|
232
|
+
"owner": self.current_edge.name if self.current_edge else "unknown",
|
233
|
+
"expiry": expiry,
|
234
|
+
"state_id": self.state_id,
|
235
|
+
}
|
236
|
+
|
237
|
+
self.lease_expiry = expiry
|
238
|
+
|
239
|
+
# Start lease renewal task
|
240
|
+
self._lease_renewal_task = asyncio.create_task(self._renew_lease())
|
241
|
+
|
242
|
+
return True
|
243
|
+
|
244
|
+
async def _renew_lease(self):
|
245
|
+
"""Periodically renew global lock lease."""
|
246
|
+
lease_duration_ms = self.config.get("lease_duration_ms", 30000)
|
247
|
+
renewal_interval = lease_duration_ms * 0.5 / 1000 # Renew at 50%
|
248
|
+
|
249
|
+
while self.is_primary:
|
250
|
+
await asyncio.sleep(renewal_interval)
|
251
|
+
|
252
|
+
if self.is_primary and self.lease_expiry:
|
253
|
+
# Extend lease
|
254
|
+
self.lease_expiry = datetime.now(UTC) + timedelta(
|
255
|
+
milliseconds=lease_duration_ms
|
256
|
+
)
|
257
|
+
|
258
|
+
lock_key = f"state:{self.state_id}"
|
259
|
+
if lock_key in EdgeStateMachine._global_locks:
|
260
|
+
EdgeStateMachine._global_locks[lock_key][
|
261
|
+
"expiry"
|
262
|
+
] = self.lease_expiry
|
263
|
+
|
264
|
+
def _set_edge_affinity(self):
|
265
|
+
"""Set edge affinity based on state_id hash."""
|
266
|
+
# Use consistent hashing to determine preferred edge
|
267
|
+
state_hash = hashlib.md5(self.state_id.encode()).hexdigest()
|
268
|
+
hash_value = int(state_hash[:8], 16)
|
269
|
+
|
270
|
+
# Get all edges and sort by name for consistency
|
271
|
+
all_edges = sorted(self.edge_discovery.get_all_edges(), key=lambda e: e.name)
|
272
|
+
|
273
|
+
if all_edges:
|
274
|
+
# Select edge based on hash
|
275
|
+
preferred_index = hash_value % len(all_edges)
|
276
|
+
self.preferred_locations = [all_edges[preferred_index].name]
|
277
|
+
|
278
|
+
async def _find_primary_instance(self) -> Optional[EdgeLocation]:
|
279
|
+
"""Find which edge hosts the primary instance."""
|
280
|
+
# In production, this would query a distributed registry
|
281
|
+
lock_key = f"state:{self.state_id}"
|
282
|
+
|
283
|
+
if lock_key in EdgeStateMachine._global_locks:
|
284
|
+
lock_info = EdgeStateMachine._global_locks[lock_key]
|
285
|
+
edge_name = lock_info.get("owner")
|
286
|
+
|
287
|
+
if edge_name:
|
288
|
+
return self.edge_discovery.get_edge(edge_name)
|
289
|
+
|
290
|
+
return None
|
291
|
+
|
292
|
+
async def _verify_primary_status(self) -> bool:
|
293
|
+
"""Verify we're still the primary instance."""
|
294
|
+
if not self.is_primary:
|
295
|
+
return False
|
296
|
+
|
297
|
+
# Check if lease is still valid
|
298
|
+
if self.lease_expiry and datetime.now(UTC) > self.lease_expiry:
|
299
|
+
self.is_primary = False
|
300
|
+
return False
|
301
|
+
|
302
|
+
return True
|
303
|
+
|
304
|
+
async def _handle_get(self, params: Dict[str, Any]) -> Dict[str, Any]:
|
305
|
+
"""Handle GET operation."""
|
306
|
+
key = params.get("key")
|
307
|
+
|
308
|
+
if key:
|
309
|
+
# Get specific key
|
310
|
+
value = self.state_data.get(key)
|
311
|
+
return {
|
312
|
+
"success": True,
|
313
|
+
"key": key,
|
314
|
+
"value": value,
|
315
|
+
"exists": key in self.state_data,
|
316
|
+
"metadata": self.state_metadata,
|
317
|
+
}
|
318
|
+
else:
|
319
|
+
# Get entire state
|
320
|
+
return {
|
321
|
+
"success": True,
|
322
|
+
"state": self.state_data.copy(),
|
323
|
+
"metadata": self.state_metadata,
|
324
|
+
}
|
325
|
+
|
326
|
+
async def _handle_set(self, params: Dict[str, Any]) -> Dict[str, Any]:
|
327
|
+
"""Handle SET operation."""
|
328
|
+
key = params.get("key")
|
329
|
+
value = params.get("value")
|
330
|
+
|
331
|
+
if not key:
|
332
|
+
raise ValueError("SET requires 'key'")
|
333
|
+
|
334
|
+
# Update state
|
335
|
+
old_value = self.state_data.get(key)
|
336
|
+
self.state_data[key] = value
|
337
|
+
|
338
|
+
# Update metadata
|
339
|
+
self.state_metadata["version"] += 1
|
340
|
+
self.state_metadata["last_modified"] = datetime.now(UTC).isoformat()
|
341
|
+
|
342
|
+
# Persist if enabled
|
343
|
+
if self.config.get("enable_persistence", True):
|
344
|
+
await self._persist_state()
|
345
|
+
|
346
|
+
# Replicate if enabled
|
347
|
+
if self.config.get("enable_replication", True):
|
348
|
+
task = asyncio.create_task(self._replicate_state())
|
349
|
+
self._background_tasks.append(task)
|
350
|
+
|
351
|
+
return {
|
352
|
+
"success": True,
|
353
|
+
"key": key,
|
354
|
+
"old_value": old_value,
|
355
|
+
"new_value": value,
|
356
|
+
"version": self.state_metadata["version"],
|
357
|
+
}
|
358
|
+
|
359
|
+
async def _handle_update(self, params: Dict[str, Any]) -> Dict[str, Any]:
|
360
|
+
"""Handle UPDATE operation with function."""
|
361
|
+
key = params.get("key")
|
362
|
+
update_fn = params.get("update_fn")
|
363
|
+
|
364
|
+
if not key or not callable(update_fn):
|
365
|
+
raise ValueError("UPDATE requires 'key' and callable 'update_fn'")
|
366
|
+
|
367
|
+
# Get current value
|
368
|
+
current_value = self.state_data.get(key)
|
369
|
+
|
370
|
+
# Apply update function
|
371
|
+
try:
|
372
|
+
new_value = update_fn(current_value)
|
373
|
+
except Exception as e:
|
374
|
+
return {"success": False, "error": f"Update function failed: {str(e)}"}
|
375
|
+
|
376
|
+
# Update state
|
377
|
+
self.state_data[key] = new_value
|
378
|
+
|
379
|
+
# Update metadata
|
380
|
+
self.state_metadata["version"] += 1
|
381
|
+
self.state_metadata["last_modified"] = datetime.now(UTC).isoformat()
|
382
|
+
|
383
|
+
# Persist and replicate
|
384
|
+
if self.config.get("enable_persistence", True):
|
385
|
+
await self._persist_state()
|
386
|
+
|
387
|
+
if self.config.get("enable_replication", True):
|
388
|
+
task = asyncio.create_task(self._replicate_state())
|
389
|
+
self._background_tasks.append(task)
|
390
|
+
|
391
|
+
return {
|
392
|
+
"success": True,
|
393
|
+
"key": key,
|
394
|
+
"old_value": current_value,
|
395
|
+
"new_value": new_value,
|
396
|
+
"version": self.state_metadata["version"],
|
397
|
+
}
|
398
|
+
|
399
|
+
async def _handle_delete(self, params: Dict[str, Any]) -> Dict[str, Any]:
|
400
|
+
"""Handle DELETE operation."""
|
401
|
+
key = params.get("key")
|
402
|
+
|
403
|
+
if not key:
|
404
|
+
raise ValueError("DELETE requires 'key'")
|
405
|
+
|
406
|
+
# Delete from state
|
407
|
+
old_value = self.state_data.pop(key, None)
|
408
|
+
|
409
|
+
# Update metadata
|
410
|
+
self.state_metadata["version"] += 1
|
411
|
+
self.state_metadata["last_modified"] = datetime.now(UTC).isoformat()
|
412
|
+
|
413
|
+
# Persist and replicate
|
414
|
+
if self.config.get("enable_persistence", True):
|
415
|
+
await self._persist_state()
|
416
|
+
|
417
|
+
if self.config.get("enable_replication", True):
|
418
|
+
task = asyncio.create_task(self._replicate_state())
|
419
|
+
self._background_tasks.append(task)
|
420
|
+
|
421
|
+
return {
|
422
|
+
"success": True,
|
423
|
+
"key": key,
|
424
|
+
"deleted": old_value is not None,
|
425
|
+
"old_value": old_value,
|
426
|
+
"version": self.state_metadata["version"],
|
427
|
+
}
|
428
|
+
|
429
|
+
async def _handle_increment(self, params: Dict[str, Any]) -> Dict[str, Any]:
|
430
|
+
"""Handle INCREMENT operation for numeric values."""
|
431
|
+
key = params.get("key")
|
432
|
+
increment = params.get("increment", 1)
|
433
|
+
|
434
|
+
if not key:
|
435
|
+
raise ValueError("INCREMENT requires 'key'")
|
436
|
+
|
437
|
+
# Get current value
|
438
|
+
current_value = self.state_data.get(key, 0)
|
439
|
+
|
440
|
+
# Validate numeric
|
441
|
+
if not isinstance(current_value, (int, float)):
|
442
|
+
return {
|
443
|
+
"success": False,
|
444
|
+
"error": f"Cannot increment non-numeric value: {type(current_value)}",
|
445
|
+
}
|
446
|
+
|
447
|
+
# Increment
|
448
|
+
new_value = current_value + increment
|
449
|
+
self.state_data[key] = new_value
|
450
|
+
|
451
|
+
# Update metadata
|
452
|
+
self.state_metadata["version"] += 1
|
453
|
+
self.state_metadata["last_modified"] = datetime.now(UTC).isoformat()
|
454
|
+
|
455
|
+
# Persist and replicate
|
456
|
+
if self.config.get("enable_persistence", True):
|
457
|
+
await self._persist_state()
|
458
|
+
|
459
|
+
if self.config.get("enable_replication", True):
|
460
|
+
task = asyncio.create_task(self._replicate_state())
|
461
|
+
self._background_tasks.append(task)
|
462
|
+
|
463
|
+
return {
|
464
|
+
"success": True,
|
465
|
+
"key": key,
|
466
|
+
"old_value": current_value,
|
467
|
+
"new_value": new_value,
|
468
|
+
"increment": increment,
|
469
|
+
"version": self.state_metadata["version"],
|
470
|
+
}
|
471
|
+
|
472
|
+
async def _handle_append(self, params: Dict[str, Any]) -> Dict[str, Any]:
|
473
|
+
"""Handle APPEND operation for list values."""
|
474
|
+
key = params.get("key")
|
475
|
+
value = params.get("value")
|
476
|
+
|
477
|
+
if not key:
|
478
|
+
raise ValueError("APPEND requires 'key'")
|
479
|
+
|
480
|
+
# Get current value
|
481
|
+
current_value = self.state_data.get(key, [])
|
482
|
+
|
483
|
+
# Ensure it's a list
|
484
|
+
if not isinstance(current_value, list):
|
485
|
+
return {
|
486
|
+
"success": False,
|
487
|
+
"error": f"Cannot append to non-list value: {type(current_value)}",
|
488
|
+
}
|
489
|
+
|
490
|
+
# Append
|
491
|
+
new_value = current_value + [value]
|
492
|
+
self.state_data[key] = new_value
|
493
|
+
|
494
|
+
# Update metadata
|
495
|
+
self.state_metadata["version"] += 1
|
496
|
+
self.state_metadata["last_modified"] = datetime.now(UTC).isoformat()
|
497
|
+
|
498
|
+
# Persist and replicate
|
499
|
+
if self.config.get("enable_persistence", True):
|
500
|
+
await self._persist_state()
|
501
|
+
|
502
|
+
if self.config.get("enable_replication", True):
|
503
|
+
task = asyncio.create_task(self._replicate_state())
|
504
|
+
self._background_tasks.append(task)
|
505
|
+
|
506
|
+
return {
|
507
|
+
"success": True,
|
508
|
+
"key": key,
|
509
|
+
"list_size": len(new_value),
|
510
|
+
"appended_value": value,
|
511
|
+
"version": self.state_metadata["version"],
|
512
|
+
}
|
513
|
+
|
514
|
+
async def _handle_lock(self, params: Dict[str, Any]) -> Dict[str, Any]:
|
515
|
+
"""Handle LOCK operation for distributed locking."""
|
516
|
+
lock_name = params.get("lock_name")
|
517
|
+
timeout_ms = params.get("timeout_ms", 5000)
|
518
|
+
|
519
|
+
if not lock_name:
|
520
|
+
raise ValueError("LOCK requires 'lock_name'")
|
521
|
+
|
522
|
+
# Check if already locked
|
523
|
+
if lock_name in self.local_locks:
|
524
|
+
return {
|
525
|
+
"success": False,
|
526
|
+
"lock_name": lock_name,
|
527
|
+
"error": "Lock already held",
|
528
|
+
}
|
529
|
+
|
530
|
+
# Acquire lock
|
531
|
+
self.local_locks.add(lock_name)
|
532
|
+
|
533
|
+
# Set up auto-release
|
534
|
+
task = asyncio.create_task(self._auto_release_lock(lock_name, timeout_ms))
|
535
|
+
self._background_tasks.append(task)
|
536
|
+
|
537
|
+
return {
|
538
|
+
"success": True,
|
539
|
+
"lock_name": lock_name,
|
540
|
+
"timeout_ms": timeout_ms,
|
541
|
+
"holder": self.current_edge.name if self.current_edge else "unknown",
|
542
|
+
}
|
543
|
+
|
544
|
+
async def _handle_unlock(self, params: Dict[str, Any]) -> Dict[str, Any]:
|
545
|
+
"""Handle UNLOCK operation."""
|
546
|
+
lock_name = params.get("lock_name")
|
547
|
+
|
548
|
+
if not lock_name:
|
549
|
+
raise ValueError("UNLOCK requires 'lock_name'")
|
550
|
+
|
551
|
+
# Release lock
|
552
|
+
released = lock_name in self.local_locks
|
553
|
+
self.local_locks.discard(lock_name)
|
554
|
+
|
555
|
+
return {"success": True, "lock_name": lock_name, "released": released}
|
556
|
+
|
557
|
+
async def _auto_release_lock(self, lock_name: str, timeout_ms: int):
|
558
|
+
"""Auto-release lock after timeout."""
|
559
|
+
await asyncio.sleep(timeout_ms / 1000)
|
560
|
+
self.local_locks.discard(lock_name)
|
561
|
+
|
562
|
+
async def _persist_state(self):
|
563
|
+
"""Persist state to durable storage."""
|
564
|
+
# In production, this would write to distributed storage
|
565
|
+
# For now, simulate with delay
|
566
|
+
await asyncio.sleep(0.01)
|
567
|
+
|
568
|
+
self.logger.debug(
|
569
|
+
f"Persisted state for {self.state_id} "
|
570
|
+
f"(version: {self.state_metadata['version']})"
|
571
|
+
)
|
572
|
+
|
573
|
+
    async def _load_persisted_state(self):
        """Load state from durable storage.

        Placeholder: production would read from distributed storage; for
        now the instance simply starts with empty state.
        """
        pass
|
578
|
+
|
579
|
+
async def _replicate_state(self):
|
580
|
+
"""Replicate state to backup edges."""
|
581
|
+
if not self.config.get("enable_replication", True):
|
582
|
+
return
|
583
|
+
|
584
|
+
# Select replica edges if not already done
|
585
|
+
if not self.replica_edges:
|
586
|
+
await self._select_replica_edges()
|
587
|
+
|
588
|
+
# Replicate to each edge
|
589
|
+
replication_tasks = []
|
590
|
+
for edge in self.replica_edges:
|
591
|
+
replication_tasks.append(self._replicate_to_edge(edge))
|
592
|
+
|
593
|
+
await asyncio.gather(*replication_tasks, return_exceptions=True)
|
594
|
+
|
595
|
+
async def _select_replica_edges(self):
|
596
|
+
"""Select edges for state replication."""
|
597
|
+
all_edges = self.edge_discovery.get_all_edges()
|
598
|
+
|
599
|
+
# Remove current edge
|
600
|
+
candidate_edges = [
|
601
|
+
e
|
602
|
+
for e in all_edges
|
603
|
+
if e.name != (self.current_edge.name if self.current_edge else None)
|
604
|
+
]
|
605
|
+
|
606
|
+
# Select based on different regions for availability
|
607
|
+
regions_seen = set()
|
608
|
+
for edge in candidate_edges:
|
609
|
+
if edge.region not in regions_seen:
|
610
|
+
self.replica_edges.append(edge)
|
611
|
+
regions_seen.add(edge.region)
|
612
|
+
|
613
|
+
if len(self.replica_edges) >= 2: # Keep 2 replicas
|
614
|
+
break
|
615
|
+
|
616
|
+
async def _replicate_to_edge(self, edge: EdgeLocation):
|
617
|
+
"""Replicate state to specific edge."""
|
618
|
+
# In production, this would use edge-to-edge communication
|
619
|
+
await asyncio.sleep(0.02) # Simulate replication
|
620
|
+
|
621
|
+
self.logger.debug(f"Replicated state {self.state_id} to edge {edge.name}")
|
622
|
+
|
623
|
+
async def migrate_to_edge(
|
624
|
+
self, target_edge: EdgeLocation, state_data: Optional[Dict[str, Any]] = None
|
625
|
+
) -> bool:
|
626
|
+
"""Migrate state machine to different edge."""
|
627
|
+
if not self.is_primary:
|
628
|
+
return False
|
629
|
+
|
630
|
+
try:
|
631
|
+
# Transfer primary status
|
632
|
+
self.is_primary = False
|
633
|
+
|
634
|
+
# Update global registry
|
635
|
+
lock_key = f"state:{self.state_id}"
|
636
|
+
if lock_key in EdgeStateMachine._global_locks:
|
637
|
+
EdgeStateMachine._global_locks[lock_key]["owner"] = target_edge.name
|
638
|
+
|
639
|
+
# Persist final state
|
640
|
+
await self._persist_state()
|
641
|
+
|
642
|
+
# Clean up
|
643
|
+
if self.state_id in EdgeStateMachine._global_instances:
|
644
|
+
del EdgeStateMachine._global_instances[self.state_id]
|
645
|
+
|
646
|
+
return True
|
647
|
+
|
648
|
+
except Exception as e:
|
649
|
+
self.logger.error(f"State migration failed: {e}")
|
650
|
+
self.is_primary = True # Restore primary status
|
651
|
+
return False
|
652
|
+
|
653
|
+
async def cleanup(self):
|
654
|
+
"""Cleanup resources including background tasks."""
|
655
|
+
# Cancel lease renewal task if running
|
656
|
+
if hasattr(self, "_lease_renewal_task") and self._lease_renewal_task:
|
657
|
+
self._lease_renewal_task.cancel()
|
658
|
+
try:
|
659
|
+
await self._lease_renewal_task
|
660
|
+
except asyncio.CancelledError:
|
661
|
+
pass
|
662
|
+
|
663
|
+
# Cancel all background tasks
|
664
|
+
for task in self._background_tasks:
|
665
|
+
if not task.done():
|
666
|
+
task.cancel()
|
667
|
+
|
668
|
+
# Wait for all tasks to complete
|
669
|
+
if self._background_tasks:
|
670
|
+
await asyncio.gather(*self._background_tasks, return_exceptions=True)
|
671
|
+
|
672
|
+
# Mark as not primary to stop renewal loop
|
673
|
+
self.is_primary = False
|