smartify_ai-0.1.0-py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as published.
- smartify/__init__.py +3 -0
- smartify/agents/__init__.py +0 -0
- smartify/agents/adapters/__init__.py +13 -0
- smartify/agents/adapters/anthropic.py +253 -0
- smartify/agents/adapters/openai.py +289 -0
- smartify/api/__init__.py +26 -0
- smartify/api/auth.py +352 -0
- smartify/api/errors.py +380 -0
- smartify/api/events.py +345 -0
- smartify/api/server.py +992 -0
- smartify/cli/__init__.py +1 -0
- smartify/cli/main.py +430 -0
- smartify/engine/__init__.py +64 -0
- smartify/engine/approval.py +479 -0
- smartify/engine/orchestrator.py +1365 -0
- smartify/engine/scheduler.py +380 -0
- smartify/engine/spark.py +294 -0
- smartify/guardrails/__init__.py +22 -0
- smartify/guardrails/breakers.py +409 -0
- smartify/models/__init__.py +61 -0
- smartify/models/grid.py +625 -0
- smartify/notifications/__init__.py +22 -0
- smartify/notifications/webhook.py +556 -0
- smartify/state/__init__.py +46 -0
- smartify/state/checkpoint.py +558 -0
- smartify/state/resume.py +301 -0
- smartify/state/store.py +370 -0
- smartify/tools/__init__.py +17 -0
- smartify/tools/base.py +196 -0
- smartify/tools/builtin/__init__.py +79 -0
- smartify/tools/builtin/file.py +464 -0
- smartify/tools/builtin/http.py +195 -0
- smartify/tools/builtin/shell.py +137 -0
- smartify/tools/mcp/__init__.py +33 -0
- smartify/tools/mcp/adapter.py +157 -0
- smartify/tools/mcp/client.py +334 -0
- smartify/tools/mcp/registry.py +130 -0
- smartify/validator/__init__.py +0 -0
- smartify/validator/validate.py +271 -0
- smartify/workspace/__init__.py +5 -0
- smartify/workspace/manager.py +248 -0
- smartify_ai-0.1.0.dist-info/METADATA +201 -0
- smartify_ai-0.1.0.dist-info/RECORD +46 -0
- smartify_ai-0.1.0.dist-info/WHEEL +4 -0
- smartify_ai-0.1.0.dist-info/entry_points.txt +2 -0
- smartify_ai-0.1.0.dist-info/licenses/LICENSE +21 -0
smartify/engine/scheduler.py
ADDED
@@ -0,0 +1,380 @@
"""DAG-based scheduler for Grid execution.

The scheduler determines node execution order based on:
1. Parent-child relationships (when executionMode is 'parent')
2. Explicit edges (when executionMode is 'explicit')
3. runAfter dependencies
4. Parallel execution settings
"""

from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional, Set
from collections import defaultdict

from smartify.models.grid import (
    GridSpec,
    NodeSpec,
    NodeKind,
    ExecutionMode,
)


class NodeState(str, Enum):
    """Execution state of a node."""
    PENDING = "pending"      # Not yet ready to run
    READY = "ready"          # Dependencies satisfied, can run
    RUNNING = "running"      # Currently executing
    COMPLETED = "completed"  # Finished successfully
    FAILED = "failed"        # Finished with error
    SKIPPED = "skipped"      # Skipped due to condition
    BLOCKED = "blocked"      # Waiting on external input


@dataclass
class NodeExecution:
    """Tracks execution state for a node."""
    node: NodeSpec
    state: NodeState = NodeState.PENDING
    dependencies: Set[str] = field(default_factory=set)
    dependents: Set[str] = field(default_factory=set)
    output: Optional[dict] = None
    error: Optional[str] = None
    attempt: int = 0


class DAGScheduler:
    """Schedules node execution based on dependency graph.

    Usage:
        scheduler = DAGScheduler(grid)
        scheduler.build_graph()

        while not scheduler.is_complete():
            ready_nodes = scheduler.get_ready_nodes()
            for node_id in ready_nodes:
                scheduler.mark_running(node_id)
                # Execute node...
                scheduler.mark_completed(node_id, output)
    """

    def __init__(self, grid: GridSpec):
        self.grid = grid
        self.nodes: Dict[str, NodeExecution] = {}
        self.execution_order: List[str] = []
        self._built = False

    def build_graph(self) -> None:
        """Build the dependency graph from grid topology."""
        if self._built:
            return

        # Initialize all nodes
        for node in self.grid.topology.nodes:
            self.nodes[node.id] = NodeExecution(node=node)

        # Build dependencies based on execution mode
        if self.grid.topology.executionMode == ExecutionMode.EXPLICIT:
            self._build_explicit_dependencies()
        else:
            # Default: parent mode
            self._build_parent_dependencies()

        # Add runAfter dependencies (applies to both modes)
        self._add_run_after_dependencies()

        # Validate no cycles
        self._detect_cycles()

        # Mark initial ready nodes
        self._update_ready_nodes()

        self._built = True

    def _build_parent_dependencies(self) -> None:
        """Build dependencies from parent-child relationships.

        In parent mode:
        - Children depend on their parent completing
        - Siblings run in parallel by default (unless parallel=False)
        - Controller runs first, then its children, etc.
        """
        # Find the controller (root)
        controller_id = None
        for node in self.grid.topology.nodes:
            if node.kind == NodeKind.CONTROLLER:
                controller_id = node.id
                break

        if not controller_id:
            return

        # Build parent -> children map
        children_map: Dict[str, List[str]] = defaultdict(list)
        for node in self.grid.topology.nodes:
            if node.parent:
                children_map[node.parent].append(node.id)

        # Set dependencies: children depend on parent
        for node in self.grid.topology.nodes:
            if node.parent and node.parent in self.nodes:
                self.nodes[node.id].dependencies.add(node.parent)
                self.nodes[node.parent].dependents.add(node.id)

        # Handle sequential execution (parallel=False)
        for parent_id, child_ids in children_map.items():
            sequential_children = []
            for child_id in child_ids:
                node = self.nodes[child_id].node
                if not node.parallel:
                    sequential_children.append(child_id)

            # Sequential children depend on all parallel siblings completing
            if sequential_children:
                parallel_siblings = [c for c in child_ids if c not in sequential_children]
                for seq_child in sequential_children:
                    for par_sibling in parallel_siblings:
                        self.nodes[seq_child].dependencies.add(par_sibling)
                        self.nodes[par_sibling].dependents.add(seq_child)

    def _build_explicit_dependencies(self) -> None:
        """Build dependencies from explicit edges."""
        if not self.grid.topology.edges:
            return

        for edge in self.grid.topology.edges:
            from_id = edge.from_
            targets = edge.to if isinstance(edge.to, list) else [edge.to]

            for to_id in targets:
                if from_id in self.nodes and to_id in self.nodes:
                    self.nodes[to_id].dependencies.add(from_id)
                    self.nodes[from_id].dependents.add(to_id)

    def _add_run_after_dependencies(self) -> None:
        """Add runAfter dependencies."""
        for node in self.grid.topology.nodes:
            if node.runAfter:
                for dep_id in node.runAfter:
                    if dep_id in self.nodes:
                        self.nodes[node.id].dependencies.add(dep_id)
                        self.nodes[dep_id].dependents.add(node.id)

    def _detect_cycles(self) -> None:
        """Detect cycles in the dependency graph using DFS."""
        WHITE, GRAY, BLACK = 0, 1, 2
        color: Dict[str, int] = {node_id: WHITE for node_id in self.nodes}

        def dfs(node_id: str, path: List[str]) -> Optional[List[str]]:
            color[node_id] = GRAY
            path.append(node_id)

            for dep_id in self.nodes[node_id].dependents:
                if color[dep_id] == GRAY:
                    # Found cycle
                    cycle_start = path.index(dep_id)
                    return path[cycle_start:] + [dep_id]
                elif color[dep_id] == WHITE:
                    cycle = dfs(dep_id, path)
                    if cycle:
                        return cycle

            color[node_id] = BLACK
            path.pop()
            return None

        for node_id in self.nodes:
            if color[node_id] == WHITE:
                cycle = dfs(node_id, [])
                if cycle:
                    raise ValueError(f"Cycle detected in dependency graph: {' -> '.join(cycle)}")

    def _update_ready_nodes(self) -> None:
        """Update which nodes are ready to execute."""
        for node_id, execution in self.nodes.items():
            if execution.state == NodeState.PENDING:
                # Check if all dependencies are completed
                deps_satisfied = all(
                    self.nodes[dep].state == NodeState.COMPLETED
                    for dep in execution.dependencies
                )
                if deps_satisfied:
                    # Check 'when' condition if present
                    if self._evaluate_when_condition(execution.node):
                        execution.state = NodeState.READY
                    else:
                        execution.state = NodeState.SKIPPED
                        # Propagate skip to dependents that have no other path
                        self._propagate_skip(node_id)

    def _evaluate_when_condition(self, node: NodeSpec) -> bool:
        """Evaluate a node's 'when' condition.

        TODO: Implement proper expression evaluation.
        For now, always returns True.
        """
        if not node.when:
            return True

        # TODO: Parse and evaluate expression with context
        # Expression context includes: inputs, outputs (from completed nodes), env
        return True

    def _propagate_skip(self, skipped_node_id: str) -> None:
        """Propagate skip status to dependents if they have no other active path."""
        # Simple implementation: just mark direct dependents that only depend on skipped nodes
        for dep_id in self.nodes[skipped_node_id].dependents:
            dep_execution = self.nodes[dep_id]
            if dep_execution.state == NodeState.PENDING:
                # Check if ALL dependencies are skipped
                all_skipped = all(
                    self.nodes[d].state == NodeState.SKIPPED
                    for d in dep_execution.dependencies
                )
                if all_skipped:
                    dep_execution.state = NodeState.SKIPPED
                    self._propagate_skip(dep_id)

    def get_ready_nodes(self) -> List[str]:
        """Get list of node IDs that are ready to execute."""
        return [
            node_id for node_id, execution in self.nodes.items()
            if execution.state == NodeState.READY
        ]

    def get_running_nodes(self) -> List[str]:
        """Get list of node IDs currently running."""
        return [
            node_id for node_id, execution in self.nodes.items()
            if execution.state == NodeState.RUNNING
        ]

    def mark_running(self, node_id: str) -> None:
        """Mark a node as running."""
        if node_id not in self.nodes:
            raise ValueError(f"Unknown node: {node_id}")

        execution = self.nodes[node_id]
        if execution.state != NodeState.READY:
            raise ValueError(f"Node {node_id} is not ready (state: {execution.state})")

        execution.state = NodeState.RUNNING
        execution.attempt += 1

    def mark_completed(self, node_id: str, output: Optional[dict] = None) -> None:
        """Mark a node as completed."""
        if node_id not in self.nodes:
            raise ValueError(f"Unknown node: {node_id}")

        execution = self.nodes[node_id]
        if execution.state != NodeState.RUNNING:
            raise ValueError(f"Node {node_id} is not running (state: {execution.state})")

        execution.state = NodeState.COMPLETED
        execution.output = output
        self.execution_order.append(node_id)

        # Update ready status for dependents
        self._update_ready_nodes()

    def mark_failed(self, node_id: str, error: str) -> None:
        """Mark a node as failed."""
        if node_id not in self.nodes:
            raise ValueError(f"Unknown node: {node_id}")

        execution = self.nodes[node_id]
        execution.state = NodeState.FAILED
        execution.error = error

    def can_retry(self, node_id: str) -> bool:
        """Check if a node can be retried."""
        if node_id not in self.nodes:
            return False

        execution = self.nodes[node_id]
        node = execution.node

        if not node.retry:
            return False

        return execution.attempt < node.retry.maxAttempts

    def reset_for_retry(self, node_id: str) -> None:
        """Reset a failed node for retry."""
        if node_id not in self.nodes:
            raise ValueError(f"Unknown node: {node_id}")

        execution = self.nodes[node_id]
        if execution.state != NodeState.FAILED:
            raise ValueError(f"Node {node_id} is not failed (state: {execution.state})")

        if not self.can_retry(node_id):
            raise ValueError(f"Node {node_id} has exceeded retry limit")

        execution.state = NodeState.READY
        execution.error = None

    def is_complete(self) -> bool:
        """Check if all nodes have finished (completed, failed, or skipped)."""
        terminal_states = {NodeState.COMPLETED, NodeState.FAILED, NodeState.SKIPPED}
        return all(
            execution.state in terminal_states
            for execution in self.nodes.values()
        )

    def is_successful(self) -> bool:
        """Check if grid completed successfully (all nodes completed or skipped)."""
        success_states = {NodeState.COMPLETED, NodeState.SKIPPED}
        return all(
            execution.state in success_states
            for execution in self.nodes.values()
        )

    def get_state_summary(self) -> Dict[NodeState, int]:
        """Get count of nodes in each state."""
        summary: Dict[NodeState, int] = defaultdict(int)
        for execution in self.nodes.values():
            summary[execution.state] += 1
        return dict(summary)

    def get_node_state(self, node_id: str) -> Optional[NodeState]:
        """Get the state of a specific node."""
        if node_id in self.nodes:
            return self.nodes[node_id].state
        return None

    def get_node_output(self, node_id: str) -> Optional[dict]:
        """Get the output of a completed node."""
        if node_id in self.nodes:
            return self.nodes[node_id].output
        return None

    def get_execution_order(self) -> List[str]:
        """Get the order in which nodes completed."""
        return self.execution_order.copy()

    def visualize(self) -> str:
        """Generate a simple text visualization of the DAG."""
        lines = ["DAG Scheduler State:", "=" * 40]

        state_symbols = {
            NodeState.PENDING: "○",
            NodeState.READY: "◎",
            NodeState.RUNNING: "●",
            NodeState.COMPLETED: "✓",
            NodeState.FAILED: "✗",
            NodeState.SKIPPED: "⊘",
            NodeState.BLOCKED: "⊗",
        }

        for node_id, execution in self.nodes.items():
            symbol = state_symbols.get(execution.state, "?")
            deps = ", ".join(execution.dependencies) if execution.dependencies else "none"
            lines.append(f" {symbol} {node_id} ({execution.state.value})")
            lines.append(f" deps: {deps}")

        lines.append("=" * 40)
        summary = self.get_state_summary()
        lines.append(f"Summary: {summary}")

        return "\n".join(lines)
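For orientation, here is a minimal driver sketch of the loop described in the DAGScheduler docstring, extended with the retry API (mark_failed / can_retry / reset_for_retry). It assumes a GridSpec built elsewhere; execute_node() is a hypothetical stand-in for the real executor, which lives in smartify/engine/orchestrator.py.

from smartify.engine.scheduler import DAGScheduler

def execute_node(node_id: str) -> dict:
    """Hypothetical executor stub; the real orchestrator dispatches to an agent adapter."""
    return {"node": node_id}

def run_grid(grid) -> bool:
    scheduler = DAGScheduler(grid)
    scheduler.build_graph()  # raises ValueError if the topology contains a cycle

    while not scheduler.is_complete():
        ready = scheduler.get_ready_nodes()
        if not ready:
            break  # nothing runnable: a non-retryable failure is blocking its dependents
        for node_id in ready:
            scheduler.mark_running(node_id)
            try:
                scheduler.mark_completed(node_id, execute_node(node_id))
            except Exception as exc:
                scheduler.mark_failed(node_id, str(exc))
                if scheduler.can_retry(node_id):
                    scheduler.reset_for_retry(node_id)  # node returns to READY for another attempt

    return scheduler.is_successful()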
smartify/engine/spark.py
ADDED
@@ -0,0 +1,294 @@
"""Spark node spawning for dynamic agent creation.

Sparks are lightweight helper agents spawned at runtime to assist
substations with parallelizable workloads. They are created dynamically
based on workload analysis and subject to spawning limits.
"""

import asyncio
import logging
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional, TYPE_CHECKING
from uuid import uuid4

from smartify.models.grid import (
    NodeSpec,
    NodeKind,
    DynamicSpawningSpec,
)

if TYPE_CHECKING:
    from smartify.engine.orchestrator import GridRun
    from smartify.engine.scheduler import DAGScheduler

logger = logging.getLogger(__name__)


@dataclass
class SparkRequest:
    """Request to spawn a new Spark node."""
    parent_id: str  # Substation requesting the spawn
    task: str       # Task description for the spark
    context: Dict[str, Any] = field(default_factory=dict)  # Context from parent
    priority: int = 0  # Higher = more important

    # Optional constraints
    tools: Optional[List[str]] = None
    max_tokens: Optional[int] = None
    timeout_seconds: Optional[int] = None


@dataclass
class SparkNode:
    """Runtime representation of a spawned Spark node."""
    id: str
    parent_id: str
    task: str
    context: Dict[str, Any]
    created_at: datetime

    # Runtime state
    status: str = "pending"  # pending, running, completed, failed
    output: Optional[Dict[str, Any]] = None
    error: Optional[str] = None
    tokens_used: int = 0


class SparkManager:
    """Manages dynamic Spark node spawning and execution.

    Usage:
        manager = SparkManager(spawning_config, scheduler)

        # Request a spark from a substation
        spark = await manager.spawn(SparkRequest(
            parent_id="substation-1",
            task="Analyze this file",
            context={"file": "data.csv"}
        ))

        # Execute all pending sparks
        results = await manager.execute_pending(run, adapter)
    """

    def __init__(
        self,
        config: DynamicSpawningSpec,
        scheduler: "DAGScheduler",
    ):
        self.config = config
        self.scheduler = scheduler

        # Track spawned sparks
        self.sparks: Dict[str, SparkNode] = {}
        self.sparks_by_parent: Dict[str, List[str]] = {}

        # Counters for limit enforcement
        self._total_spawned = 0

    @property
    def enabled(self) -> bool:
        """Check if spark spawning is enabled."""
        return self.config.enabled and self.config.enableSparks

    def can_spawn(self, parent_id: str) -> bool:
        """Check if a new spark can be spawned for the given parent."""
        if not self.enabled:
            return False

        limits = self.config.limits

        # Check total limit
        if len(self.sparks) >= limits.maxTotalNodes:
            logger.warning(f"Total node limit reached: {limits.maxTotalNodes}")
            return False

        # Check per-substation limit
        parent_sparks = self.sparks_by_parent.get(parent_id, [])
        if len(parent_sparks) >= limits.maxSparksPerSubstation:
            logger.warning(
                f"Spark limit for {parent_id} reached: {limits.maxSparksPerSubstation}"
            )
            return False

        return True

    async def spawn(self, request: SparkRequest) -> Optional[SparkNode]:
        """Spawn a new Spark node.

        Args:
            request: SparkRequest with task and context

        Returns:
            SparkNode if spawned, None if limits exceeded or disabled
        """
        if not self.can_spawn(request.parent_id):
            return None

        # Check if approval is required
        if self.config.requireApproval:
            logger.warning(
                "Spark spawn requires approval (not implemented) - auto-approving"
            )
            # TODO: Implement approval flow

        # Create spark node
        spark_id = f"spark-{request.parent_id}-{uuid4().hex[:8]}"

        spark = SparkNode(
            id=spark_id,
            parent_id=request.parent_id,
            task=request.task,
            context=request.context,
            created_at=datetime.now(),
        )

        # Register spark
        self.sparks[spark_id] = spark

        if request.parent_id not in self.sparks_by_parent:
            self.sparks_by_parent[request.parent_id] = []
        self.sparks_by_parent[request.parent_id].append(spark_id)

        self._total_spawned += 1

        logger.info(
            f"Spawned spark {spark_id} for {request.parent_id}: {request.task[:50]}..."
        )

        return spark

    async def spawn_batch(
        self,
        requests: List[SparkRequest],
    ) -> List[SparkNode]:
        """Spawn multiple sparks (respects limits)."""
        spawned = []
        for request in requests:
            spark = await self.spawn(request)
            if spark:
                spawned.append(spark)
            else:
                logger.warning(
                    f"Could not spawn spark for {request.parent_id} - limits reached"
                )
                break  # Stop if we hit limits
        return spawned

    def get_pending_sparks(self) -> List[SparkNode]:
        """Get all sparks in pending status."""
        return [s for s in self.sparks.values() if s.status == "pending"]

    def get_sparks_for_parent(self, parent_id: str) -> List[SparkNode]:
        """Get all sparks spawned by a specific parent."""
        spark_ids = self.sparks_by_parent.get(parent_id, [])
        return [self.sparks[sid] for sid in spark_ids if sid in self.sparks]

    def mark_running(self, spark_id: str) -> None:
        """Mark a spark as running."""
        if spark_id in self.sparks:
            self.sparks[spark_id].status = "running"

    def mark_completed(
        self,
        spark_id: str,
        output: Dict[str, Any],
        tokens_used: int = 0,
    ) -> None:
        """Mark a spark as completed."""
        if spark_id in self.sparks:
            spark = self.sparks[spark_id]
            spark.status = "completed"
            spark.output = output
            spark.tokens_used = tokens_used

    def mark_failed(self, spark_id: str, error: str) -> None:
        """Mark a spark as failed."""
        if spark_id in self.sparks:
            spark = self.sparks[spark_id]
            spark.status = "failed"
            spark.error = error

    def get_completed_outputs(self, parent_id: str) -> List[Dict[str, Any]]:
        """Get outputs from all completed sparks for a parent."""
        outputs = []
        for spark in self.get_sparks_for_parent(parent_id):
            if spark.status == "completed" and spark.output:
                outputs.append({
                    "spark_id": spark.id,
                    "task": spark.task,
                    "output": spark.output,
                })
        return outputs

    def get_stats(self) -> Dict[str, Any]:
        """Get spark manager statistics."""
        by_status = {}
        for spark in self.sparks.values():
            by_status[spark.status] = by_status.get(spark.status, 0) + 1

        return {
            "enabled": self.enabled,
            "total_spawned": self._total_spawned,
            "active": len(self.sparks),
            "by_status": by_status,
            "limits": {
                "max_total": self.config.limits.maxTotalNodes,
                "max_per_substation": self.config.limits.maxSparksPerSubstation,
            },
        }


def analyze_workload_for_sparks(
    task: str,
    context: Dict[str, Any],
) -> List[SparkRequest]:
    """Analyze a task to determine if it can be parallelized with sparks.

    This is a simple heuristic-based analyzer. In production, this could
    use an LLM to intelligently decompose tasks.

    Returns:
        List of SparkRequests for parallel subtasks
    """
    requests = []

    # Check for list processing patterns
    for key, value in context.items():
        if isinstance(value, list) and len(value) > 1:
            # Potential fan-out: process each item in parallel
            for i, item in enumerate(value):
                requests.append(SparkRequest(
                    parent_id="",  # Will be set by caller
                    task=f"Process item {i+1}/{len(value)}: {task}",
                    context={key: item, "index": i, "total": len(value)},
                    priority=i,  # Process in order if needed
                ))

    return requests


def create_spark_node_spec(
    spark: SparkNode,
    default_agent: Optional[str] = None,
) -> NodeSpec:
    """Create a NodeSpec for a spawned Spark.

    This allows the spark to be tracked in the scheduler alongside
    static nodes if needed.

    Uses model_construct() to bypass validation since SPARK nodes
    can only be created programmatically at runtime, not in user YAML.
    """
    # Use model_construct to bypass the validator that blocks SPARK kind
    # from user-defined specs. This is intentional for runtime-created sparks.
    return NodeSpec.model_construct(
        id=spark.id,
        kind=NodeKind.SPARK,
        name=f"Spark: {spark.task[:30]}...",
        description=spark.task,
        parent=spark.parent_id,
        agent=default_agent,
        capabilities=["execute", "report"],
    )
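To make the fan-out heuristic in analyze_workload_for_sparks concrete, a short usage sketch (the task string, file names, and substation id are invented for illustration):

from smartify.engine.spark import analyze_workload_for_sparks

# Any list-valued context entry with more than one item is fanned out,
# one SparkRequest per item:
requests = analyze_workload_for_sparks(
    task="Summarize the report",
    context={"files": ["q1.csv", "q2.csv", "q3.csv"]},
)
for req in requests:
    req.parent_id = "substation-1"  # parent_id is left empty for the caller to set

# requests[0].task    == "Process item 1/3: Summarize the report"
# requests[0].context == {"files": "q1.csv", "index": 0, "total": 3}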
smartify/guardrails/__init__.py
ADDED
@@ -0,0 +1,22 @@
"""Guardrails module for Smartify.

Provides breaker management and safety limits for grid execution.
"""

from smartify.guardrails.breakers import (
    BreakerManager,
    BreakerState,
    BreakerTrip,
    BreakerType,
    BreakerError,
    RateLimitState,
)

__all__ = [
    "BreakerManager",
    "BreakerState",
    "BreakerTrip",
    "BreakerType",
    "BreakerError",
    "RateLimitState",
]
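The re-exports above give callers a stable, package-level import surface, so downstream code never has to reach into smartify.guardrails.breakers directly. A minimal sketch, using only the names listed in __all__:

from smartify.guardrails import BreakerManager, BreakerError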