smartify-ai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. smartify/__init__.py +3 -0
  2. smartify/agents/__init__.py +0 -0
  3. smartify/agents/adapters/__init__.py +13 -0
  4. smartify/agents/adapters/anthropic.py +253 -0
  5. smartify/agents/adapters/openai.py +289 -0
  6. smartify/api/__init__.py +26 -0
  7. smartify/api/auth.py +352 -0
  8. smartify/api/errors.py +380 -0
  9. smartify/api/events.py +345 -0
  10. smartify/api/server.py +992 -0
  11. smartify/cli/__init__.py +1 -0
  12. smartify/cli/main.py +430 -0
  13. smartify/engine/__init__.py +64 -0
  14. smartify/engine/approval.py +479 -0
  15. smartify/engine/orchestrator.py +1365 -0
  16. smartify/engine/scheduler.py +380 -0
  17. smartify/engine/spark.py +294 -0
  18. smartify/guardrails/__init__.py +22 -0
  19. smartify/guardrails/breakers.py +409 -0
  20. smartify/models/__init__.py +61 -0
  21. smartify/models/grid.py +625 -0
  22. smartify/notifications/__init__.py +22 -0
  23. smartify/notifications/webhook.py +556 -0
  24. smartify/state/__init__.py +46 -0
  25. smartify/state/checkpoint.py +558 -0
  26. smartify/state/resume.py +301 -0
  27. smartify/state/store.py +370 -0
  28. smartify/tools/__init__.py +17 -0
  29. smartify/tools/base.py +196 -0
  30. smartify/tools/builtin/__init__.py +79 -0
  31. smartify/tools/builtin/file.py +464 -0
  32. smartify/tools/builtin/http.py +195 -0
  33. smartify/tools/builtin/shell.py +137 -0
  34. smartify/tools/mcp/__init__.py +33 -0
  35. smartify/tools/mcp/adapter.py +157 -0
  36. smartify/tools/mcp/client.py +334 -0
  37. smartify/tools/mcp/registry.py +130 -0
  38. smartify/validator/__init__.py +0 -0
  39. smartify/validator/validate.py +271 -0
  40. smartify/workspace/__init__.py +5 -0
  41. smartify/workspace/manager.py +248 -0
  42. smartify_ai-0.1.0.dist-info/METADATA +201 -0
  43. smartify_ai-0.1.0.dist-info/RECORD +46 -0
  44. smartify_ai-0.1.0.dist-info/WHEEL +4 -0
  45. smartify_ai-0.1.0.dist-info/entry_points.txt +2 -0
  46. smartify_ai-0.1.0.dist-info/licenses/LICENSE +21 -0
smartify/api/server.py ADDED
@@ -0,0 +1,992 @@
1
+ """FastAPI server for Smartify Grid execution.
2
+
3
+ Provides HTTP API for:
4
+ - Loading and validating grids
5
+ - Energizing grids (initialization)
6
+ - Executing grids
7
+ - Managing grid lifecycle (pause/resume/stop)
8
+ - Status and monitoring
9
+ """
10
+
11
+ import asyncio
12
+ import logging
13
+ import os
14
+ from contextlib import asynccontextmanager
15
+ from datetime import datetime
16
+ from typing import Any, Dict, List, Optional
17
+ from uuid import uuid4
18
+
19
+ from fastapi import Depends, FastAPI, HTTPException, BackgroundTasks, APIRouter, Query, Request
20
+ from fastapi.middleware.cors import CORSMiddleware
21
+ from fastapi.responses import JSONResponse
22
+ from pydantic import BaseModel, Field, ConfigDict
23
+
24
+ from smartify.api.auth import AuthMiddleware, verify_api_key_dep, get_auth_config
25
+ from smartify.api.errors import (
26
+ ErrorCode,
27
+ ErrorResponse,
28
+ ErrorDetail,
29
+ SmartifyAPIError,
30
+ GridNotFoundError,
31
+ NodeNotFoundError,
32
+ GridStateError,
33
+ ValidationError,
34
+ ExecutionError as APIExecutionError,
35
+ register_error_handlers,
36
+ )
37
+ from smartify.api.events import (
38
+ EventType,
39
+ GridEvent,
40
+ EventsResponse,
41
+ EventFilter,
42
+ event_manager,
43
+ )
44
+
45
+ from smartify.engine.orchestrator import (
46
+ Orchestrator,
47
+ GridRun,
48
+ ExecutionError,
49
+ GridLifecycleError,
50
+ )
51
+ from smartify.models.grid import GridState
52
+ from smartify.agents.adapters import AnthropicAdapter, OpenAIAdapter
53
+
54
+
55
+ logger = logging.getLogger(__name__)
56
+
57
+ # API Version
58
+ API_VERSION = "1.0.0"
59
+ API_VERSION_MAJOR = "v1"
60
+
61
+
62
+ # ============================================================================
63
+ # Request/Response Models
64
+ # ============================================================================
65
+
66
class GridLoadRequest(BaseModel):
    """Request to load a grid."""
    # Callers supply exactly one of yaml_content / file_path; the load endpoint
    # rejects requests that provide neither or both.
    yaml_content: Optional[str] = Field(None, description="Grid YAML content")
    file_path: Optional[str] = Field(None, description="Path to grid YAML file")
    # When set, replaces the ID declared inside the grid document.
    grid_id: Optional[str] = Field(None, description="Custom grid ID (auto-generated if not provided)")

    # Example payloads surfaced in the generated JSON schema. The YAML example
    # is nested so that it reads as a structured GridSpec document rather than
    # a flat list of top-level keys.
    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {
                    "yaml_content": """apiVersion: smartify.ai/v1
kind: GridSpec
metadata:
  id: my-grid
  name: My Grid
topology:
  nodes:
    - id: controller
      kind: controller
      name: Main Controller""",
                },
                {
                    "file_path": "/path/to/grid.yaml",
                    "grid_id": "custom-id"
                }
            ]
        }
    )
94
+
95
+
96
class GridInputsRequest(BaseModel):
    """Request to provide grid inputs."""
    # Mapping of input names to values; defaults to an empty dict when omitted.
    # The energize endpoint stores this mapping as the run context's inputs.
    inputs: Dict[str, Any] = Field(default_factory=dict)
99
+
100
+
101
class GridRunRequest(BaseModel):
    """Request to run a grid."""
    # Merged into (not replacing) the inputs already held by the run context.
    inputs: Dict[str, Any] = Field(default_factory=dict, description="Grid inputs")
    # When true, the run endpoint schedules execution as an asyncio task and
    # returns immediately; otherwise it awaits execution before responding.
    async_execution: bool = Field(False, description="Run in background")
105
+
106
+
107
class GridResponse(BaseModel):
    """Response containing grid information."""
    grid_id: str
    name: str
    # String value of the run's current GridState.
    state: str
    # Timestamp recorded when the grid was loaded into this server.
    created_at: datetime
    # Optional human-readable note about the operation that produced this response.
    message: Optional[str] = None
114
+
115
+
116
class GridStatusResponse(BaseModel):
    """Detailed grid status response."""
    grid_id: str
    name: str
    # String value of the run's current GridState.
    state: str
    # The counters and totals below are copied from the orchestrator's status
    # mapping by the endpoints that build this response.
    node_count: int
    completed_nodes: int
    failed_nodes: int
    total_tokens: int
    total_cost: float
    elapsed_seconds: float
    # Grid outputs, when the endpoint chooses to include them.
    outputs: Optional[Dict[str, Any]] = None
    # Error text recorded on the run, if any.
    error: Optional[str] = None
129
+
130
+
131
class GridListResponse(BaseModel):
    """Paginated list of grids."""
    # The current page of grids.
    grids: List[GridResponse]
    # Number of grids matching the filter, before pagination.
    total: int
    # Echo of the pagination parameters used for this page.
    limit: int
    offset: int
    # True when offset + limit is still below total.
    has_more: bool
138
+
139
+
140
class NodeInfo(BaseModel):
    """Node information."""
    id: str
    # String value of the node's kind.
    kind: str
    # Taken from the node's `parent` attribute; may be None.
    parent: Optional[str]
    # Scheduler state value, or "unknown" when the scheduler has no entry for the node.
    state: str
    # True when the run context holds an output for this node.
    has_output: bool
147
+
148
+
149
class NodeListResponse(BaseModel):
    """List of nodes in a grid."""
    nodes: List[NodeInfo]
    # Number of entries in `nodes`.
    total: int
153
+
154
+
155
class NodeOutputResponse(BaseModel):
    """Node output response."""
    node_id: str
    # String value of the node's kind.
    kind: str
    # Scheduler state value, or "unknown" when the scheduler has no entry for the node.
    state: str
    # Whatever the run context stored for this node; None when nothing is stored.
    output: Optional[Any] = None
161
+
162
+
163
class HealthResponse(BaseModel):
    """Health check response."""
    status: str
    # Full API version string (API_VERSION).
    version: str
    # Major version label (API_VERSION_MAJOR).
    api_version: str
    # Count of loaded grids currently in the running or energized state.
    active_grids: int
169
+
170
+
171
class APIInfoResponse(BaseModel):
    """API information response."""
    name: str
    # Full API version string (API_VERSION).
    version: str
    # Major version label (API_VERSION_MAJOR).
    api_version: str
    # Path of the interactive documentation.
    docs: str
177
+
178
+
179
+ # ============================================================================
180
+ # Application State
181
+ # ============================================================================
182
+
183
class AppState:
    """Container for the orchestrator and the per-grid bookkeeping of this process."""

    def __init__(self):
        # All three mappings are keyed by grid ID.
        self.runs: Dict[str, GridRun] = {}                # loaded grid runs
        self.run_tasks: Dict[str, asyncio.Task] = {}      # background execution tasks
        self.run_created_at: Dict[str, datetime] = {}     # load timestamps
        self.orchestrator = Orchestrator()


# Module-level instance shared by every endpoint below.
state = AppState()
193
+
194
+
195
+ # ============================================================================
196
+ # Lifespan Management
197
+ # ============================================================================
198
+
199
def _register_adapters(orchestrator: Orchestrator) -> None:
    """Auto-register LLM adapters from environment.

    An adapter is registered for each provider whose API key environment
    variable is set (``ANTHROPIC_API_KEY``, ``OPENAI_API_KEY``). One of the
    registered adapters is additionally registered under the name
    ``"default"``: the one named by ``SMARTIFY_DEFAULT_LLM`` when that provider
    was registered, otherwise the first one that registered successfully
    (Anthropic before OpenAI).

    Args:
        orchestrator: Orchestrator that receives the adapters.
    """
    preferred_default = os.environ.get("SMARTIFY_DEFAULT_LLM", "").lower()

    # (registry name, API key variable, adapter class, label used in log messages)
    providers = (
        ("anthropic", "ANTHROPIC_API_KEY", AnthropicAdapter, "Anthropic"),
        ("openai", "OPENAI_API_KEY", OpenAIAdapter, "OpenAI"),
    )

    registered = {}
    for name, env_var, adapter_cls, label in providers:
        if not os.environ.get(env_var):
            continue
        try:
            adapter = adapter_cls()
            orchestrator.register_llm_adapter(name, adapter)
        except Exception as e:
            # Best effort: one broken provider must not prevent the other from registering.
            logger.warning(f"Failed to register {label} adapter: {e}")
            continue
        registered[name] = (adapter, label)
        logger.info(f"Registered {label} LLM adapter")

    if not registered:
        logger.warning("No LLM API keys found - set ANTHROPIC_API_KEY or OPENAI_API_KEY")
        return

    # Fall back to the first registered adapter when the preferred provider is
    # unset, unknown, or failed to register, so a usable default always exists.
    default_name = preferred_default if preferred_default in registered else next(iter(registered))
    adapter, label = registered[default_name]
    try:
        orchestrator.register_llm_adapter("default", adapter)
    except Exception as e:
        logger.warning(f"Failed to register {label} adapter: {e}")
        return
    logger.info(f"Set {label} as default LLM adapter")
232
+
233
+
234
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application lifespan.

    Startup: registers LLM adapters on the shared orchestrator and logs
    whether API authentication is enabled.

    Shutdown: cancels every background grid execution that is still running
    and waits for the cancelled tasks to finish unwinding.
    """
    logger.info("Smartify API server starting...")
    _register_adapters(state.orchestrator)

    auth_config = get_auth_config()
    if auth_config.enabled:
        logger.info(f"API authentication enabled ({len(auth_config.api_keys)} key(s) configured)")
    else:
        logger.warning("API authentication DISABLED - all endpoints are public")

    yield

    cancelled = []
    # Iterate over a snapshot: the mapping may change while tasks unwind.
    for grid_id, task in list(state.run_tasks.items()):
        if not task.done():
            logger.info(f"Cancelling running grid: {grid_id}")
            task.cancel()
            cancelled.append(task)

    if cancelled:
        # cancel() only requests cancellation; await the tasks so they get a
        # chance to run their cleanup before the event loop shuts down.
        await asyncio.gather(*cancelled, return_exceptions=True)
    logger.info("Smartify API server stopped")
253
+
254
+
255
+ # ============================================================================
256
+ # FastAPI Application
257
+ # ============================================================================
258
+
259
# Markdown text passed to FastAPI(description=...) when the app is created below.
API_DESCRIPTION = """
# Smartify Runtime API

Execute and manage AI agent coordination grids with enterprise guardrails.

## Overview

The Smartify Runtime API provides programmatic access to:

- **Grid Management**: Load, validate, and manage Grid specifications
- **Execution Control**: Start, pause, resume, and stop grid execution
- **Monitoring**: Track execution progress with real-time events
- **Node Inspection**: Examine individual node states and outputs

## Authentication

All endpoints (except `/health`) require an API key:

```
X-API-Key: sk_your_api_key_here
```

## Versioning

This API is versioned. All endpoints are prefixed with `/v1/`.
The current API version is returned in the `X-API-Version` header.

## Error Handling

All errors return a consistent `ErrorResponse` format with:
- `error`: Machine-readable error code
- `message`: Human-readable description
- `retryable`: Whether the request can be safely retried
- `details`: Additional context (when available)

## Rate Limits

API requests are rate-limited per API key. Rate limit info is included in response headers.

## Resources

- [Grid YAML Reference](https://docs.smartify.ai/reference/grid-yaml)
- [SDK Documentation](https://docs.smartify.ai/sdk)
"""
303
+
304
# Tag metadata handed to FastAPI(openapi_tags=...).
_OPENAPI_TAGS = [
    {"name": tag_name, "description": tag_description}
    for tag_name, tag_description in (
        ("Health", "Health checks and API information"),
        ("Grids", "Grid lifecycle management - load, list, get, delete"),
        ("Lifecycle", "Grid execution control - energize, run, pause, resume, stop"),
        ("Nodes", "Node inspection - list nodes, get outputs"),
        ("Events", "Execution events for async operation tracking"),
    )
]

# Error responses declared at application level; each one uses the ErrorResponse model.
_ERROR_RESPONSES = {
    status_code: {"model": ErrorResponse, "description": summary}
    for status_code, summary in (
        (400, "Validation error"),
        (401, "Authentication error"),
        (404, "Resource not found"),
        (409, "State conflict"),
        (429, "Rate limit exceeded"),
        (500, "Internal server error"),
    )
}

# NOTE(review): the package's bundled LICENSE file is listed as only 21 lines,
# which does not look like the Apache 2.0 text - confirm license_info below.
app = FastAPI(
    title="Smartify Runtime API",
    description=API_DESCRIPTION,
    version=API_VERSION,
    lifespan=lifespan,
    contact={
        "name": "Smartify Support",
        "url": "https://smartify.ai/support",
        "email": "support@smartify.ai",
    },
    license_info={
        "name": "Apache 2.0",
        "url": "https://www.apache.org/licenses/LICENSE-2.0",
    },
    openapi_tags=_OPENAPI_TAGS,
    responses=_ERROR_RESPONSES,
)
349
+
350
# Register custom error handlers
register_error_handlers(app)

# CORS middleware for development
# NOTE(review): every origin, method and header is allowed AND credentials are
# enabled. Browsers do not accept a literal "*" origin on credentialed
# requests, and this policy is wider than a production deployment usually
# wants - confirm it is tightened (or made configurable) outside development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Authentication middleware
# NOTE(review): the order of add_middleware calls presumably determines which
# middleware sees a request first (relevant for CORS preflight requests that
# carry no API key) - confirm before reordering these registrations.
app.add_middleware(AuthMiddleware)
364
+
365
+
366
+ # ============================================================================
367
+ # Version Header Middleware
368
+ # ============================================================================
369
+
370
@app.middleware("http")
async def add_version_header(request: Request, call_next):
    """Stamp the full and major API version onto every outgoing response."""
    response = await call_next(request)
    version_headers = (
        ("X-API-Version", API_VERSION),
        ("X-API-Version-Major", API_VERSION_MAJOR),
    )
    for header_name, header_value in version_headers:
        response.headers[header_name] = header_value
    return response
377
+
378
+
379
+ # ============================================================================
380
+ # Health & Info Endpoints (unversioned)
381
+ # ============================================================================
382
+
383
@app.get(
    "/health",
    response_model=HealthResponse,
    tags=["Health"],
    operation_id="healthCheck",
    summary="Health check",
    description="Check API server health status. This endpoint is always public.",
)
async def health_check():
    """Check API server health.

    Returns:
        HealthResponse with a fixed "healthy" status, the API version strings,
        and the number of loaded grids that are running or energized.
    """
    active_states = (GridState.RUNNING, GridState.ENERGIZED)
    # Read the state from the run itself, as every other endpoint in this
    # module does, and count without building an intermediate list.
    active_grids = sum(1 for run in state.runs.values() if run.state in active_states)

    return HealthResponse(
        status="healthy",
        version=API_VERSION,
        api_version=API_VERSION_MAJOR,
        active_grids=active_grids,
    )
400
+
401
+
402
@app.get(
    "/",
    response_model=APIInfoResponse,
    tags=["Health"],
    operation_id="apiInfo",
    summary="API information",
    description="Get API name, version, and documentation links.",
)
async def root():
    """API root endpoint.

    Returns:
        APIInfoResponse with the API name, version strings, and the path of
        the interactive documentation.
    """
    return APIInfoResponse(
        name="Smartify Runtime API",
        version=API_VERSION,
        api_version=API_VERSION_MAJOR,
        # Report where the docs are actually served. The app is created without
        # a custom docs_url, so a hand-built "/v1/docs" path would not resolve.
        # docs_url is None when the docs are disabled; fall back to "".
        docs=app.docs_url or "",
    )
418
+
419
+
420
+ # ============================================================================
421
+ # Versioned API Router
422
+ # ============================================================================
423
+
424
# Router for all versioned endpoints; its prefix is "/" + API_VERSION_MAJOR.
# It is attached to the app with app.include_router() near the end of this module.
# NOTE(review): each route also passes its own tags=[...]; presumably the "v1"
# tag is added on top of those - verify in the generated docs.
v1_router = APIRouter(prefix=f"/{API_VERSION_MAJOR}", tags=["v1"])
425
+
426
+
427
+ # ============================================================================
428
+ # Grid Management Endpoints
429
+ # ============================================================================
430
+
431
@v1_router.post(
    "/grids",
    response_model=GridResponse,
    tags=["Grids"],
    operation_id="loadGrid",
    summary="Load a grid",
    description="Load and validate a grid specification from YAML content or file path.",
)
async def load_grid(request: GridLoadRequest):
    """Load and validate a grid specification.

    Exactly one of ``yaml_content`` or ``file_path`` must be provided. The
    loaded run is stored under its grid ID (overridden by ``grid_id`` when
    given) together with its load timestamp.

    Raises:
        ValidationError: if neither or both sources are given, if loading
            fails, or if a grid with the same ID is still executing in the
            background.
    """
    if not request.yaml_content and not request.file_path:
        raise ValidationError(
            "Must provide yaml_content or file_path",
            details=[
                ErrorDetail(suggestion="Provide either yaml_content (string) or file_path (path to YAML file)")
            ]
        )

    if request.yaml_content and request.file_path:
        raise ValidationError(
            "Provide only one of yaml_content or file_path",
            details=[
                ErrorDetail(suggestion="Remove either yaml_content or file_path from request")
            ]
        )

    # Exactly one of the two is set at this point.
    # NOTE(review): file_path comes straight from the client and is handed to
    # the orchestrator unchanged - confirm the orchestrator restricts which
    # server-side paths may be read.
    source = request.yaml_content or request.file_path

    try:
        run = await state.orchestrator.load_grid(source=source)

        if request.grid_id:
            run.grid.id = request.grid_id
    except Exception as e:
        # logger.exception keeps the traceback; chaining keeps the root cause.
        logger.exception(f"Failed to load grid: {e}")
        raise ValidationError(f"Failed to load grid: {str(e)}") from e

    grid_id = run.grid.id

    # Replacing a grid whose background execution is still alive would leave
    # that task attached to the new run under the same ID, so refuse instead.
    previous_task = state.run_tasks.get(grid_id)
    if previous_task is not None:
        if not previous_task.done():
            raise ValidationError(
                f"Grid '{grid_id}' is currently executing and cannot be replaced",
                details=[
                    ErrorDetail(suggestion="Stop or delete the existing grid before loading another with the same ID")
                ]
            )
        # A finished task belongs to the run being replaced; drop it.
        del state.run_tasks[grid_id]

    created_at = datetime.now()
    state.runs[grid_id] = run
    state.run_created_at[grid_id] = created_at

    return GridResponse(
        grid_id=grid_id,
        name=run.grid.name,
        state=run.state.value,
        created_at=created_at,
        message="Grid loaded and validated successfully",
    )
481
+
482
+
483
@v1_router.get(
    "/grids",
    response_model=GridListResponse,
    tags=["Grids"],
    operation_id="listGrids",
    summary="List grids",
    description="List all loaded grids with optional pagination and filtering.",
)
async def list_grids(
    limit: int = Query(default=20, ge=1, le=100, description="Maximum number of grids to return"),
    offset: int = Query(default=0, ge=0, description="Number of grids to skip"),
    state_filter: Optional[str] = Query(default=None, alias="state", description="Filter by grid state"),
):
    """Return one page of loaded grids, optionally restricted to a single state value."""
    # An empty or missing filter keeps every run.
    matching = [
        candidate
        for candidate in state.runs.values()
        if not state_filter or candidate.state.value == state_filter
    ]
    match_count = len(matching)
    page_end = offset + limit

    page = []
    for run in matching[offset:page_end]:
        page.append(
            GridResponse(
                grid_id=run.grid.id,
                name=run.grid.name,
                state=run.state.value,
                # Falls back to "now" when no load timestamp was recorded.
                created_at=state.run_created_at.get(run.grid.id, datetime.now()),
            )
        )

    return GridListResponse(
        grids=page,
        total=match_count,
        limit=limit,
        offset=offset,
        has_more=page_end < match_count,
    )
525
+
526
+
527
@v1_router.get(
    "/grids/{grid_id}",
    response_model=GridStatusResponse,
    tags=["Grids"],
    operation_id="getGridStatus",
    summary="Get grid status",
    description="Get detailed status of a grid including execution progress and outputs.",
)
async def get_grid_status(grid_id: str):
    """Report progress counters for one grid; outputs are included only once it has completed."""
    run = state.runs.get(grid_id)
    if not run:
        raise GridNotFoundError(grid_id)

    status = state.orchestrator.get_status(run)

    # Defaults applied when the orchestrator's status mapping lacks a key.
    metric_defaults = {
        "node_count": 0,
        "completed_nodes": 0,
        "failed_nodes": 0,
        "total_tokens": 0,
        "total_cost": 0.0,
        "elapsed_seconds": 0.0,
    }
    metrics = {key: status.get(key, fallback) for key, fallback in metric_defaults.items()}

    if run.state == GridState.COMPLETED:
        outputs = run.outputs
    else:
        outputs = None

    return GridStatusResponse(
        grid_id=run.grid.id,
        name=run.grid.name,
        state=run.state.value,
        outputs=outputs,
        error=run.error,
        **metrics,
    )
556
+
557
+
558
@v1_router.delete(
    "/grids/{grid_id}",
    tags=["Grids"],
    operation_id="deleteGrid",
    summary="Delete a grid",
    description="Delete a grid from memory. Running grids will be stopped first.",
)
async def delete_grid(grid_id: str):
    """Forget a grid: cancel its background task if still running, then drop its bookkeeping."""
    if grid_id not in state.runs:
        raise GridNotFoundError(grid_id)

    background_task = state.run_tasks.pop(grid_id, None)
    if background_task is not None and not background_task.done():
        background_task.cancel()

    del state.runs[grid_id]
    state.run_created_at.pop(grid_id, None)

    return {"message": f"Grid {grid_id} deleted"}
581
+
582
+
583
+ # ============================================================================
584
+ # Grid Lifecycle Endpoints
585
+ # ============================================================================
586
+
587
@v1_router.post(
    "/grids/{grid_id}/energize",
    response_model=GridResponse,
    tags=["Lifecycle"],
    operation_id="energizeGrid",
    summary="Energize a grid",
    description="Initialize a grid with inputs. Transitions: DRAFT → READY → ENERGIZED.",
)
async def energize_grid(grid_id: str, request: GridInputsRequest):
    """Store the supplied inputs on a draft grid and ask the orchestrator to energize it."""
    run = state.runs.get(grid_id)
    if not run:
        raise GridNotFoundError(grid_id)

    # Only a grid that is still a draft may be energized.
    if run.state != GridState.DRAFT:
        current = run.state.value
        raise GridStateError(
            f"Cannot energize grid in state '{current}'",
            current_state=current,
            expected_states=["draft"],
        )

    try:
        run.context.inputs = request.inputs
        await state.orchestrator.energize(run)

        loaded_at = state.run_created_at.get(run.grid.id, datetime.now())
        return GridResponse(
            grid_id=run.grid.id,
            name=run.grid.name,
            state=run.state.value,
            created_at=loaded_at,
            message="Grid energized and ready to run",
        )

    except GridLifecycleError as e:
        # Lifecycle violations are reported as state conflicts.
        raise GridStateError(str(e), run.state.value, ["draft"])
    except Exception as e:
        logger.error(f"Failed to energize grid: {e}")
        raise APIExecutionError(f"Failed to energize grid: {str(e)}")
625
+
626
+
627
@v1_router.post(
    "/grids/{grid_id}/run",
    response_model=GridStatusResponse,
    tags=["Lifecycle"],
    operation_id="runGrid",
    summary="Run a grid",
    description="Execute a grid. Use async_execution=true to run in background.",
)
async def run_grid(grid_id: str, request: GridRunRequest, background_tasks: BackgroundTasks):
    """Run a grid to completion.

    A draft grid is energized automatically first. With
    ``async_execution=true`` the execution is scheduled as an asyncio task and
    an initial status is returned immediately; otherwise the request waits for
    execution to finish and returns the final status. Execution failures are
    reported through the ``error`` field of the response rather than raised.

    ``background_tasks`` is accepted but not used; background runs are
    tracked in ``state.run_tasks``.

    Raises:
        GridNotFoundError: if no grid with ``grid_id`` is loaded.
        GridStateError: if the grid cannot be run from its current state or
            already has a background execution scheduled.
    """
    run = state.runs.get(grid_id)
    if not run:
        raise GridNotFoundError(grid_id)

    # Validate the state before touching anything, so a rejected request
    # leaves the stored inputs unchanged.
    if run.state not in (GridState.DRAFT, GridState.ENERGIZED, GridState.PAUSED):
        raise GridStateError(
            f"Cannot run grid in state '{run.state.value}'",
            current_state=run.state.value,
            expected_states=["energized", "paused"],
        )

    # A freshly scheduled task has not switched the grid to "running" yet, so
    # a second request arriving in that window would start a duplicate run.
    scheduled = state.run_tasks.get(grid_id)
    if run.state != GridState.PAUSED and scheduled is not None and not scheduled.done():
        raise GridStateError(
            f"Grid '{grid_id}' already has a background execution scheduled",
            current_state=run.state.value,
            expected_states=["energized", "paused"],
        )

    if request.inputs:
        run.context.inputs.update(request.inputs)

    # Auto-energize if still draft
    if run.state == GridState.DRAFT:
        try:
            await state.orchestrator.energize(run)
        except GridLifecycleError as e:
            raise GridStateError(str(e), run.state.value, ["draft", "energized", "paused"]) from e

    # Energizing may have left the grid in some other state; check again.
    if run.state not in (GridState.ENERGIZED, GridState.PAUSED):
        raise GridStateError(
            f"Cannot run grid in state '{run.state.value}'",
            current_state=run.state.value,
            expected_states=["energized", "paused"],
        )

    async def execute_grid():
        try:
            await state.orchestrator.execute(run)
        except Exception as e:
            # Record the failure on the run; keep the traceback in the log.
            logger.exception(f"Grid execution failed: {e}")
            run.error = str(e)

    if request.async_execution:
        state.run_tasks[grid_id] = asyncio.create_task(execute_grid())

        # The task has not started yet, so report zeroed progress counters.
        return GridStatusResponse(
            grid_id=run.grid.id,
            name=run.grid.name,
            state=run.state.value,
            node_count=len(run.grid.nodes),
            completed_nodes=0,
            failed_nodes=0,
            total_tokens=0,
            total_cost=0.0,
            elapsed_seconds=0.0,
        )

    await execute_grid()

    status = state.orchestrator.get_status(run)
    return GridStatusResponse(
        grid_id=run.grid.id,
        name=run.grid.name,
        state=run.state.value,
        node_count=status.get("node_count", 0),
        completed_nodes=status.get("completed_nodes", 0),
        failed_nodes=status.get("failed_nodes", 0),
        total_tokens=status.get("total_tokens", 0),
        total_cost=status.get("total_cost", 0.0),
        elapsed_seconds=status.get("elapsed_seconds", 0.0),
        outputs=run.outputs,
        error=run.error,
    )
697
+
698
+
699
@v1_router.post(
    "/grids/{grid_id}/pause",
    response_model=GridResponse,
    tags=["Lifecycle"],
    operation_id="pauseGrid",
    summary="Pause a grid",
    description="Pause a running grid. Can be resumed later.",
)
async def pause_grid(grid_id: str):
    """Ask the orchestrator to pause a grid; only grids in the running state qualify."""
    run = state.runs.get(grid_id)
    if not run:
        raise GridNotFoundError(grid_id)

    if run.state != GridState.RUNNING:
        current = run.state.value
        raise GridStateError(
            f"Cannot pause grid in state '{current}'",
            current_state=current,
            expected_states=["running"],
        )

    try:
        await state.orchestrator.pause(run)
        loaded_at = state.run_created_at.get(run.grid.id, datetime.now())
        return GridResponse(
            grid_id=run.grid.id,
            name=run.grid.name,
            state=run.state.value,
            created_at=loaded_at,
            message="Grid paused",
        )
    except GridLifecycleError as e:
        # Lifecycle violations are reported as state conflicts.
        raise GridStateError(str(e), run.state.value, ["running"])
731
+
732
+
733
@v1_router.post(
    "/grids/{grid_id}/resume",
    response_model=GridResponse,
    tags=["Lifecycle"],
    operation_id="resumeGrid",
    summary="Resume a grid",
    description="Resume a paused grid.",
)
async def resume_grid(grid_id: str):
    """Ask the orchestrator to resume a grid; only grids in the paused state qualify."""
    run = state.runs.get(grid_id)
    if not run:
        raise GridNotFoundError(grid_id)

    if run.state != GridState.PAUSED:
        current = run.state.value
        raise GridStateError(
            f"Cannot resume grid in state '{current}'",
            current_state=current,
            expected_states=["paused"],
        )

    try:
        await state.orchestrator.resume(run)
        loaded_at = state.run_created_at.get(run.grid.id, datetime.now())
        return GridResponse(
            grid_id=run.grid.id,
            name=run.grid.name,
            state=run.state.value,
            created_at=loaded_at,
            message="Grid resumed",
        )
    except GridLifecycleError as e:
        # Lifecycle violations are reported as state conflicts.
        raise GridStateError(str(e), run.state.value, ["paused"])
765
+
766
+
767
@v1_router.post(
    "/grids/{grid_id}/stop",
    response_model=GridResponse,
    tags=["Lifecycle"],
    operation_id="stopGrid",
    summary="Stop a grid",
    description="Stop a running or paused grid. Cannot be resumed.",
)
async def stop_grid(grid_id: str):
    """Cancel any live background task for the grid, then ask the orchestrator to stop it."""
    run = state.runs.get(grid_id)
    if not run:
        raise GridNotFoundError(grid_id)

    stoppable_states = (GridState.RUNNING, GridState.PAUSED, GridState.ENERGIZED)
    if run.state not in stoppable_states:
        current = run.state.value
        raise GridStateError(
            f"Cannot stop grid in state '{current}'",
            current_state=current,
            expected_states=["running", "paused", "energized"],
        )

    try:
        # The task entry is kept in state.run_tasks; only the task is cancelled.
        background_task = state.run_tasks[grid_id] if grid_id in state.run_tasks else None
        if background_task is not None and not background_task.done():
            background_task.cancel()

        await state.orchestrator.stop(run)
        loaded_at = state.run_created_at.get(run.grid.id, datetime.now())
        return GridResponse(
            grid_id=run.grid.id,
            name=run.grid.name,
            state=run.state.value,
            created_at=loaded_at,
            message="Grid stopped",
        )
    except GridLifecycleError as e:
        # Lifecycle violations are reported as state conflicts.
        raise GridStateError(str(e), run.state.value, ["running", "paused", "energized"])
804
+
805
+
806
+ # ============================================================================
807
+ # Node Inspection Endpoints
808
+ # ============================================================================
809
+
810
@v1_router.get(
    "/grids/{grid_id}/nodes",
    response_model=NodeListResponse,
    tags=["Nodes"],
    operation_id="listNodes",
    summary="List nodes",
    description="List all nodes in a grid with their current state.",
)
async def list_nodes(grid_id: str):
    """Describe every node of a grid: kind, parent, scheduler state, and whether output exists."""
    run = state.runs.get(grid_id)
    if not run:
        raise GridNotFoundError(grid_id)

    def describe(node):
        # State comes from the scheduler's entry for the node; "unknown" when
        # the scheduler has no entry or the entry carries no state.
        execution = run.scheduler.nodes.get(node.id)
        current = execution.state if execution else None
        return NodeInfo(
            id=node.id,
            kind=node.kind.value,
            parent=node.parent,
            state=current.value if current else "unknown",
            has_output=node.id in run.context.outputs,
        )

    node_infos = [describe(node) for node in run.grid.nodes]
    return NodeListResponse(nodes=node_infos, total=len(node_infos))
837
+
838
+
839
@v1_router.get(
    "/grids/{grid_id}/nodes/{node_id}",
    response_model=NodeOutputResponse,
    tags=["Nodes"],
    operation_id="getNodeOutput",
    summary="Get node output",
    description="Get the output from a specific node.",
)
async def get_node_output(grid_id: str, node_id: str):
    """Return kind, scheduler state, and stored output (if any) for one node of a grid."""
    run = state.runs.get(grid_id)
    if not run:
        raise GridNotFoundError(grid_id)

    # First node whose ID matches, or None when the grid has no such node.
    node = None
    for candidate in run.grid.nodes:
        if candidate.id == node_id:
            node = candidate
            break
    if not node:
        raise NodeNotFoundError(grid_id, node_id)

    execution = run.scheduler.nodes.get(node_id)
    current = execution.state if execution else None
    state_label = current.value if current else "unknown"

    return NodeOutputResponse(
        node_id=node_id,
        kind=node.kind.value,
        state=state_label,
        output=run.context.outputs.get(node_id),
    )
867
+
868
+
869
+ # ============================================================================
870
+ # Event Streaming Endpoints
871
+ # ============================================================================
872
+
873
@v1_router.get(
    "/grids/{grid_id}/events",
    response_model=EventsResponse,
    tags=["Events"],
    operation_id="getGridEvents",
    summary="Get grid events",
    description="Poll for execution events. Use for tracking async operation progress.",
)
async def get_grid_events(
    grid_id: str,
    after: Optional[str] = Query(None, description="Return events after this event ID"),
    types: Optional[str] = Query(None, description="Comma-separated event types to filter"),
    node_id: Optional[str] = Query(None, description="Filter events by node ID"),
    limit: int = Query(50, ge=1, le=200, description="Maximum events to return"),
):
    """Get execution events for a grid.

    Use this endpoint to:
    - Poll for progress on async executions
    - Get detailed execution trace
    - Monitor node-by-node execution

    For long-polling, pass `after` with the last event ID you received.

    Raises:
        GridNotFoundError: if no grid with ``grid_id`` is loaded.
        ValidationError: if ``types`` names an unknown event type.
    """
    run = state.runs.get(grid_id)
    if not run:
        raise GridNotFoundError(grid_id)

    # Parse types filter. Blank entries (e.g. from a trailing comma) are
    # ignored; a value with no usable entries means "no type filter".
    type_filter = None
    if types:
        requested = [name.strip() for name in types.split(",") if name.strip()]
        try:
            type_filter = [EventType(name) for name in requested] or None
        except ValueError as e:
            raise ValidationError(f"Invalid event type: {e}") from e

    # Build filter (named so it does not shadow the builtin `filter`)
    event_filter = EventFilter(
        types=type_filter,
        node_id=node_id,
        after=after,
        limit=limit,
    )

    # Get events
    events, has_more, next_cursor = await event_manager.get_events(grid_id, event_filter)
    store = await event_manager.get_store(grid_id)
    total = await store.count()

    # Calculate progress as the fraction of nodes completed
    status = state.orchestrator.get_status(run)
    total_nodes = status.get("node_count", 1)
    completed_nodes = status.get("completed_nodes", 0)
    progress = completed_nodes / total_nodes if total_nodes > 0 else 0.0

    return EventsResponse(
        grid_id=grid_id,
        events=events,
        total=total,
        has_more=has_more,
        next_cursor=next_cursor,
        state=run.state.value,
        progress=progress,
        completed_nodes=completed_nodes,
        total_nodes=total_nodes,
    )
939
+
940
+
941
class EventTypesResponse(BaseModel):
    """List of available event types."""
    # String values of the EventType members.
    types: List[str]
944
+
945
+
946
@v1_router.get(
    "/events/types",
    response_model=EventTypesResponse,
    tags=["Events"],
    operation_id="listEventTypes",
    summary="List event types",
    description="Get list of all possible event types for filtering.",
)
async def list_event_types():
    """Enumerate the string value of every EventType member."""
    type_names = []
    for event_type in EventType:
        type_names.append(event_type.value)
    return EventTypesResponse(types=type_names)
959
+
960
+
961
+ # ============================================================================
962
+ # Register versioned router
963
+ # ============================================================================
964
+
965
# Attach the versioned endpoints to the application under the router's prefix.
app.include_router(v1_router)

# NOTE: the endpoints are served only under the versioned prefix. Unversioned
# (root-level) aliases for backward compatibility are NOT mounted; the loop
# that previously stood here iterated over the routes without registering
# anything, so it has been removed.
973
+
974
+
975
+ # ============================================================================
976
+ # Server Runner
977
+ # ============================================================================
978
+
979
def run_server(host: str = "0.0.0.0", port: int = 8080, reload: bool = False):
    """Serve this module's ``app`` with uvicorn.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.
        reload: Forwarded to uvicorn's ``reload`` option.
    """
    # Imported here so that importing this module does not require uvicorn.
    import uvicorn

    server_options = {
        "host": host,
        "port": port,
        "reload": reload,
        "log_level": "info",
    }
    uvicorn.run("smartify.api.server:app", **server_options)


if __name__ == "__main__":
    run_server()