flowyml 1.7.2__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. flowyml/assets/base.py +15 -0
  2. flowyml/assets/metrics.py +5 -0
  3. flowyml/cli/main.py +709 -0
  4. flowyml/cli/stack_cli.py +138 -25
  5. flowyml/core/__init__.py +17 -0
  6. flowyml/core/executor.py +161 -26
  7. flowyml/core/image_builder.py +129 -0
  8. flowyml/core/log_streamer.py +227 -0
  9. flowyml/core/orchestrator.py +22 -2
  10. flowyml/core/pipeline.py +34 -10
  11. flowyml/core/routing.py +558 -0
  12. flowyml/core/step.py +9 -1
  13. flowyml/core/step_grouping.py +49 -35
  14. flowyml/core/types.py +407 -0
  15. flowyml/monitoring/alerts.py +10 -0
  16. flowyml/monitoring/notifications.py +104 -25
  17. flowyml/monitoring/slack_blocks.py +323 -0
  18. flowyml/plugins/__init__.py +251 -0
  19. flowyml/plugins/alerters/__init__.py +1 -0
  20. flowyml/plugins/alerters/slack.py +168 -0
  21. flowyml/plugins/base.py +752 -0
  22. flowyml/plugins/config.py +478 -0
  23. flowyml/plugins/deployers/__init__.py +22 -0
  24. flowyml/plugins/deployers/gcp_cloud_run.py +200 -0
  25. flowyml/plugins/deployers/sagemaker.py +306 -0
  26. flowyml/plugins/deployers/vertex.py +290 -0
  27. flowyml/plugins/integration.py +369 -0
  28. flowyml/plugins/manager.py +510 -0
  29. flowyml/plugins/model_registries/__init__.py +22 -0
  30. flowyml/plugins/model_registries/mlflow.py +159 -0
  31. flowyml/plugins/model_registries/sagemaker.py +489 -0
  32. flowyml/plugins/model_registries/vertex.py +386 -0
  33. flowyml/plugins/orchestrators/__init__.py +13 -0
  34. flowyml/plugins/orchestrators/sagemaker.py +443 -0
  35. flowyml/plugins/orchestrators/vertex_ai.py +461 -0
  36. flowyml/plugins/registries/__init__.py +13 -0
  37. flowyml/plugins/registries/ecr.py +321 -0
  38. flowyml/plugins/registries/gcr.py +313 -0
  39. flowyml/plugins/registry.py +454 -0
  40. flowyml/plugins/stack.py +494 -0
  41. flowyml/plugins/stack_config.py +537 -0
  42. flowyml/plugins/stores/__init__.py +13 -0
  43. flowyml/plugins/stores/gcs.py +460 -0
  44. flowyml/plugins/stores/s3.py +453 -0
  45. flowyml/plugins/trackers/__init__.py +11 -0
  46. flowyml/plugins/trackers/mlflow.py +316 -0
  47. flowyml/plugins/validators/__init__.py +3 -0
  48. flowyml/plugins/validators/deepchecks.py +119 -0
  49. flowyml/registry/__init__.py +2 -1
  50. flowyml/registry/model_environment.py +109 -0
  51. flowyml/registry/model_registry.py +241 -96
  52. flowyml/serving/__init__.py +17 -0
  53. flowyml/serving/model_server.py +628 -0
  54. flowyml/stacks/__init__.py +60 -0
  55. flowyml/stacks/aws.py +93 -0
  56. flowyml/stacks/base.py +62 -0
  57. flowyml/stacks/components.py +12 -0
  58. flowyml/stacks/gcp.py +44 -9
  59. flowyml/stacks/plugins.py +115 -0
  60. flowyml/stacks/registry.py +2 -1
  61. flowyml/storage/sql.py +401 -12
  62. flowyml/tracking/experiment.py +8 -5
  63. flowyml/ui/backend/Dockerfile +87 -16
  64. flowyml/ui/backend/auth.py +12 -2
  65. flowyml/ui/backend/main.py +149 -5
  66. flowyml/ui/backend/routers/ai_context.py +226 -0
  67. flowyml/ui/backend/routers/assets.py +23 -4
  68. flowyml/ui/backend/routers/auth.py +96 -0
  69. flowyml/ui/backend/routers/deployments.py +660 -0
  70. flowyml/ui/backend/routers/model_explorer.py +597 -0
  71. flowyml/ui/backend/routers/plugins.py +103 -51
  72. flowyml/ui/backend/routers/projects.py +91 -8
  73. flowyml/ui/backend/routers/runs.py +20 -1
  74. flowyml/ui/backend/routers/schedules.py +22 -17
  75. flowyml/ui/backend/routers/templates.py +319 -0
  76. flowyml/ui/backend/routers/websocket.py +2 -2
  77. flowyml/ui/frontend/Dockerfile +55 -6
  78. flowyml/ui/frontend/dist/assets/index-B5AsPTSz.css +1 -0
  79. flowyml/ui/frontend/dist/assets/index-dFbZ8wD8.js +753 -0
  80. flowyml/ui/frontend/dist/index.html +2 -2
  81. flowyml/ui/frontend/dist/logo.png +0 -0
  82. flowyml/ui/frontend/nginx.conf +65 -4
  83. flowyml/ui/frontend/package-lock.json +1404 -74
  84. flowyml/ui/frontend/package.json +3 -0
  85. flowyml/ui/frontend/public/logo.png +0 -0
  86. flowyml/ui/frontend/src/App.jsx +10 -7
  87. flowyml/ui/frontend/src/app/auth/Login.jsx +90 -0
  88. flowyml/ui/frontend/src/app/dashboard/page.jsx +8 -8
  89. flowyml/ui/frontend/src/app/deployments/page.jsx +786 -0
  90. flowyml/ui/frontend/src/app/model-explorer/page.jsx +1031 -0
  91. flowyml/ui/frontend/src/app/pipelines/page.jsx +12 -2
  92. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +19 -6
  93. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +36 -24
  94. flowyml/ui/frontend/src/app/runs/page.jsx +8 -2
  95. flowyml/ui/frontend/src/app/settings/page.jsx +267 -253
  96. flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +29 -7
  97. flowyml/ui/frontend/src/components/Layout.jsx +6 -0
  98. flowyml/ui/frontend/src/components/PipelineGraph.jsx +79 -29
  99. flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +36 -6
  100. flowyml/ui/frontend/src/components/RunMetaPanel.jsx +113 -0
  101. flowyml/ui/frontend/src/components/ai/AIAssistantButton.jsx +71 -0
  102. flowyml/ui/frontend/src/components/ai/AIAssistantPanel.jsx +420 -0
  103. flowyml/ui/frontend/src/components/header/Header.jsx +22 -0
  104. flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +4 -4
  105. flowyml/ui/frontend/src/components/plugins/{ZenMLIntegration.jsx → StackImport.jsx} +38 -12
  106. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +36 -13
  107. flowyml/ui/frontend/src/contexts/AIAssistantContext.jsx +245 -0
  108. flowyml/ui/frontend/src/contexts/AuthContext.jsx +108 -0
  109. flowyml/ui/frontend/src/hooks/useAIContext.js +156 -0
  110. flowyml/ui/frontend/src/hooks/useWebGPU.js +54 -0
  111. flowyml/ui/frontend/src/layouts/MainLayout.jsx +6 -0
  112. flowyml/ui/frontend/src/router/index.jsx +47 -20
  113. flowyml/ui/frontend/src/services/pluginService.js +3 -1
  114. flowyml/ui/server_manager.py +5 -5
  115. flowyml/ui/utils.py +157 -39
  116. flowyml/utils/config.py +37 -15
  117. flowyml/utils/model_introspection.py +123 -0
  118. flowyml/utils/observability.py +30 -0
  119. flowyml-1.8.0.dist-info/METADATA +174 -0
  120. {flowyml-1.7.2.dist-info → flowyml-1.8.0.dist-info}/RECORD +123 -65
  121. {flowyml-1.7.2.dist-info → flowyml-1.8.0.dist-info}/WHEEL +1 -1
  122. flowyml/ui/frontend/dist/assets/index-B40RsQDq.css +0 -1
  123. flowyml/ui/frontend/dist/assets/index-CjI0zKCn.js +0 -685
  124. flowyml-1.7.2.dist-info/METADATA +0 -477
  125. {flowyml-1.7.2.dist-info → flowyml-1.8.0.dist-info}/entry_points.txt +0 -0
  126. {flowyml-1.7.2.dist-info → flowyml-1.8.0.dist-info}/licenses/LICENSE +0 -0
flowyml/ui/backend/routers/deployments.py
@@ -0,0 +1,660 @@
+ """Deployment management API for model serving."""
+ from fastapi import APIRouter, HTTPException, BackgroundTasks
+ from pydantic import BaseModel, Field
+ from datetime import datetime
+ from uuid import uuid4
+ import secrets
+
+ router = APIRouter(prefix="/deployments", tags=["deployments"])
+
+
+ # ==================== Schemas ====================
+
+ # Common ML framework dependencies that can be installed on-demand
+ ML_DEPENDENCIES = {
+     "keras": ["keras", "tensorflow"],
+     "tensorflow": ["tensorflow"],
+     "pytorch": ["torch", "torchvision"],
+     "sklearn": ["scikit-learn"],
+     "xgboost": ["xgboost"],
+     "lightgbm": ["lightgbm"],
+     "onnx": ["onnx", "onnxruntime"],
+ }
+
+
+ class DeploymentConfig(BaseModel):
+     """Configuration for a model deployment."""
+
+     rate_limit: int = Field(default=100, description="Requests per minute")
+     timeout_seconds: int = Field(default=30, description="Request timeout")
+     max_batch_size: int = Field(default=1, description="Max batch size for predictions")
+     enable_cors: bool = Field(default=True, description="Enable CORS")
+     ttl_seconds: int | None = Field(None, description="Auto-destroy after N seconds (None = never)")
+     install_dependencies: list[str] = Field(
+         default_factory=list,
+         description="ML dependencies to install on server (e.g., ['keras', 'sklearn'])",
+     )
+
+
+ class DeploymentCreate(BaseModel):
+     """Request to create a new deployment."""
+
+     name: str = Field(..., description="Human-readable name for the deployment")
+     model_artifact_id: str = Field(..., description="ID of the model artifact to deploy")
+     model_version: str | None = Field(None, description="Specific version to deploy")
+     port: int | None = Field(None, description="Port to serve on (auto-assigned if not provided)")
+     config: DeploymentConfig = Field(default_factory=DeploymentConfig)
+
+
+ class DeploymentResponse(BaseModel):
+     """Deployment details response."""
+
+     id: str  # noqa: A003
+     name: str
+     model_artifact_id: str
+     model_version: str | None
+     status: str  # pending, starting, running, stopping, stopped, error
+     port: int
+     api_token: str
+     endpoint_url: str
+     config: DeploymentConfig
+     created_at: str
+     started_at: str | None
+     stopped_at: str | None
+     expires_at: str | None = None
+     error_message: str | None = None
+
+
+ class PredictRequest(BaseModel):
+     """Prediction request for deployed model."""
+
+     data: dict = Field(..., description="Input data for prediction")
+
+
+ class PredictResponse(BaseModel):
+     """Prediction response from deployed model."""
+
+     prediction: dict
+     latency_ms: float
+     model_version: str
+
+
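These Pydantic schemas define the request and response contract for the new deployments API. For illustration only (not part of the diff), a request body matching DeploymentCreate with a nested DeploymentConfig could look like the sketch below; the artifact ID and TTL are placeholder values.

# Illustrative request body for POST /deployments/ (placeholder values).
payload = {
    "name": "iris-classifier",
    "model_artifact_id": "run123_train_model",  # hypothetical artifact_id
    "model_version": None,  # None -> deploy the latest version
    "config": {
        "rate_limit": 100,  # requests per minute
        "timeout_seconds": 30,
        "max_batch_size": 1,
        "enable_cors": True,
        "ttl_seconds": 3600,  # auto-stop after one hour; None means never
        "install_dependencies": ["sklearn"],
    },
}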
+ # ==================== In-Memory State (for MVP) ====================
+ # TODO: Move to database for production
+
+ _deployments: dict[str, dict] = {}
+ _next_port = 9000  # Start port allocation from 9000
+
+
+ def _generate_token() -> str:
+     """Generate a secure API token."""
+     return f"flowy_{secrets.token_urlsafe(32)}"
+
+
+ def _allocate_port() -> int:
+     """Allocate the next available port."""
+     global _next_port
+     port = _next_port
+     _next_port += 1
+     return port
+
+
+ # ==================== Endpoints ====================
+
+
+ @router.get("/")
+ async def list_deployments() -> list[DeploymentResponse]:
+     """List all deployments."""
+     return [DeploymentResponse(**d) for d in _deployments.values()]
+
+
+ @router.post("/", status_code=201)
+ async def create_deployment(
+     request: DeploymentCreate,
+     background_tasks: BackgroundTasks,
+ ) -> DeploymentResponse:
+     """Create a new model deployment."""
+     from datetime import timedelta
+
+     deployment_id = str(uuid4())
+     port = request.port or _allocate_port()
+     api_token = _generate_token()
+
+     # Calculate expiry time if TTL is set
+     created_at = datetime.now()
+     expires_at = None
+     ttl_seconds = request.config.ttl_seconds
+     if ttl_seconds and ttl_seconds > 0:
+         expires_at = (created_at + timedelta(seconds=ttl_seconds)).isoformat()
+
+     deployment = {
+         "id": deployment_id,
+         "name": request.name,
+         "model_artifact_id": request.model_artifact_id,
+         "model_version": request.model_version,
+         "status": "pending",
+         "port": port,
+         "api_token": api_token,
+         "endpoint_url": f"http://localhost:{port}",
+         "config": request.config.model_dump(),
+         "created_at": created_at.isoformat(),
+         "started_at": None,
+         "stopped_at": None,
+         "expires_at": expires_at,
+         "error_message": None,
+     }
+
+     _deployments[deployment_id] = deployment
+
+     # Start the deployment in background
+     background_tasks.add_task(_start_deployment, deployment_id)
+
+     # Schedule auto-expiry if TTL is set
+     if ttl_seconds and ttl_seconds > 0:
+         background_tasks.add_task(_monitor_expiry, deployment_id, ttl_seconds)
+
+     return DeploymentResponse(**deployment)
+
+
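POST /deployments/ allocates a port and API token, records the deployment as "pending", and schedules _start_deployment (and, when a TTL is set, _monitor_expiry) as background tasks. A minimal client sketch, assuming the UI backend is reachable at http://localhost:8000 and using the third-party requests package; neither assumption comes from the diff.

# Sketch only: create a deployment and poll its status.
import requests

BASE = "http://localhost:8000/deployments"  # assumed backend address
resp = requests.post(
    f"{BASE}/",
    json={"name": "iris-classifier", "model_artifact_id": "run123_train_model"},
)
dep = resp.json()
print(dep["status"], dep["endpoint_url"])  # status starts as "pending"

# The background task moves the status to "running" (or "error").
print(requests.get(f"{BASE}/{dep['id']}").json()["status"])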
+ @router.get("/available-models")
+ async def get_available_models() -> list[dict]:
+     """Get list of models available for deployment."""
+     # Import here to avoid circular imports
+     from flowyml.ui.backend.dependencies import get_store
+     import os
+
+     store = get_store()
+     try:
+         # Get all artifacts (assets)
+         artifacts = store.list_assets()
+
+         # Model-related type keywords
+         model_keywords = (
+             "model",
+             "keras",
+             "sklearn",
+             "pytorch",
+             "tensorflow",
+             "xgboost",
+             "lightgbm",
+             "catboost",
+             "onnx",
+             "joblib",
+             "pickle",
+             "h5",
+             "saved_model",
+             "nn",
+             "classifier",
+             "regressor",
+         )
+
+         # First pass: look for explicitly model-typed artifacts
+         models = []
+         for a in artifacts:
+             # Asset structure uses 'type' not 'asset_type'
+             asset_type = (a.get("type") or "").lower()
+             name = (a.get("name") or "").lower()
+             path = a.get("path")
+
+             # Check if this looks like a model
+             is_model = any(kw in asset_type for kw in model_keywords) or any(kw in name for kw in model_keywords)
+
+             # Skip if no file path (inline values can't be deployed as model servers)
+             has_file = bool(path)
+             file_exists = False
+             if has_file:
+                 # Check if file exists (in container, paths are relative to artifacts dir)
+                 full_path = os.path.join("/app/artifacts", path)
+                 file_exists = os.path.exists(full_path)
+
+             if is_model:
+                 # Generate artifact_id if not present
+                 artifact_id = a.get("artifact_id") or f"{a.get('run_id')}_{a.get('step')}_{a.get('name')}"
+                 models.append(
+                     {
+                         "artifact_id": artifact_id,
+                         "name": a.get("name"),
+                         "version": a.get("version"),
+                         "type": a.get("type") or "model",
+                         "created_at": a.get("created_at"),
+                         "run_id": a.get("run_id"),
+                         "project": a.get("project"),
+                         "has_file": has_file,
+                         "file_exists": file_exists,
+                         "path": path,
+                     },
+                 )
+
+         # If no models found, return all artifacts with paths as potential models
+         if not models and artifacts:
+             models = [
+                 {
+                     "artifact_id": a.get("artifact_id") or f"{a.get('run_id')}_{a.get('step')}_{a.get('name')}",
+                     "name": a.get("name"),
+                     "version": a.get("version"),
+                     "type": a.get("type") or "unknown",
+                     "created_at": a.get("created_at"),
+                     "run_id": a.get("run_id"),
+                     "project": a.get("project"),
+                     "has_file": bool(a.get("path")),
+                     "file_exists": os.path.exists(os.path.join("/app/artifacts", a.get("path") or ""))
+                     if a.get("path")
+                     else False,
+                     "path": a.get("path"),
+                 }
+                 for a in artifacts
+             ]
+
+         return models
+     except Exception:
+         # Return empty list on error
+         return []
+
+
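GET /deployments/available-models filters stored assets by model-related keywords and falls back to listing every path-backed artifact when nothing matches. An entry in the response has roughly the shape below; the keys mirror the dict built above, the values are placeholders.

# Illustrative response entry (values are placeholders).
example_entry = {
    "artifact_id": "run123_train_model",
    "name": "model",
    "version": "3",
    "type": "sklearn",
    "created_at": "2024-01-01T12:00:00",
    "run_id": "run123",
    "project": "demo",
    "has_file": True,
    "file_exists": True,
    "path": "run123/train/model.pkl",
}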
+ @router.get("/{deployment_id}")
+ async def get_deployment(deployment_id: str) -> DeploymentResponse:
+     """Get deployment details."""
+     if deployment_id not in _deployments:
+         raise HTTPException(status_code=404, detail="Deployment not found")
+     return DeploymentResponse(**_deployments[deployment_id])
+
+
+ @router.delete("/{deployment_id}")
+ async def delete_deployment(
+     deployment_id: str,
+     background_tasks: BackgroundTasks,
+ ) -> dict:
+     """Stop and delete a deployment."""
+     if deployment_id not in _deployments:
+         raise HTTPException(status_code=404, detail="Deployment not found")
+
+     deployment = _deployments[deployment_id]
+     deployment["status"] = "stopping"
+
+     # Stop in background
+     background_tasks.add_task(_stop_deployment, deployment_id)
+
+     return {"status": "stopping", "id": deployment_id}
+
+
+ @router.post("/{deployment_id}/start")
+ async def start_deployment(
+     deployment_id: str,
+     background_tasks: BackgroundTasks,
+ ) -> dict:
+     """Start a stopped deployment."""
+     if deployment_id not in _deployments:
+         raise HTTPException(status_code=404, detail="Deployment not found")
+
+     deployment = _deployments[deployment_id]
+     if deployment["status"] == "running":
+         raise HTTPException(status_code=400, detail="Deployment already running")
+
+     deployment["status"] = "starting"
+     background_tasks.add_task(_start_deployment, deployment_id)
+
+     return {"status": "starting", "id": deployment_id}
+
+
+ @router.post("/{deployment_id}/stop")
+ async def stop_deployment(
+     deployment_id: str,
+     background_tasks: BackgroundTasks,
+ ) -> dict:
+     """Stop a running deployment."""
+     if deployment_id not in _deployments:
+         raise HTTPException(status_code=404, detail="Deployment not found")
+
+     deployment = _deployments[deployment_id]
+     if deployment["status"] != "running":
+         raise HTTPException(status_code=400, detail="Deployment not running")
+
+     deployment["status"] = "stopping"
+     background_tasks.add_task(_stop_deployment, deployment_id)
+
+     return {"status": "stopping", "id": deployment_id}
+
+
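The four routes above cover the deployment lifecycle: fetch details, stop, restart, and delete. A sketch of driving them over HTTP; the base URL and deployment ID are placeholders, not values from the diff.

# Sketch only: lifecycle calls against the deployments router.
import requests

BASE = "http://localhost:8000/deployments"  # assumed backend address
dep_id = "<deployment-id>"  # an ID returned by POST /deployments/

requests.post(f"{BASE}/{dep_id}/stop")   # running -> stopping -> stopped
requests.post(f"{BASE}/{dep_id}/start")  # stopped -> starting -> running
requests.delete(f"{BASE}/{dep_id}")      # stop and remove the deployment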
+ @router.get("/{deployment_id}/logs")
+ async def get_deployment_logs(
+     deployment_id: str,
+     lines: int = 100,
+ ) -> dict:
+     """Get deployment logs."""
+     if deployment_id not in _deployments:
+         raise HTTPException(status_code=404, detail="Deployment not found")
+
+     deployment = _deployments[deployment_id]
+
+     # Try to get real logs from model server
+     try:
+         from flowyml.serving.model_server import get_server_logs
+
+         logs = get_server_logs(deployment_id, lines)
+         if logs:
+             return {
+                 "deployment_id": deployment_id,
+                 "logs": logs,
+             }
+     except Exception:
+         pass
+
+     # Fallback to basic status logs
+     return {
+         "deployment_id": deployment_id,
+         "logs": [
+             {
+                 "timestamp": deployment.get("created_at", datetime.now().isoformat()),
+                 "level": "INFO",
+                 "message": f"Deployment '{deployment['name']}' created",
+             },
+             {
+                 "timestamp": deployment.get("started_at") or datetime.now().isoformat(),
+                 "level": "INFO",
+                 "message": f"Model {deployment['model_artifact_id']} loaded",
+             },
+             {
+                 "timestamp": deployment.get("started_at") or datetime.now().isoformat(),
+                 "level": "INFO",
+                 "message": f"Server configured on port {deployment['port']}",
+             },
+             {
+                 "timestamp": datetime.now().isoformat(),
+                 "level": "INFO",
+                 "message": f"Current status: {deployment['status']}",
+             },
+         ],
+     }
+
+
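GET /{deployment_id}/logs prefers live logs from the in-process model server and falls back to synthesized status entries. An illustrative call, with placeholder base URL and deployment ID:

# Illustrative: fetch the most recent log entries for a deployment.
import requests

resp = requests.get(
    "http://localhost:8000/deployments/<deployment-id>/logs",  # assumed URL
    params={"lines": 50},
)
for entry in resp.json()["logs"]:
    print(entry["timestamp"], entry["level"], entry["message"])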
+ @router.post("/{deployment_id}/test")
+ async def test_deployment(
+     deployment_id: str,
+     request: PredictRequest,
+ ) -> PredictResponse:
+     """Test a deployed model with sample input."""
+     if deployment_id not in _deployments:
+         raise HTTPException(status_code=404, detail="Deployment not found")
+
+     deployment = _deployments[deployment_id]
+     if deployment["status"] != "running":
+         raise HTTPException(status_code=400, detail="Deployment not running")
+
+     import time
+
+     start = time.time()
+
+     try:
+         # Use real model server prediction
+         from flowyml.serving.model_server import predict, get_server
+
+         server = get_server(deployment_id)
+         if server is None:
+             raise HTTPException(status_code=500, detail="Model server not available")
+
+         # Run prediction
+         import asyncio
+
+         loop = asyncio.get_event_loop()
+         result = await loop.run_in_executor(
+             None,
+             lambda: predict(deployment_id, request.data),
+         )
+
+         latency = (time.time() - start) * 1000
+
+         return PredictResponse(
+             prediction=result,
+             latency_ms=latency,
+             model_version=deployment["model_version"] or "latest",
+         )
+
+     except ValueError as e:
+         raise HTTPException(status_code=404, detail=str(e))
+     except RuntimeError as e:
+         raise HTTPException(status_code=500, detail=str(e))
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
+
+
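POST /{deployment_id}/test runs a single prediction through the in-process model server and reports latency. The shape of the "data" payload depends entirely on the deployed model; the feature names below are hypothetical, as is the base URL.

# Illustrative prediction test (feature names are made up).
import requests

resp = requests.post(
    "http://localhost:8000/deployments/<deployment-id>/test",  # assumed URL
    json={"data": {"sepal_length": 5.1, "sepal_width": 3.5}},
)
print(resp.json())  # e.g. {"prediction": ..., "latency_ms": ..., "model_version": "latest"}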
+ # ==================== Background Tasks ====================
+
+ _server_processes: dict[str, any] = {}
+
+
+ async def _start_deployment(deployment_id: str):
+     """Start the model server for a deployment."""
+     import asyncio
+
+     if deployment_id not in _deployments:
+         return
+
+     deployment = _deployments[deployment_id]
+
+     try:
+         deployment["status"] = "starting"
+
+         # Import the real model server
+         from flowyml.serving.model_server import (
+             start_model_server,
+             ServerConfig,
+         )
+
+         # Create server config from deployment config
+         config = ServerConfig(
+             port=deployment["port"],
+             api_token=deployment["api_token"],
+             rate_limit=deployment["config"].get("rate_limit", 100),
+             timeout_seconds=deployment["config"].get("timeout_seconds", 30),
+             max_batch_size=deployment["config"].get("max_batch_size", 1),
+             enable_cors=deployment["config"].get("enable_cors", True),
+         )
+
+         # Start the model server (this loads the model)
+         # Run in executor to avoid blocking
+         loop = asyncio.get_event_loop()
+         server = await loop.run_in_executor(
+             None,
+             lambda: start_model_server(
+                 deployment_id=deployment_id,
+                 model_artifact_id=deployment["model_artifact_id"],
+                 config=config,
+             ),
+         )
+
+         _server_processes[deployment_id] = server
+
+         deployment["status"] = "running"
+         deployment["started_at"] = datetime.now().isoformat()
+         deployment["error_message"] = None
+
+     except Exception as e:
+         deployment["status"] = "error"
+         deployment["error_message"] = str(e)
+
+
+ async def _stop_deployment(deployment_id: str):
+     """Stop the model server for a deployment."""
+     import asyncio
+
+     if deployment_id not in _deployments:
+         return
+
+     deployment = _deployments[deployment_id]
+
+     try:
+         # Import the real model server
+         from flowyml.serving.model_server import stop_model_server
+
+         # Stop the server (this cleans up loaded models)
+         loop = asyncio.get_event_loop()
+         await loop.run_in_executor(
+             None,
+             lambda: stop_model_server(deployment_id),
+         )
+
+         # Clean up local reference
+         if deployment_id in _server_processes:
+             del _server_processes[deployment_id]
+
+         deployment["status"] = "stopped"
+         deployment["stopped_at"] = datetime.now().isoformat()
+
+         # Remove from deployments on delete
+         if deployment.get("_pending_delete"):
+             del _deployments[deployment_id]
+
+     except Exception as e:
+         deployment["status"] = "error"
+         deployment["error_message"] = str(e)
+
+
+ async def _monitor_expiry(deployment_id: str, ttl_seconds: int):
+     """Monitor deployment and auto-stop after TTL expires."""
+     import asyncio
+
+     # Wait for TTL duration
+     await asyncio.sleep(ttl_seconds)
+
+     # Check if deployment still exists and is running
+     if deployment_id not in _deployments:
+         return
+
+     deployment = _deployments[deployment_id]
+
+     # Only stop if still running
+     if deployment["status"] == "running":
+         deployment["status"] = "stopping"
+
+         # Add expiry reason to logs
+         try:
+             from flowyml.serving.model_server import get_server
+
+             server = get_server(deployment_id)
+             if server:
+                 server.log_buffer.append(
+                     {
+                         "timestamp": datetime.now().isoformat(),
+                         "level": "INFO",
+                         "message": f"Auto-stopping: TTL of {ttl_seconds}s expired",
+                     },
+                 )
+         except Exception:
+             pass
+
+         # Stop the deployment
+         await _stop_deployment(deployment_id)
+
+
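Deployment state lives in module-level dicts (_deployments and _server_processes), so it is lost on restart, which is what the MVP TODO about moving to a database refers to. The TTL path is split between create_deployment, which stamps expires_at, and _monitor_expiry, which sleeps for the TTL and then stops a still-running server. A minimal sketch of that expiry arithmetic, standard library only and with an example TTL value:

# Sketch of the expiry arithmetic used above (example TTL value).
from datetime import datetime, timedelta

ttl_seconds = 3600
created_at = datetime.now()
expires_at = (created_at + timedelta(seconds=ttl_seconds)).isoformat()
# _monitor_expiry then awaits asyncio.sleep(ttl_seconds) and, if the deployment
# is still "running", drives it through "stopping" to "stopped".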
+ # ==================== Dependency Installation ====================
+
+
+ class InstallDependenciesRequest(BaseModel):
+     """Request to install ML framework dependencies."""
+
+     frameworks: list[str] = Field(
+         ...,
+         description="List of frameworks to install (keras, tensorflow, pytorch, sklearn, etc.)",
+     )
+
+
+ @router.get("/dependencies/available")
+ async def list_available_dependencies() -> dict:
+     """List available ML framework dependencies that can be installed."""
+     return {
+         "available": ML_DEPENDENCIES,
+         "description": "Pass framework keys to the install endpoint to install dependencies",
+     }
+
+
+ @router.post("/dependencies/install")
+ async def install_dependencies(
+     request: InstallDependenciesRequest,
+     background_tasks: BackgroundTasks,
+ ) -> dict:
+     """Install ML framework dependencies on the server.
+
+     This lightweight approach allows deploying Keras/TensorFlow/PyTorch models
+     without needing a heavy Triton Inference Server container.
+     """
+
+     # Collect all packages to install
+     packages = []
+     for framework in request.frameworks:
+         framework_lower = framework.lower()
+         if framework_lower in ML_DEPENDENCIES:
+             packages.extend(ML_DEPENDENCIES[framework_lower])
+         else:
+             # Allow direct package names too
+             packages.append(framework)
+
+     if not packages:
+         raise HTTPException(status_code=400, detail="No valid frameworks specified")
+
+     # Deduplicate
+     packages = list(set(packages))
+
+     # Queue the installation in background
+     background_tasks.add_task(_install_packages_sync, packages)
+
+     return {
+         "status": "installing",
+         "packages": packages,
+         "message": f"Installing {len(packages)} package(s) in background",
+     }
+
+
+ def _install_packages_sync(packages: list[str]):
+     """Background task to install packages via pip."""
+     import subprocess
+     import logging
+
+     logger = logging.getLogger(__name__)
+
+     for package in packages:
+         try:
+             logger.info(f"Installing {package}...")
+             result = subprocess.run(
+                 ["pip", "install", package],
+                 capture_output=True,
+                 text=True,
+                 timeout=300,  # 5 min timeout per package
+             )
+             if result.returncode == 0:
+                 logger.info(f"Successfully installed {package}")
+             else:
+                 logger.warning(f"Failed to install {package}: {result.stderr}")
+         except Exception as e:
+             logger.error(f"Error installing {package}: {e}")
+
+
+ @router.get("/dependencies/status")
+ async def check_installed_dependencies() -> dict:
+     """Check which ML frameworks are currently installed."""
+     import importlib.util
+
+     installed = {}
+     checks = {
+         "keras": "keras",
+         "tensorflow": "tensorflow",
+         "pytorch": "torch",
+         "sklearn": "sklearn",
+         "xgboost": "xgboost",
+         "lightgbm": "lightgbm",
+         "onnx": "onnx",
+         "onnxruntime": "onnxruntime",
+         "numpy": "numpy",
+         "pandas": "pandas",
+     }
+
+     for name, module in checks.items():
+         try:
+             spec = importlib.util.find_spec(module)
+             installed[name] = spec is not None
+         except (ImportError, ModuleNotFoundError):
+             installed[name] = False
+
+     return {
+         "installed": installed,
+         "ready_frameworks": [k for k, v in installed.items() if v],
+     }
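The dependency endpoints let the serving backend pull ML frameworks on demand via pip rather than baking them into the image. A hedged end-to-end sketch, assuming the same placeholder base URL as above and the third-party requests package:

# Sketch only: list, install, and verify ML framework dependencies.
import requests

BASE = "http://localhost:8000/deployments"  # assumed backend address

print(requests.get(f"{BASE}/dependencies/available").json()["available"])
requests.post(f"{BASE}/dependencies/install", json={"frameworks": ["sklearn"]})
# Installation runs in the background; poll status afterwards.
print(requests.get(f"{BASE}/dependencies/status").json()["ready_frameworks"])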