flowyml 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/assets/base.py +15 -0
- flowyml/assets/dataset.py +570 -17
- flowyml/assets/metrics.py +5 -0
- flowyml/assets/model.py +1052 -15
- flowyml/cli/main.py +709 -0
- flowyml/cli/stack_cli.py +138 -25
- flowyml/core/__init__.py +17 -0
- flowyml/core/executor.py +231 -37
- flowyml/core/image_builder.py +129 -0
- flowyml/core/log_streamer.py +227 -0
- flowyml/core/orchestrator.py +59 -4
- flowyml/core/pipeline.py +65 -13
- flowyml/core/routing.py +558 -0
- flowyml/core/scheduler.py +88 -5
- flowyml/core/step.py +9 -1
- flowyml/core/step_grouping.py +49 -35
- flowyml/core/types.py +407 -0
- flowyml/integrations/keras.py +247 -82
- flowyml/monitoring/alerts.py +10 -0
- flowyml/monitoring/notifications.py +104 -25
- flowyml/monitoring/slack_blocks.py +323 -0
- flowyml/plugins/__init__.py +251 -0
- flowyml/plugins/alerters/__init__.py +1 -0
- flowyml/plugins/alerters/slack.py +168 -0
- flowyml/plugins/base.py +752 -0
- flowyml/plugins/config.py +478 -0
- flowyml/plugins/deployers/__init__.py +22 -0
- flowyml/plugins/deployers/gcp_cloud_run.py +200 -0
- flowyml/plugins/deployers/sagemaker.py +306 -0
- flowyml/plugins/deployers/vertex.py +290 -0
- flowyml/plugins/integration.py +369 -0
- flowyml/plugins/manager.py +510 -0
- flowyml/plugins/model_registries/__init__.py +22 -0
- flowyml/plugins/model_registries/mlflow.py +159 -0
- flowyml/plugins/model_registries/sagemaker.py +489 -0
- flowyml/plugins/model_registries/vertex.py +386 -0
- flowyml/plugins/orchestrators/__init__.py +13 -0
- flowyml/plugins/orchestrators/sagemaker.py +443 -0
- flowyml/plugins/orchestrators/vertex_ai.py +461 -0
- flowyml/plugins/registries/__init__.py +13 -0
- flowyml/plugins/registries/ecr.py +321 -0
- flowyml/plugins/registries/gcr.py +313 -0
- flowyml/plugins/registry.py +454 -0
- flowyml/plugins/stack.py +494 -0
- flowyml/plugins/stack_config.py +537 -0
- flowyml/plugins/stores/__init__.py +13 -0
- flowyml/plugins/stores/gcs.py +460 -0
- flowyml/plugins/stores/s3.py +453 -0
- flowyml/plugins/trackers/__init__.py +11 -0
- flowyml/plugins/trackers/mlflow.py +316 -0
- flowyml/plugins/validators/__init__.py +3 -0
- flowyml/plugins/validators/deepchecks.py +119 -0
- flowyml/registry/__init__.py +2 -1
- flowyml/registry/model_environment.py +109 -0
- flowyml/registry/model_registry.py +241 -96
- flowyml/serving/__init__.py +17 -0
- flowyml/serving/model_server.py +628 -0
- flowyml/stacks/__init__.py +60 -0
- flowyml/stacks/aws.py +93 -0
- flowyml/stacks/base.py +62 -0
- flowyml/stacks/components.py +12 -0
- flowyml/stacks/gcp.py +44 -9
- flowyml/stacks/plugins.py +115 -0
- flowyml/stacks/registry.py +2 -1
- flowyml/storage/sql.py +401 -12
- flowyml/tracking/experiment.py +8 -5
- flowyml/ui/backend/Dockerfile +87 -16
- flowyml/ui/backend/auth.py +12 -2
- flowyml/ui/backend/main.py +149 -5
- flowyml/ui/backend/routers/ai_context.py +226 -0
- flowyml/ui/backend/routers/assets.py +23 -4
- flowyml/ui/backend/routers/auth.py +96 -0
- flowyml/ui/backend/routers/deployments.py +660 -0
- flowyml/ui/backend/routers/model_explorer.py +597 -0
- flowyml/ui/backend/routers/plugins.py +103 -51
- flowyml/ui/backend/routers/projects.py +91 -8
- flowyml/ui/backend/routers/runs.py +132 -1
- flowyml/ui/backend/routers/schedules.py +54 -29
- flowyml/ui/backend/routers/templates.py +319 -0
- flowyml/ui/backend/routers/websocket.py +2 -2
- flowyml/ui/frontend/Dockerfile +55 -6
- flowyml/ui/frontend/dist/assets/index-B5AsPTSz.css +1 -0
- flowyml/ui/frontend/dist/assets/index-dFbZ8wD8.js +753 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/dist/logo.png +0 -0
- flowyml/ui/frontend/nginx.conf +65 -4
- flowyml/ui/frontend/package-lock.json +1415 -74
- flowyml/ui/frontend/package.json +4 -0
- flowyml/ui/frontend/public/logo.png +0 -0
- flowyml/ui/frontend/src/App.jsx +10 -7
- flowyml/ui/frontend/src/app/assets/page.jsx +890 -321
- flowyml/ui/frontend/src/app/auth/Login.jsx +90 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +8 -8
- flowyml/ui/frontend/src/app/deployments/page.jsx +786 -0
- flowyml/ui/frontend/src/app/model-explorer/page.jsx +1031 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +12 -2
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +19 -6
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +1 -1
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +601 -101
- flowyml/ui/frontend/src/app/runs/page.jsx +8 -2
- flowyml/ui/frontend/src/app/settings/page.jsx +267 -253
- flowyml/ui/frontend/src/components/ArtifactViewer.jsx +62 -2
- flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +424 -29
- flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +119 -11
- flowyml/ui/frontend/src/components/DatasetViewer.jsx +753 -0
- flowyml/ui/frontend/src/components/Layout.jsx +6 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +79 -29
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +36 -6
- flowyml/ui/frontend/src/components/RunMetaPanel.jsx +113 -0
- flowyml/ui/frontend/src/components/TrainingHistoryChart.jsx +514 -0
- flowyml/ui/frontend/src/components/TrainingMetricsPanel.jsx +175 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantButton.jsx +71 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantPanel.jsx +420 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +22 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +4 -4
- flowyml/ui/frontend/src/components/plugins/{ZenMLIntegration.jsx → StackImport.jsx} +38 -12
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +36 -13
- flowyml/ui/frontend/src/contexts/AIAssistantContext.jsx +245 -0
- flowyml/ui/frontend/src/contexts/AuthContext.jsx +108 -0
- flowyml/ui/frontend/src/hooks/useAIContext.js +156 -0
- flowyml/ui/frontend/src/hooks/useWebGPU.js +54 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +6 -0
- flowyml/ui/frontend/src/router/index.jsx +47 -20
- flowyml/ui/frontend/src/services/pluginService.js +3 -1
- flowyml/ui/server_manager.py +5 -5
- flowyml/ui/utils.py +157 -39
- flowyml/utils/config.py +37 -15
- flowyml/utils/model_introspection.py +123 -0
- flowyml/utils/observability.py +30 -0
- flowyml-1.8.0.dist-info/METADATA +174 -0
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/RECORD +134 -73
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/WHEEL +1 -1
- flowyml/ui/frontend/dist/assets/index-BqDQvp63.js +0 -630
- flowyml/ui/frontend/dist/assets/index-By4trVyv.css +0 -1
- flowyml-1.7.1.dist-info/METADATA +0 -477
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/entry_points.txt +0 -0
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,660 @@
|
|
|
1
|
+
"""Deployment management API for model serving."""
|
|
2
|
+
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from uuid import uuid4
|
|
6
|
+
import secrets
|
|
7
|
+
|
|
8
|
+
# Shared router mounted by the backend app; every path below is served under /deployments.
router = APIRouter(prefix="/deployments", tags=["deployments"])
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# ==================== Schemas ====================
|
|
12
|
+
|
|
13
|
+
# Common ML framework dependencies that can be installed on-demand
|
|
14
|
+
# Maps a user-facing framework key to the pip package(s) it requires.
# Used by install_dependencies(); unknown keys fall through as literal pip names.
ML_DEPENDENCIES = {
    "keras": ["keras", "tensorflow"],  # NOTE: 'keras' also pulls in tensorflow
    "tensorflow": ["tensorflow"],
    "pytorch": ["torch", "torchvision"],
    "sklearn": ["scikit-learn"],
    "xgboost": ["xgboost"],
    "lightgbm": ["lightgbm"],
    "onnx": ["onnx", "onnxruntime"],
}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DeploymentConfig(BaseModel):
    """Configuration for a model deployment.

    Stored as a plain dict on the deployment record and mirrored onto the
    model server's ServerConfig when the deployment starts.
    """

    rate_limit: int = Field(default=100, description="Requests per minute")
    timeout_seconds: int = Field(default=30, description="Request timeout")
    max_batch_size: int = Field(default=1, description="Max batch size for predictions")
    enable_cors: bool = Field(default=True, description="Enable CORS")
    # None (or any value <= 0) disables auto-destroy entirely.
    ttl_seconds: int | None = Field(None, description="Auto-destroy after N seconds (None = never)")
    install_dependencies: list[str] = Field(
        default_factory=list,
        description="ML dependencies to install on server (e.g., ['keras', 'sklearn'])",
    )
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class DeploymentCreate(BaseModel):
    """Request to create a new deployment."""

    name: str = Field(..., description="Human-readable name for the deployment")
    model_artifact_id: str = Field(..., description="ID of the model artifact to deploy")
    model_version: str | None = Field(None, description="Specific version to deploy")
    # When omitted, a port is taken from this module's incrementing allocator.
    port: int | None = Field(None, description="Port to serve on (auto-assigned if not provided)")
    config: DeploymentConfig = Field(default_factory=DeploymentConfig)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class DeploymentResponse(BaseModel):
    """Deployment details response.

    Field values come straight from the in-memory deployment record; all
    timestamps are ISO-8601 strings produced by datetime.isoformat().
    """

    id: str  # noqa: A003
    name: str
    model_artifact_id: str
    model_version: str | None
    status: str  # pending, starting, running, stopping, stopped, error
    port: int
    api_token: str  # generated at create time; passed to the model server config
    endpoint_url: str  # e.g. http://localhost:<port>
    config: DeploymentConfig
    created_at: str
    started_at: str | None
    stopped_at: str | None
    expires_at: str | None = None  # set only when a positive TTL was configured
    error_message: str | None = None  # populated when status == "error"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class PredictRequest(BaseModel):
    """Prediction request for deployed model."""

    # Arbitrary JSON payload; forwarded unchanged to the model server's
    # predict() for the target deployment.
    data: dict = Field(..., description="Input data for prediction")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class PredictResponse(BaseModel):
    """Prediction response from deployed model."""

    prediction: dict  # raw result returned by the model server's predict()
    latency_ms: float  # wall-clock time of the predict call, in milliseconds
    model_version: str  # deployed version, or "latest" when none was pinned
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ==================== In-Memory State (for MVP) ====================
|
|
83
|
+
# TODO: Move to database for production
|
|
84
|
+
|
|
85
|
+
# Deployment records keyed by deployment id; lost on process restart
# (MVP in-memory store — see TODO above about moving this to the database).
_deployments: dict[str, dict] = {}
_next_port = 9000  # Start port allocation from 9000
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _generate_token() -> str:
    """Generate a secure API token."""
    # URL-safe random suffix behind a recognizable product prefix.
    suffix = secrets.token_urlsafe(32)
    return "flowy_" + suffix
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _allocate_port() -> int:
    """Hand out the next port number in sequence."""
    global _next_port
    # Return the current counter value and advance it in one step.
    allocated, _next_port = _next_port, _next_port + 1
    return allocated
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# ==================== Endpoints ====================
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@router.get("/")
async def list_deployments() -> list[DeploymentResponse]:
    """List all deployments."""
    records = _deployments.values()
    return [DeploymentResponse(**record) for record in records]
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@router.post("/", status_code=201)
async def create_deployment(
    request: DeploymentCreate,
    background_tasks: BackgroundTasks,
) -> DeploymentResponse:
    """Create a new model deployment.

    Registers the record immediately (status "pending"), then boots the
    model server — and, when a TTL is set, an expiry watcher — as
    background tasks so the request returns quickly.
    """
    from datetime import timedelta

    new_id = str(uuid4())
    serve_port = request.port or _allocate_port()
    token = _generate_token()

    now = datetime.now()
    ttl = request.config.ttl_seconds
    has_ttl = bool(ttl and ttl > 0)
    # Only compute an expiry timestamp when a positive TTL was requested.
    expiry = (now + timedelta(seconds=ttl)).isoformat() if has_ttl else None

    record = {
        "id": new_id,
        "name": request.name,
        "model_artifact_id": request.model_artifact_id,
        "model_version": request.model_version,
        "status": "pending",
        "port": serve_port,
        "api_token": token,
        "endpoint_url": f"http://localhost:{serve_port}",
        "config": request.config.model_dump(),
        "created_at": now.isoformat(),
        "started_at": None,
        "stopped_at": None,
        "expires_at": expiry,
        "error_message": None,
    }
    _deployments[new_id] = record

    # Boot the model server asynchronously.
    background_tasks.add_task(_start_deployment, new_id)

    # Auto-stop the deployment once its TTL elapses.
    if has_ttl:
        background_tasks.add_task(_monitor_expiry, new_id, ttl)

    return DeploymentResponse(**record)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@router.get("/available-models")
async def get_available_models() -> list[dict]:
    """Get list of models available for deployment.

    Scans stored assets for model-like artifacts (matched by keyword in the
    asset type or name). If nothing matches, every artifact is surfaced as a
    candidate so the picker is never empty. On any store failure the endpoint
    stays best-effort: it logs the error and returns an empty list.
    """
    # Import here to avoid circular imports
    from flowyml.ui.backend.dependencies import get_store
    import logging
    import os

    logger = logging.getLogger(__name__)

    # Keywords that mark an asset as model-like (checked in type and name).
    model_keywords = (
        "model",
        "keras",
        "sklearn",
        "pytorch",
        "tensorflow",
        "xgboost",
        "lightgbm",
        "catboost",
        "onnx",
        "joblib",
        "pickle",
        "h5",
        "saved_model",
        "nn",
        "classifier",
        "regressor",
    )

    # In the container, artifact paths are relative to this directory.
    artifacts_root = "/app/artifacts"

    def _entry(asset: dict, default_type: str) -> dict:
        """Build one response row for an asset, checking file presence on disk."""
        path = asset.get("path")
        # Inline values (no file path) can't be deployed as model servers;
        # surface that via has_file/file_exists instead of filtering here.
        has_file = bool(path)
        file_exists = has_file and os.path.exists(os.path.join(artifacts_root, path))
        # Generate artifact_id if not present
        artifact_id = asset.get("artifact_id") or f"{asset.get('run_id')}_{asset.get('step')}_{asset.get('name')}"
        return {
            "artifact_id": artifact_id,
            "name": asset.get("name"),
            "version": asset.get("version"),
            "type": asset.get("type") or default_type,
            "created_at": asset.get("created_at"),
            "run_id": asset.get("run_id"),
            "project": asset.get("project"),
            "has_file": has_file,
            "file_exists": file_exists,
            "path": path,
        }

    store = get_store()
    try:
        # Get all artifacts (assets)
        artifacts = store.list_assets()

        # First pass: look for explicitly model-typed artifacts
        models = []
        for a in artifacts:
            # Asset structure uses 'type' not 'asset_type'
            asset_type = (a.get("type") or "").lower()
            name = (a.get("name") or "").lower()
            is_model = any(kw in asset_type for kw in model_keywords) or any(kw in name for kw in model_keywords)
            if is_model:
                models.append(_entry(a, default_type="model"))

        # If no models found, return all artifacts as potential models
        if not models and artifacts:
            models = [_entry(a, default_type="unknown") for a in artifacts]

        return models
    except Exception:
        # Best-effort endpoint: log for operators, keep the UI functional.
        logger.exception("Failed to list available models")
        return []
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
@router.get("/{deployment_id}")
async def get_deployment(deployment_id: str) -> DeploymentResponse:
    """Get deployment details."""
    record = _deployments.get(deployment_id)
    if record is None:
        raise HTTPException(status_code=404, detail="Deployment not found")
    return DeploymentResponse(**record)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
@router.delete("/{deployment_id}")
async def delete_deployment(
    deployment_id: str,
    background_tasks: BackgroundTasks,
) -> dict:
    """Stop and delete a deployment.

    The server is shut down in the background; the record is removed once
    the stop completes (via the _pending_delete marker that
    _stop_deployment honors).

    Raises:
        HTTPException: 404 if the deployment id is unknown.
    """
    if deployment_id not in _deployments:
        raise HTTPException(status_code=404, detail="Deployment not found")

    deployment = _deployments[deployment_id]
    deployment["status"] = "stopping"
    # Mark for removal so _stop_deployment drops the record after shutdown.
    # Without this flag the record would linger forever (stop-only behavior),
    # contradicting DELETE semantics.
    deployment["_pending_delete"] = True

    # Stop in background
    background_tasks.add_task(_stop_deployment, deployment_id)

    return {"status": "stopping", "id": deployment_id}
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
@router.post("/{deployment_id}/start")
async def start_deployment(
    deployment_id: str,
    background_tasks: BackgroundTasks,
) -> dict:
    """Start a stopped deployment."""
    record = _deployments.get(deployment_id)
    if record is None:
        raise HTTPException(status_code=404, detail="Deployment not found")

    if record["status"] == "running":
        raise HTTPException(status_code=400, detail="Deployment already running")

    # Flip to "starting" immediately so clients see progress; the real
    # work happens in the background task.
    record["status"] = "starting"
    background_tasks.add_task(_start_deployment, deployment_id)

    return {"status": "starting", "id": deployment_id}
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
@router.post("/{deployment_id}/stop")
async def stop_deployment(
    deployment_id: str,
    background_tasks: BackgroundTasks,
) -> dict:
    """Stop a running deployment."""
    record = _deployments.get(deployment_id)
    if record is None:
        raise HTTPException(status_code=404, detail="Deployment not found")

    if record["status"] != "running":
        raise HTTPException(status_code=400, detail="Deployment not running")

    # Mark as stopping right away; the actual shutdown runs in background.
    record["status"] = "stopping"
    background_tasks.add_task(_stop_deployment, deployment_id)

    return {"status": "stopping", "id": deployment_id}
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
@router.get("/{deployment_id}/logs")
async def get_deployment_logs(
    deployment_id: str,
    lines: int = 100,
) -> dict:
    """Get deployment logs."""
    if deployment_id not in _deployments:
        raise HTTPException(status_code=404, detail="Deployment not found")

    record = _deployments[deployment_id]

    # Prefer the live model server's log buffer when one exists.
    try:
        from flowyml.serving.model_server import get_server_logs

        server_logs = get_server_logs(deployment_id, lines)
        if server_logs:
            return {
                "deployment_id": deployment_id,
                "logs": server_logs,
            }
    except Exception:
        pass

    # Otherwise synthesize a minimal status timeline from the record.
    started = record.get("started_at") or datetime.now().isoformat()
    timeline = [
        (
            record.get("created_at", datetime.now().isoformat()),
            f"Deployment '{record['name']}' created",
        ),
        (started, f"Model {record['model_artifact_id']} loaded"),
        (started, f"Server configured on port {record['port']}"),
        (datetime.now().isoformat(), f"Current status: {record['status']}"),
    ]
    return {
        "deployment_id": deployment_id,
        "logs": [
            {"timestamp": ts, "level": "INFO", "message": msg}
            for ts, msg in timeline
        ],
    }
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
@router.post("/{deployment_id}/test")
async def test_deployment(
    deployment_id: str,
    request: PredictRequest,
) -> PredictResponse:
    """Test a deployed model with sample input.

    Runs the (synchronous) prediction in a thread executor so it does not
    block the event loop.

    Raises:
        HTTPException: 404 for unknown deployment/model, 400 if the
            deployment is not running, 500 for server or prediction errors.
    """
    if deployment_id not in _deployments:
        raise HTTPException(status_code=404, detail="Deployment not found")

    deployment = _deployments[deployment_id]
    if deployment["status"] != "running":
        raise HTTPException(status_code=400, detail="Deployment not running")

    import time

    start = time.time()

    try:
        # Use real model server prediction
        from flowyml.serving.model_server import predict, get_server

        server = get_server(deployment_id)
        if server is None:
            raise HTTPException(status_code=500, detail="Model server not available")

        # Run prediction off the event loop.
        import asyncio

        # get_running_loop() is the non-deprecated call inside a coroutine.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            lambda: predict(deployment_id, request.data),
        )

        latency = (time.time() - start) * 1000

        return PredictResponse(
            prediction=result,
            latency_ms=latency,
            model_version=deployment["model_version"] or "latest",
        )

    except HTTPException:
        # Re-raise our own HTTP errors (e.g. the 500 above) untouched so the
        # broad handler below doesn't re-wrap them as "Prediction failed".
        raise
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e
    except RuntimeError as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}") from e
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
# ==================== Background Tasks ====================
|
|
421
|
+
|
|
422
|
+
# Live model-server handles keyed by deployment id (set by _start_deployment,
# cleared by _stop_deployment). Annotated as object: handle type is opaque here.
_server_processes: dict[str, object] = {}
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
async def _start_deployment(deployment_id: str):
    """Start the model server for a deployment.

    Loads the model and boots its server in a thread executor (the load is
    synchronous and may be slow), then marks the deployment "running".
    Any failure is recorded on the record as status "error" with a message —
    this runs as a background task, so nothing is raised to a caller.
    """
    import asyncio

    if deployment_id not in _deployments:
        return

    deployment = _deployments[deployment_id]

    try:
        deployment["status"] = "starting"

        # Import the real model server (deferred to avoid import cycles)
        from flowyml.serving.model_server import (
            start_model_server,
            ServerConfig,
        )

        # Mirror the deployment's stored config onto the server config,
        # falling back to the same defaults as DeploymentConfig.
        config = ServerConfig(
            port=deployment["port"],
            api_token=deployment["api_token"],
            rate_limit=deployment["config"].get("rate_limit", 100),
            timeout_seconds=deployment["config"].get("timeout_seconds", 30),
            max_batch_size=deployment["config"].get("max_batch_size", 1),
            enable_cors=deployment["config"].get("enable_cors", True),
        )

        # Start the model server (this loads the model). Run in an executor
        # so the blocking load does not stall the event loop.
        # get_running_loop() is the non-deprecated call inside a coroutine.
        loop = asyncio.get_running_loop()
        server = await loop.run_in_executor(
            None,
            lambda: start_model_server(
                deployment_id=deployment_id,
                model_artifact_id=deployment["model_artifact_id"],
                config=config,
            ),
        )

        _server_processes[deployment_id] = server

        deployment["status"] = "running"
        deployment["started_at"] = datetime.now().isoformat()
        deployment["error_message"] = None

    except Exception as e:
        deployment["status"] = "error"
        deployment["error_message"] = str(e)
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
async def _stop_deployment(deployment_id: str):
    """Stop the model server for a deployment.

    Shuts the server down in a thread executor, drops the local handle, and
    marks the record "stopped". If the record was flagged _pending_delete
    (set by the DELETE endpoint), it is removed entirely. Failures are
    recorded on the record rather than raised (background task).
    """
    import asyncio

    if deployment_id not in _deployments:
        return

    deployment = _deployments[deployment_id]

    try:
        # Import the real model server (deferred to avoid import cycles)
        from flowyml.serving.model_server import stop_model_server

        # Stop the server (this cleans up loaded models); run in an executor
        # so the blocking shutdown does not stall the event loop.
        # get_running_loop() is the non-deprecated call inside a coroutine.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(
            None,
            lambda: stop_model_server(deployment_id),
        )

        # Clean up local reference (no-op if it was never registered).
        _server_processes.pop(deployment_id, None)

        deployment["status"] = "stopped"
        deployment["stopped_at"] = datetime.now().isoformat()

        # A delete request marks the record; remove it once fully stopped.
        if deployment.get("_pending_delete"):
            del _deployments[deployment_id]

    except Exception as e:
        deployment["status"] = "error"
        deployment["error_message"] = str(e)
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
async def _monitor_expiry(deployment_id: str, ttl_seconds: int):
    """Monitor deployment and auto-stop after TTL expires."""
    import asyncio

    # Sleep out the TTL, then re-check state: the deployment may have been
    # deleted or stopped manually in the meantime.
    await asyncio.sleep(ttl_seconds)

    record = _deployments.get(deployment_id)
    if record is None:
        return
    if record["status"] != "running":
        return

    record["status"] = "stopping"

    # Best-effort: note the expiry in the server's log buffer.
    try:
        from flowyml.serving.model_server import get_server

        server = get_server(deployment_id)
        if server:
            server.log_buffer.append(
                {
                    "timestamp": datetime.now().isoformat(),
                    "level": "INFO",
                    "message": f"Auto-stopping: TTL of {ttl_seconds}s expired",
                },
            )
    except Exception:
        pass

    # Shut the deployment down.
    await _stop_deployment(deployment_id)
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
# ==================== Dependency Installation ====================
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
class InstallDependenciesRequest(BaseModel):
    """Request to install ML framework dependencies."""

    # Keys from ML_DEPENDENCIES, or literal pip package names for anything
    # not in that mapping.
    frameworks: list[str] = Field(
        ...,
        description="List of frameworks to install (keras, tensorflow, pytorch, sklearn, etc.)",
    )
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
@router.get("/dependencies/available")
async def list_available_dependencies() -> dict:
    """List available ML framework dependencies that can be installed."""
    response = {
        "available": ML_DEPENDENCIES,
        "description": "Pass framework keys to the install endpoint to install dependencies",
    }
    return response
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
@router.post("/dependencies/install")
async def install_dependencies(
    request: InstallDependenciesRequest,
    background_tasks: BackgroundTasks,
) -> dict:
    """Install ML framework dependencies on the server.

    This lightweight approach allows deploying Keras/TensorFlow/PyTorch models
    without needing a heavy Triton Inference Server container.

    Raises:
        HTTPException: 400 when no frameworks/packages were specified.
    """

    # Collect all packages to install; unknown framework keys are treated
    # as literal pip package names.
    packages: list[str] = []
    for framework in request.frameworks:
        framework_lower = framework.lower()
        if framework_lower in ML_DEPENDENCIES:
            packages.extend(ML_DEPENDENCIES[framework_lower])
        else:
            # Allow direct package names too
            packages.append(framework)

    if not packages:
        raise HTTPException(status_code=400, detail="No valid frameworks specified")

    # Deduplicate while preserving request order — list(set(...)) would make
    # the response and the install order nondeterministic.
    packages = list(dict.fromkeys(packages))

    # Queue the installation in background
    background_tasks.add_task(_install_packages_sync, packages)

    return {
        "status": "installing",
        "packages": packages,
        "message": f"Installing {len(packages)} package(s) in background",
    }
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def _install_packages_sync(packages: list[str]):
    """Background task to install packages via pip.

    Installs one package at a time so a single failure does not abort the
    rest; outcomes are logged rather than raised (the caller is a FastAPI
    background task with no one to report to).
    """
    import subprocess
    import logging
    import sys

    logger = logging.getLogger(__name__)

    for package in packages:
        try:
            logger.info("Installing %s...", package)
            # Use this interpreter's pip so packages land in the environment
            # actually serving models — a bare "pip" on PATH may belong to a
            # different Python installation.
            result = subprocess.run(
                [sys.executable, "-m", "pip", "install", package],
                capture_output=True,
                text=True,
                timeout=300,  # 5 min timeout per package
            )
            if result.returncode == 0:
                logger.info("Successfully installed %s", package)
            else:
                logger.warning("Failed to install %s: %s", package, result.stderr)
        except Exception as e:
            # Covers TimeoutExpired and any OS-level launch failure.
            logger.error("Error installing %s: %s", package, e)
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
@router.get("/dependencies/status")
async def check_installed_dependencies() -> dict:
    """Check which ML frameworks are currently installed."""
    import importlib.util

    # Friendly name -> importable module name.
    checks = {
        "keras": "keras",
        "tensorflow": "tensorflow",
        "pytorch": "torch",
        "sklearn": "sklearn",
        "xgboost": "xgboost",
        "lightgbm": "lightgbm",
        "onnx": "onnx",
        "onnxruntime": "onnxruntime",
        "numpy": "numpy",
        "pandas": "pandas",
    }

    installed: dict[str, bool] = {}
    for label, module_name in checks.items():
        try:
            installed[label] = importlib.util.find_spec(module_name) is not None
        except (ImportError, ModuleNotFoundError):
            installed[label] = False

    return {
        "installed": installed,
        "ready_frameworks": [name for name, present in installed.items() if present],
    }
|