flowyml 1.7.2__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/assets/base.py +15 -0
- flowyml/assets/metrics.py +5 -0
- flowyml/cli/main.py +709 -0
- flowyml/cli/stack_cli.py +138 -25
- flowyml/core/__init__.py +17 -0
- flowyml/core/executor.py +161 -26
- flowyml/core/image_builder.py +129 -0
- flowyml/core/log_streamer.py +227 -0
- flowyml/core/orchestrator.py +22 -2
- flowyml/core/pipeline.py +34 -10
- flowyml/core/routing.py +558 -0
- flowyml/core/step.py +9 -1
- flowyml/core/step_grouping.py +49 -35
- flowyml/core/types.py +407 -0
- flowyml/monitoring/alerts.py +10 -0
- flowyml/monitoring/notifications.py +104 -25
- flowyml/monitoring/slack_blocks.py +323 -0
- flowyml/plugins/__init__.py +251 -0
- flowyml/plugins/alerters/__init__.py +1 -0
- flowyml/plugins/alerters/slack.py +168 -0
- flowyml/plugins/base.py +752 -0
- flowyml/plugins/config.py +478 -0
- flowyml/plugins/deployers/__init__.py +22 -0
- flowyml/plugins/deployers/gcp_cloud_run.py +200 -0
- flowyml/plugins/deployers/sagemaker.py +306 -0
- flowyml/plugins/deployers/vertex.py +290 -0
- flowyml/plugins/integration.py +369 -0
- flowyml/plugins/manager.py +510 -0
- flowyml/plugins/model_registries/__init__.py +22 -0
- flowyml/plugins/model_registries/mlflow.py +159 -0
- flowyml/plugins/model_registries/sagemaker.py +489 -0
- flowyml/plugins/model_registries/vertex.py +386 -0
- flowyml/plugins/orchestrators/__init__.py +13 -0
- flowyml/plugins/orchestrators/sagemaker.py +443 -0
- flowyml/plugins/orchestrators/vertex_ai.py +461 -0
- flowyml/plugins/registries/__init__.py +13 -0
- flowyml/plugins/registries/ecr.py +321 -0
- flowyml/plugins/registries/gcr.py +313 -0
- flowyml/plugins/registry.py +454 -0
- flowyml/plugins/stack.py +494 -0
- flowyml/plugins/stack_config.py +537 -0
- flowyml/plugins/stores/__init__.py +13 -0
- flowyml/plugins/stores/gcs.py +460 -0
- flowyml/plugins/stores/s3.py +453 -0
- flowyml/plugins/trackers/__init__.py +11 -0
- flowyml/plugins/trackers/mlflow.py +316 -0
- flowyml/plugins/validators/__init__.py +3 -0
- flowyml/plugins/validators/deepchecks.py +119 -0
- flowyml/registry/__init__.py +2 -1
- flowyml/registry/model_environment.py +109 -0
- flowyml/registry/model_registry.py +241 -96
- flowyml/serving/__init__.py +17 -0
- flowyml/serving/model_server.py +628 -0
- flowyml/stacks/__init__.py +60 -0
- flowyml/stacks/aws.py +93 -0
- flowyml/stacks/base.py +62 -0
- flowyml/stacks/components.py +12 -0
- flowyml/stacks/gcp.py +44 -9
- flowyml/stacks/plugins.py +115 -0
- flowyml/stacks/registry.py +2 -1
- flowyml/storage/sql.py +401 -12
- flowyml/tracking/experiment.py +8 -5
- flowyml/ui/backend/Dockerfile +87 -16
- flowyml/ui/backend/auth.py +12 -2
- flowyml/ui/backend/main.py +149 -5
- flowyml/ui/backend/routers/ai_context.py +226 -0
- flowyml/ui/backend/routers/assets.py +23 -4
- flowyml/ui/backend/routers/auth.py +96 -0
- flowyml/ui/backend/routers/deployments.py +660 -0
- flowyml/ui/backend/routers/model_explorer.py +597 -0
- flowyml/ui/backend/routers/plugins.py +103 -51
- flowyml/ui/backend/routers/projects.py +91 -8
- flowyml/ui/backend/routers/runs.py +20 -1
- flowyml/ui/backend/routers/schedules.py +22 -17
- flowyml/ui/backend/routers/templates.py +319 -0
- flowyml/ui/backend/routers/websocket.py +2 -2
- flowyml/ui/frontend/Dockerfile +55 -6
- flowyml/ui/frontend/dist/assets/index-B5AsPTSz.css +1 -0
- flowyml/ui/frontend/dist/assets/index-dFbZ8wD8.js +753 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/dist/logo.png +0 -0
- flowyml/ui/frontend/nginx.conf +65 -4
- flowyml/ui/frontend/package-lock.json +1404 -74
- flowyml/ui/frontend/package.json +3 -0
- flowyml/ui/frontend/public/logo.png +0 -0
- flowyml/ui/frontend/src/App.jsx +10 -7
- flowyml/ui/frontend/src/app/auth/Login.jsx +90 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +8 -8
- flowyml/ui/frontend/src/app/deployments/page.jsx +786 -0
- flowyml/ui/frontend/src/app/model-explorer/page.jsx +1031 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +12 -2
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +19 -6
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +36 -24
- flowyml/ui/frontend/src/app/runs/page.jsx +8 -2
- flowyml/ui/frontend/src/app/settings/page.jsx +267 -253
- flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +29 -7
- flowyml/ui/frontend/src/components/Layout.jsx +6 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +79 -29
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +36 -6
- flowyml/ui/frontend/src/components/RunMetaPanel.jsx +113 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantButton.jsx +71 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantPanel.jsx +420 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +22 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +4 -4
- flowyml/ui/frontend/src/components/plugins/{ZenMLIntegration.jsx → StackImport.jsx} +38 -12
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +36 -13
- flowyml/ui/frontend/src/contexts/AIAssistantContext.jsx +245 -0
- flowyml/ui/frontend/src/contexts/AuthContext.jsx +108 -0
- flowyml/ui/frontend/src/hooks/useAIContext.js +156 -0
- flowyml/ui/frontend/src/hooks/useWebGPU.js +54 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +6 -0
- flowyml/ui/frontend/src/router/index.jsx +47 -20
- flowyml/ui/frontend/src/services/pluginService.js +3 -1
- flowyml/ui/server_manager.py +5 -5
- flowyml/ui/utils.py +157 -39
- flowyml/utils/config.py +37 -15
- flowyml/utils/model_introspection.py +123 -0
- flowyml/utils/observability.py +30 -0
- flowyml-1.8.0.dist-info/METADATA +174 -0
- {flowyml-1.7.2.dist-info → flowyml-1.8.0.dist-info}/RECORD +123 -65
- {flowyml-1.7.2.dist-info → flowyml-1.8.0.dist-info}/WHEEL +1 -1
- flowyml/ui/frontend/dist/assets/index-B40RsQDq.css +0 -1
- flowyml/ui/frontend/dist/assets/index-CjI0zKCn.js +0 -685
- flowyml-1.7.2.dist-info/METADATA +0 -477
- {flowyml-1.7.2.dist-info → flowyml-1.8.0.dist-info}/entry_points.txt +0 -0
- {flowyml-1.7.2.dist-info → flowyml-1.8.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
"""MLflow Experiment Tracker - Native FlowyML Plugin.
|
|
2
|
+
|
|
3
|
+
This is a native FlowyML implementation that uses MLflow directly,
|
|
4
|
+
without requiring any external framework dependencies.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from flowyml.plugins import get_plugin
|
|
8
|
+
|
|
9
|
+
tracker = get_plugin("mlflow", tracking_uri="http://localhost:5000")
|
|
10
|
+
|
|
11
|
+
tracker.start_run("my_experiment", experiment_name="training")
|
|
12
|
+
tracker.log_params({"learning_rate": 0.001, "epochs": 100})
|
|
13
|
+
tracker.log_metrics({"accuracy": 0.95, "loss": 0.05})
|
|
14
|
+
tracker.end_run()
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from typing import Any
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
from flowyml.plugins.base import ExperimentTracker, PluginMetadata, PluginType
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MLflowTracker(ExperimentTracker):
    """Native MLflow experiment tracker for FlowyML.

    This tracker integrates directly with MLflow without any
    intermediate framework, providing full control over the
    tracking experience.

    Args:
        tracking_uri: MLflow tracking server URI. If not provided,
            uses a local ``mlruns`` directory under the current working
            directory.
        experiment_name: Default experiment name.
        artifact_location: Custom artifact storage location. It is applied
            when ``start_run`` has to create the experiment; an experiment
            that already exists keeps the location it was created with.
        registry_uri: Model registry URI (if different from tracking).

    Example:
        tracker = MLflowTracker(
            tracking_uri="http://localhost:5000",
            experiment_name="my_experiments"
        )

        run_id = tracker.start_run("training_v1")
        tracker.log_params({"lr": 0.001})
        tracker.log_metrics({"accuracy": 0.95})
        tracker.log_artifact("model.pkl")
        tracker.end_run()
    """

    METADATA = PluginMetadata(
        name="mlflow",
        description="MLflow experiment tracking and model registry",
        plugin_type=PluginType.EXPERIMENT_TRACKER,
        version="1.0.0",
        author="FlowyML",
        packages=["mlflow>=2.0"],
        documentation_url="https://mlflow.org/docs/latest/index.html",
        tags=["experiment-tracking", "model-registry", "popular"],
    )

    # Run statuses that end_run() forwards to MLflow unchanged.
    _RUN_STATUSES = ("FINISHED", "FAILED", "KILLED")

    # Maps the public ``model_type`` values to the MLflow flavor module used
    # to log them. "tensorflow" and "keras" share the keras flavor.
    _MODEL_FLAVORS = {
        "sklearn": "sklearn",
        "pytorch": "pytorch",
        "tensorflow": "keras",
        "keras": "keras",
        "xgboost": "xgboost",
    }

    def __init__(
        self,
        tracking_uri: str | None = None,
        experiment_name: str | None = None,
        artifact_location: str | None = None,
        registry_uri: str | None = None,
        **kwargs,
    ):
        """Initialize the MLflow tracker.

        The MLflow module itself is imported lazily by ``initialize``; this
        constructor only records configuration.
        """
        super().__init__(
            name=kwargs.pop("name", "mlflow"),
            tracking_uri=tracking_uri,
            experiment_name=experiment_name,
            artifact_location=artifact_location,
            registry_uri=registry_uri,
            **kwargs,
        )

        self._mlflow = None
        self._current_run = None
        self._experiment_name = experiment_name
        self._artifact_location = artifact_location

    def initialize(self) -> None:
        """Import MLflow and point it at the configured tracking/registry URIs.

        Raises:
            ImportError: If the ``mlflow`` package cannot be imported.
        """
        # Only the import is guarded, so that an ImportError raised while
        # configuring MLflow is not misreported as "MLflow is not installed".
        try:
            import mlflow
        except ImportError as exc:
            raise ImportError(
                "MLflow is not installed. Run: flowyml plugin install mlflow",
            ) from exc

        self._mlflow = mlflow

        # Set tracking URI (falls back to a local ./mlruns directory)
        tracking_uri = self._config.get("tracking_uri") or self._local_backend()
        mlflow.set_tracking_uri(tracking_uri)

        # Set registry URI if provided
        registry_uri = self._config.get("registry_uri")
        if registry_uri:
            mlflow.set_registry_uri(registry_uri)

        self._is_initialized = True
        logger.info("MLflow initialized with tracking URI: %s", tracking_uri)

    def _local_backend(self) -> str:
        """Create ``./mlruns`` if needed and return it as a ``file://`` URI."""
        mlruns_path = Path.cwd() / "mlruns"
        mlruns_path.mkdir(parents=True, exist_ok=True)
        # as_uri() percent-encodes characters such as '#', '?' and spaces that
        # would otherwise be parsed as URI syntax instead of path characters.
        return mlruns_path.as_uri()

    def _ensure_initialized(self) -> None:
        """Ensure MLflow is initialized."""
        if not self._is_initialized:
            self.initialize()

    def start_run(
        self,
        run_name: str,
        experiment_name: str | None = None,
        tags: dict | None = None,
    ) -> str:
        """Start a new MLflow run.

        Args:
            run_name: Name for this run.
            experiment_name: Experiment to log to. Uses the tracker's default
                experiment, or "default", if not provided.
            tags: Optional tags for the run.

        Returns:
            The run ID.
        """
        self._ensure_initialized()

        exp_name = experiment_name or self._experiment_name or "default"

        # set_experiment() cannot take an artifact location, so when one is
        # configured the experiment is created explicitly the first time.
        if self._artifact_location and self._mlflow.get_experiment_by_name(exp_name) is None:
            self._mlflow.create_experiment(
                exp_name,
                artifact_location=self._artifact_location,
            )
        self._mlflow.set_experiment(exp_name)

        run = self._mlflow.start_run(run_name=run_name, tags=tags)
        self._current_run = run

        logger.info("Started MLflow run '%s' (ID: %s)", run_name, run.info.run_id)
        return run.info.run_id

    def end_run(self, status: str = "FINISHED") -> None:
        """End the current run.

        Args:
            status: Final status (FINISHED, FAILED, KILLED), case-insensitive.
                Any other value is recorded as FINISHED and a warning is
                logged.
        """
        self._ensure_initialized()

        mlflow_status = status.upper()
        if mlflow_status not in self._RUN_STATUSES:
            # Keep the historical fallback, but make the coercion visible so
            # a typo such as "FAIL" does not silently mark a run as finished.
            logger.warning("Unknown run status %r; recording run as FINISHED", status)
            mlflow_status = "FINISHED"

        self._mlflow.end_run(status=mlflow_status)
        self._current_run = None
        logger.info("Ended MLflow run with status: %s", mlflow_status)

    def log_params(self, params: dict[str, Any]) -> None:
        """Log parameters to the current run.

        Args:
            params: Dictionary of parameter names and values.
        """
        self._ensure_initialized()
        self._mlflow.log_params(params)

    def log_metrics(self, metrics: dict[str, float], step: int | None = None) -> None:
        """Log metrics to the current run.

        Args:
            metrics: Dictionary of metric names and values.
            step: Optional step number.
        """
        self._ensure_initialized()
        self._mlflow.log_metrics(metrics, step=step)

    def log_artifact(self, local_path: str, artifact_path: str | None = None) -> None:
        """Log an artifact file.

        Args:
            local_path: Path to the local file.
            artifact_path: Optional subdirectory in artifacts.
        """
        self._ensure_initialized()
        self._mlflow.log_artifact(local_path, artifact_path)

    def log_artifacts(self, local_dir: str, artifact_path: str | None = None) -> None:
        """Log all files in a directory as artifacts.

        Args:
            local_dir: Path to the local directory.
            artifact_path: Optional subdirectory in artifacts.
        """
        self._ensure_initialized()
        self._mlflow.log_artifacts(local_dir, artifact_path)

    def log_model(
        self,
        model: Any,
        artifact_path: str,
        model_type: str | None = None,
        registered_model_name: str | None = None,
    ) -> None:
        """Log a model to the current run.

        Args:
            model: The model object.
            artifact_path: Path within artifacts.
            model_type: Type of model (sklearn, pytorch, tensorflow, keras,
                xgboost). Auto-detected from the model's class when omitted.
                Any other value is logged through ``mlflow.pyfunc``.
            registered_model_name: Optional name to register in model registry.
        """
        self._ensure_initialized()

        if model_type is None:
            model_type = self._detect_model_type(model)

        flavor_name = self._MODEL_FLAVORS.get(model_type)
        if flavor_name is None:
            # Fallback: hand the object to the generic pyfunc flavor.
            self._mlflow.pyfunc.log_model(
                artifact_path,
                python_model=model,
                registered_model_name=registered_model_name,
            )
            return

        flavor = getattr(self._mlflow, flavor_name)
        flavor.log_model(
            model,
            artifact_path,
            registered_model_name=registered_model_name,
        )

    def _detect_model_type(self, model: Any) -> str:
        """Detect the ML framework a model belongs to.

        The model's class and then its base classes are inspected in MRO
        order, so a user-defined subclass (for example a ``torch.nn.Module``
        declared in ``__main__``) is recognised through its framework base
        class. The first class whose module name contains a known framework
        marker decides the result.

        Returns:
            One of "sklearn", "pytorch", "keras", "xgboost" or "generic".
        """
        for klass in type(model).__mro__:
            module_name = getattr(klass, "__module__", "") or ""
            if "sklearn" in module_name:
                return "sklearn"
            if "torch" in module_name:
                return "pytorch"
            if "tensorflow" in module_name or "keras" in module_name:
                return "keras"
            if "xgboost" in module_name:
                return "xgboost"
        return "generic"

    def get_tracking_uri(self) -> str:
        """Get the current tracking URI."""
        self._ensure_initialized()
        return self._mlflow.get_tracking_uri()

    def get_run_id(self) -> str | None:
        """Get the ID of the run started by this tracker, or None."""
        if self._current_run:
            return self._current_run.info.run_id
        return None

    def set_tag(self, key: str, value: str) -> None:
        """Set a tag on the current run."""
        self._ensure_initialized()
        self._mlflow.set_tag(key, value)

    def set_tags(self, tags: dict[str, str]) -> None:
        """Set multiple tags on the current run."""
        self._ensure_initialized()
        self._mlflow.set_tags(tags)

    def autolog(self, framework: str | None = None) -> None:
        """Enable MLflow autologging.

        Args:
            framework: Name of the MLflow flavor module to enable
                (sklearn, pytorch, etc.). If None, enables autologging for
                all supported frameworks.
        """
        self._ensure_initialized()

        if framework:
            getattr(self._mlflow, framework).autolog()
        else:
            self._mlflow.autolog()

    def cleanup(self) -> None:
        """End any run still open and mark the tracker as uninitialized."""
        try:
            if self._current_run:
                self.end_run()
        finally:
            # Reset even if ending the run fails, so the next use
            # re-initializes the tracker.
            self._is_initialized = False
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Deepchecks Data Validator - Native FlowyML Plugin.
|
|
2
|
+
|
|
3
|
+
This plugin integrates Deepchecks for robust data validation,
|
|
4
|
+
including integrity, drift, and performance checks.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from flowyml.plugins.base import DataValidatorPlugin, PluginMetadata, PluginType
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DeepchecksValidator(DataValidatorPlugin):
    """Deepchecks data validator for FlowyML.

    Run suites of data validation checks using Deepchecks.

    Args:
        suite: Name of the default suite to run (e.g., 'integrity',
            'train_test_validation'). The name is looked up as a factory in
            ``deepchecks.tabular.suites``; 'integrity' is accepted as an
            alias for 'data_integrity'.
    """

    # NOTE(review): other plugins in this package expose this as ``METADATA``;
    # confirm which attribute name the plugin base class reads.
    metadata = PluginMetadata(
        name="deepchecks",
        version="1.0.0",
        description="Deepchecks Data Validator",
        author="FlowyML Team",
        plugin_type=PluginType.DATA_VALIDATOR,
        tags=["validation", "data-quality", "drift"],
        packages=["deepchecks>=0.17.0"],
    )

    # Short names accepted by this plugin, mapped to the factory name in
    # ``deepchecks.tabular.suites``. Without this, the plugin's own default
    # ("integrity") would not resolve and would fall back to the full suite.
    _SUITE_ALIASES = {"integrity": "data_integrity"}

    def __init__(self, suite: str = "integrity", **kwargs):
        """Store the default suite name; Deepchecks is imported lazily."""
        super().__init__(**kwargs)
        self.default_suite = suite
        self._deepchecks = None
        self._dc_suites = None
        self._dc_dataset = None

    @property
    def plugin_type(self) -> PluginType:
        """Return the plugin category (data validator)."""
        return PluginType.DATA_VALIDATOR

    def initialize(self) -> None:
        """Import Deepchecks and cache the modules/classes this plugin uses.

        Raises:
            ImportError: If ``deepchecks`` cannot be imported.
        """
        try:
            import deepchecks
            from deepchecks.tabular import Dataset
            from deepchecks.tabular import suites
        except ImportError as exc:
            raise ImportError(
                "deepchecks is required. Install with: pip install deepchecks",
            ) from exc

        self._deepchecks = deepchecks
        self._dc_suites = suites
        self._dc_dataset = Dataset

        logger.info("Deepchecks validator initialized.")

    def _resolve_suite(self, suite_name: str) -> Any:
        """Build the suite registered under ``suite_name``.

        The exact name is tried first, then its alias (if any). When neither
        names a callable in the suites module, a warning is logged and the
        full suite is returned.
        """
        for candidate in (suite_name, self._SUITE_ALIASES.get(suite_name)):
            if not candidate:
                continue
            factory = getattr(self._dc_suites, candidate, None)
            # Only call real factories; a matching non-callable module
            # attribute is treated as "not found".
            if callable(factory):
                return factory()

        logger.warning(f"Suite '{suite_name}' not found in default suites, falling back to full suite.")
        return self._dc_suites.full_suite()

    def validate(self, data: Any, expectations: Any = None) -> dict[str, Any]:
        """Validate data using Deepchecks.

        Args:
            data: Pandas DataFrame or a Deepchecks tabular ``Dataset``.
            expectations: Optional suite name or Suite object.
                If None, uses self.default_suite.

        Returns:
            Dictionary with keys ``passed``, ``suite_name`` (the requested
            name, as a string), ``results`` (the result's JSON) and
            ``report_html``.

        Raises:
            ImportError: If ``deepchecks`` cannot be imported.
            ValueError: If ``data`` is neither a DataFrame nor a Dataset.
        """
        # Import once; later calls reuse the cached modules.
        if self._deepchecks is None:
            self.initialize()

        suite_name = expectations or self.default_suite
        if isinstance(suite_name, str):
            suite_obj = self._resolve_suite(suite_name)
        else:
            # Assume it's a Suite object passed directly
            suite_obj = suite_name

        # Wrap data if needed.
        # This is a simplified wrapper; real usage might require
        # label/cat_features configuration on the Dataset.
        if isinstance(data, self._dc_dataset):
            ds = data
        else:
            from pandas import DataFrame

            if not isinstance(data, DataFrame):
                raise ValueError(f"Deepchecks requires DataFrame or Dataset, got {type(data)}")
            ds = self._dc_dataset(data)

        logger.info("Running Deepchecks suite: %s", suite_name)
        result = suite_obj.run(ds)

        # Simplified overall verdict: results without a passed() method are
        # treated as passing.
        passed = result.passed() if hasattr(result, "passed") else True

        return {
            "passed": passed,
            "suite_name": str(suite_name),
            "results": result.to_json(),
            # NOTE(review): called without a target, save_as_html() is
            # expected to write a report file and return its path - confirm
            # against the deepchecks version in use.
            "report_html": result.save_as_html(),
        }

    def get_data_profile(self, data: Any) -> dict[str, Any]:
        """Profile data using the Deepchecks integrity suite as a proxy.

        Deepchecks has no dedicated profiling API, so this runs ``validate``
        with the 'integrity' suite and returns its result dictionary.
        """
        return self.validate(data, expectations="integrity")
|
flowyml/registry/__init__.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Model registry for versioning and deployment."""
|
|
2
2
|
|
|
3
3
|
from flowyml.registry.model_registry import ModelRegistry, ModelVersion, ModelStage
|
|
4
|
+
from flowyml.registry.model_environment import ModelEnvironment
|
|
4
5
|
|
|
5
|
-
__all__ = ["ModelRegistry", "ModelVersion", "ModelStage"]
|
|
6
|
+
__all__ = ["ModelRegistry", "ModelVersion", "ModelStage", "ModelEnvironment"]
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Model environment capture for reproducibility."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
import subprocess
|
|
5
|
+
import platform
|
|
6
|
+
from dataclasses import dataclass, field, asdict
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
|
|
12
|
+
class ModelEnvironment:
|
|
13
|
+
r"""Captures Python environment for model reproducibility.
|
|
14
|
+
|
|
15
|
+
Example:
|
|
16
|
+
>>> env = ModelEnvironment.from_current()
|
|
17
|
+
>>> print(env.python_version)
|
|
18
|
+
'3.11.5'
|
|
19
|
+
>>> env.to_requirements_txt()
|
|
20
|
+
'numpy==1.24.0\npandas==2.0.0\n...'
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
python_version: str
|
|
24
|
+
platform: str
|
|
25
|
+
dependencies: list[str] = field(default_factory=list)
|
|
26
|
+
system_info: dict[str, str] = field(default_factory=dict)
|
|
27
|
+
captured_at: str = field(default_factory=lambda: datetime.now().isoformat())
|
|
28
|
+
|
|
29
|
+
@classmethod
|
|
30
|
+
def from_current(cls, include_all: bool = False) -> "ModelEnvironment": # noqa: ARG003
|
|
31
|
+
"""Capture current Python environment.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
include_all: If True, capture all packages. If False, only top-level.
|
|
35
|
+
|
|
36
|
+
Returns:
|
|
37
|
+
ModelEnvironment with current system info and dependencies
|
|
38
|
+
"""
|
|
39
|
+
# Get pip freeze output
|
|
40
|
+
try:
|
|
41
|
+
result = subprocess.run(
|
|
42
|
+
[sys.executable, "-m", "pip", "freeze"],
|
|
43
|
+
capture_output=True,
|
|
44
|
+
text=True,
|
|
45
|
+
timeout=30,
|
|
46
|
+
)
|
|
47
|
+
deps = [line.strip() for line in result.stdout.splitlines() if line.strip()]
|
|
48
|
+
except Exception:
|
|
49
|
+
deps = []
|
|
50
|
+
|
|
51
|
+
# System info
|
|
52
|
+
system_info = {
|
|
53
|
+
"os": platform.system(),
|
|
54
|
+
"os_version": platform.version(),
|
|
55
|
+
"machine": platform.machine(),
|
|
56
|
+
"processor": platform.processor(),
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
return cls(
|
|
60
|
+
python_version=platform.python_version(),
|
|
61
|
+
platform=platform.platform(),
|
|
62
|
+
dependencies=deps,
|
|
63
|
+
system_info=system_info,
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
def to_requirements_txt(self) -> str:
|
|
67
|
+
"""Export dependencies as requirements.txt format.
|
|
68
|
+
|
|
69
|
+
Returns:
|
|
70
|
+
String with one dependency per line
|
|
71
|
+
"""
|
|
72
|
+
return "\n".join(self.dependencies)
|
|
73
|
+
|
|
74
|
+
def save_requirements(self, path: str) -> None:
|
|
75
|
+
"""Save dependencies to a requirements.txt file.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
path: Path to save the file
|
|
79
|
+
"""
|
|
80
|
+
with open(path, "w") as f:
|
|
81
|
+
f.write(self.to_requirements_txt())
|
|
82
|
+
|
|
83
|
+
def to_dict(self) -> dict[str, Any]:
|
|
84
|
+
"""Convert to dictionary for serialization."""
|
|
85
|
+
return asdict(self)
|
|
86
|
+
|
|
87
|
+
@classmethod
|
|
88
|
+
def from_dict(cls, data: dict[str, Any]) -> "ModelEnvironment":
|
|
89
|
+
"""Create from dictionary."""
|
|
90
|
+
return cls(**data)
|
|
91
|
+
|
|
92
|
+
def get_package_version(self, package_name: str) -> str | None:
|
|
93
|
+
"""Get version of a specific package.
|
|
94
|
+
|
|
95
|
+
Args:
|
|
96
|
+
package_name: Name of the package to look up
|
|
97
|
+
|
|
98
|
+
Returns:
|
|
99
|
+
Version string or None if not found
|
|
100
|
+
"""
|
|
101
|
+
for dep in self.dependencies:
|
|
102
|
+
if dep.lower().startswith(package_name.lower() + "=="):
|
|
103
|
+
return dep.split("==")[1]
|
|
104
|
+
elif dep.lower().startswith(package_name.lower() + ">="):
|
|
105
|
+
return dep.split(">=")[1]
|
|
106
|
+
return None
|
|
107
|
+
|
|
108
|
+
def __repr__(self) -> str:
|
|
109
|
+
return f"ModelEnvironment(python={self.python_version}, deps={len(self.dependencies)})"
|