flowyml 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/__init__.py +207 -0
- flowyml/assets/__init__.py +22 -0
- flowyml/assets/artifact.py +40 -0
- flowyml/assets/base.py +209 -0
- flowyml/assets/dataset.py +100 -0
- flowyml/assets/featureset.py +301 -0
- flowyml/assets/metrics.py +104 -0
- flowyml/assets/model.py +82 -0
- flowyml/assets/registry.py +157 -0
- flowyml/assets/report.py +315 -0
- flowyml/cli/__init__.py +5 -0
- flowyml/cli/experiment.py +232 -0
- flowyml/cli/init.py +256 -0
- flowyml/cli/main.py +327 -0
- flowyml/cli/run.py +75 -0
- flowyml/cli/stack_cli.py +532 -0
- flowyml/cli/ui.py +33 -0
- flowyml/core/__init__.py +68 -0
- flowyml/core/advanced_cache.py +274 -0
- flowyml/core/approval.py +64 -0
- flowyml/core/cache.py +203 -0
- flowyml/core/checkpoint.py +148 -0
- flowyml/core/conditional.py +373 -0
- flowyml/core/context.py +155 -0
- flowyml/core/error_handling.py +419 -0
- flowyml/core/executor.py +354 -0
- flowyml/core/graph.py +185 -0
- flowyml/core/parallel.py +452 -0
- flowyml/core/pipeline.py +764 -0
- flowyml/core/project.py +253 -0
- flowyml/core/resources.py +424 -0
- flowyml/core/scheduler.py +630 -0
- flowyml/core/scheduler_config.py +32 -0
- flowyml/core/step.py +201 -0
- flowyml/core/step_grouping.py +292 -0
- flowyml/core/templates.py +226 -0
- flowyml/core/versioning.py +217 -0
- flowyml/integrations/__init__.py +1 -0
- flowyml/integrations/keras.py +134 -0
- flowyml/monitoring/__init__.py +1 -0
- flowyml/monitoring/alerts.py +57 -0
- flowyml/monitoring/data.py +102 -0
- flowyml/monitoring/llm.py +160 -0
- flowyml/monitoring/monitor.py +57 -0
- flowyml/monitoring/notifications.py +246 -0
- flowyml/registry/__init__.py +5 -0
- flowyml/registry/model_registry.py +491 -0
- flowyml/registry/pipeline_registry.py +55 -0
- flowyml/stacks/__init__.py +27 -0
- flowyml/stacks/base.py +77 -0
- flowyml/stacks/bridge.py +288 -0
- flowyml/stacks/components.py +155 -0
- flowyml/stacks/gcp.py +499 -0
- flowyml/stacks/local.py +112 -0
- flowyml/stacks/migration.py +97 -0
- flowyml/stacks/plugin_config.py +78 -0
- flowyml/stacks/plugins.py +401 -0
- flowyml/stacks/registry.py +226 -0
- flowyml/storage/__init__.py +26 -0
- flowyml/storage/artifacts.py +246 -0
- flowyml/storage/materializers/__init__.py +20 -0
- flowyml/storage/materializers/base.py +133 -0
- flowyml/storage/materializers/keras.py +185 -0
- flowyml/storage/materializers/numpy.py +94 -0
- flowyml/storage/materializers/pandas.py +142 -0
- flowyml/storage/materializers/pytorch.py +135 -0
- flowyml/storage/materializers/sklearn.py +110 -0
- flowyml/storage/materializers/tensorflow.py +152 -0
- flowyml/storage/metadata.py +931 -0
- flowyml/tracking/__init__.py +1 -0
- flowyml/tracking/experiment.py +211 -0
- flowyml/tracking/leaderboard.py +191 -0
- flowyml/tracking/runs.py +145 -0
- flowyml/ui/__init__.py +15 -0
- flowyml/ui/backend/Dockerfile +31 -0
- flowyml/ui/backend/__init__.py +0 -0
- flowyml/ui/backend/auth.py +163 -0
- flowyml/ui/backend/main.py +187 -0
- flowyml/ui/backend/routers/__init__.py +0 -0
- flowyml/ui/backend/routers/assets.py +45 -0
- flowyml/ui/backend/routers/execution.py +179 -0
- flowyml/ui/backend/routers/experiments.py +49 -0
- flowyml/ui/backend/routers/leaderboard.py +118 -0
- flowyml/ui/backend/routers/notifications.py +72 -0
- flowyml/ui/backend/routers/pipelines.py +110 -0
- flowyml/ui/backend/routers/plugins.py +192 -0
- flowyml/ui/backend/routers/projects.py +85 -0
- flowyml/ui/backend/routers/runs.py +66 -0
- flowyml/ui/backend/routers/schedules.py +222 -0
- flowyml/ui/backend/routers/traces.py +84 -0
- flowyml/ui/frontend/Dockerfile +20 -0
- flowyml/ui/frontend/README.md +315 -0
- flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
- flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
- flowyml/ui/frontend/dist/index.html +16 -0
- flowyml/ui/frontend/index.html +15 -0
- flowyml/ui/frontend/nginx.conf +26 -0
- flowyml/ui/frontend/package-lock.json +3545 -0
- flowyml/ui/frontend/package.json +33 -0
- flowyml/ui/frontend/postcss.config.js +6 -0
- flowyml/ui/frontend/src/App.jsx +21 -0
- flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
- flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
- flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
- flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
- flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
- flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
- flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
- flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
- flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
- flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
- flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
- flowyml/ui/frontend/src/components/Layout.jsx +108 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
- flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
- flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
- flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
- flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
- flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
- flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
- flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
- flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
- flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
- flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
- flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
- flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
- flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
- flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
- flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
- flowyml/ui/frontend/src/index.css +11 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
- flowyml/ui/frontend/src/main.jsx +10 -0
- flowyml/ui/frontend/src/router/index.jsx +39 -0
- flowyml/ui/frontend/src/services/pluginService.js +90 -0
- flowyml/ui/frontend/src/utils/api.js +47 -0
- flowyml/ui/frontend/src/utils/cn.js +6 -0
- flowyml/ui/frontend/tailwind.config.js +31 -0
- flowyml/ui/frontend/vite.config.js +21 -0
- flowyml/ui/utils.py +77 -0
- flowyml/utils/__init__.py +67 -0
- flowyml/utils/config.py +308 -0
- flowyml/utils/debug.py +240 -0
- flowyml/utils/environment.py +346 -0
- flowyml/utils/git.py +319 -0
- flowyml/utils/logging.py +61 -0
- flowyml/utils/performance.py +314 -0
- flowyml/utils/stack_config.py +296 -0
- flowyml/utils/validation.py +270 -0
- flowyml-1.1.0.dist-info/METADATA +372 -0
- flowyml-1.1.0.dist-info/RECORD +159 -0
- flowyml-1.1.0.dist-info/WHEEL +4 -0
- flowyml-1.1.0.dist-info/entry_points.txt +3 -0
- flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
"""FeatureSet asset for feature engineering outputs."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from flowyml.assets.base import Asset, AssetMetadata
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class FeatureSetMetadata(AssetMetadata):
    """Metadata specific to FeatureSets.

    Extends ``AssetMetadata`` with fields describing a feature matrix.
    ``FeatureSet`` fills several of these in automatically when it is given a
    pandas DataFrame or a NumPy array.
    """

    # Names of the features, in column order.
    feature_names: list[str] = field(default_factory=list)
    # Number of features (columns).
    num_features: int = 0
    # Number of samples (rows).
    num_samples: int = 0
    # Feature name -> dtype string; FeatureSet populates this from DataFrame dtypes.
    feature_types: dict[str, str] = field(default_factory=dict)
    # Feature name -> summary statistics ("min", "max", "mean", "std", plus
    # "missing" for DataFrame columns), as computed by FeatureSet.
    statistics: dict[str, dict[str, float]] = field(default_factory=dict)
    # Names of the transformations applied to produce the features.
    transformations: list[str] = field(default_factory=list)
    # Name of the dataset the features were derived from, if known.
    source_dataset: str | None = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class FeatureSet(Asset):
|
|
24
|
+
"""Asset representing a set of engineered features.
|
|
25
|
+
|
|
26
|
+
FeatureSets are created through feature engineering pipelines and contain
|
|
27
|
+
transformed data ready for model training.
|
|
28
|
+
|
|
29
|
+
Example:
|
|
30
|
+
```python
|
|
31
|
+
from flowyml import FeatureSet
|
|
32
|
+
|
|
33
|
+
# Create a feature set
|
|
34
|
+
features = FeatureSet.create(
|
|
35
|
+
data=feature_matrix,
|
|
36
|
+
feature_names=["age_scaled", "income_log", "category_encoded"],
|
|
37
|
+
num_samples=10000,
|
|
38
|
+
transformations=["StandardScaler", "LogTransform", "OneHotEncoder"],
|
|
39
|
+
source_dataset="customers_v1",
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
# Access feature information
|
|
43
|
+
print(features.num_features) # 3
|
|
44
|
+
print(features.feature_names)
|
|
45
|
+
print(features.statistics)
|
|
46
|
+
```
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
def __init__(
|
|
50
|
+
self,
|
|
51
|
+
name: str,
|
|
52
|
+
data: Any = None,
|
|
53
|
+
feature_names: list[str] | None = None,
|
|
54
|
+
num_samples: int = 0,
|
|
55
|
+
transformations: list[str] | None = None,
|
|
56
|
+
source_dataset: str | None = None,
|
|
57
|
+
**kwargs,
|
|
58
|
+
):
|
|
59
|
+
"""Initialize a FeatureSet.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
name: Name of the feature set
|
|
63
|
+
data: The feature matrix (DataFrame, array, etc.)
|
|
64
|
+
feature_names: List of feature names
|
|
65
|
+
num_samples: Number of samples in the feature set
|
|
66
|
+
transformations: List of transformations applied
|
|
67
|
+
source_dataset: Name of source dataset
|
|
68
|
+
**kwargs: Additional metadata
|
|
69
|
+
"""
|
|
70
|
+
# Initialize base asset
|
|
71
|
+
metadata = FeatureSetMetadata(
|
|
72
|
+
name=name,
|
|
73
|
+
type="featureset",
|
|
74
|
+
feature_names=feature_names or [],
|
|
75
|
+
num_features=len(feature_names) if feature_names else 0,
|
|
76
|
+
num_samples=num_samples,
|
|
77
|
+
transformations=transformations or [],
|
|
78
|
+
source_dataset=source_dataset,
|
|
79
|
+
**kwargs,
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
super().__init__(name=name, type="featureset", metadata=metadata)
|
|
83
|
+
self._data = data
|
|
84
|
+
|
|
85
|
+
# Extract feature metadata if data provided
|
|
86
|
+
if data is not None:
|
|
87
|
+
self._extract_feature_metadata()
|
|
88
|
+
|
|
89
|
+
def _extract_feature_metadata(self) -> None:
|
|
90
|
+
"""Extract feature metadata from data."""
|
|
91
|
+
try:
|
|
92
|
+
import pandas as pd
|
|
93
|
+
|
|
94
|
+
if isinstance(self._data, pd.DataFrame):
|
|
95
|
+
# Update feature names from DataFrame
|
|
96
|
+
if not self.metadata.feature_names:
|
|
97
|
+
self.metadata.feature_names = self._data.columns.tolist()
|
|
98
|
+
self.metadata.num_features = len(self._data.columns)
|
|
99
|
+
|
|
100
|
+
# Extract feature types
|
|
101
|
+
self.metadata.feature_types = {col: str(dtype) for col, dtype in self._data.dtypes.items()}
|
|
102
|
+
|
|
103
|
+
# Update num_samples
|
|
104
|
+
if not self.metadata.num_samples:
|
|
105
|
+
self.metadata.num_samples = len(self._data)
|
|
106
|
+
|
|
107
|
+
# Calculate statistics for numerical columns
|
|
108
|
+
numerical_cols = self._data.select_dtypes(include=["number"]).columns
|
|
109
|
+
for col in numerical_cols:
|
|
110
|
+
self.metadata.statistics[col] = {
|
|
111
|
+
"min": float(self._data[col].min()),
|
|
112
|
+
"max": float(self._data[col].max()),
|
|
113
|
+
"mean": float(self._data[col].mean()),
|
|
114
|
+
"std": float(self._data[col].std()),
|
|
115
|
+
"missing": int(self._data[col].isna().sum()),
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
except ImportError:
|
|
119
|
+
pass
|
|
120
|
+
|
|
121
|
+
try:
|
|
122
|
+
import numpy as np
|
|
123
|
+
|
|
124
|
+
if isinstance(self._data, np.ndarray):
|
|
125
|
+
# Update dimensions
|
|
126
|
+
if self._data.ndim >= 2:
|
|
127
|
+
self.metadata.num_samples = self._data.shape[0]
|
|
128
|
+
self.metadata.num_features = self._data.shape[1]
|
|
129
|
+
|
|
130
|
+
# Calculate statistics if numerical
|
|
131
|
+
if np.issubdtype(self._data.dtype, np.number):
|
|
132
|
+
for i in range(min(self.metadata.num_features, 100)): # Limit to 100 features
|
|
133
|
+
feature_name = (
|
|
134
|
+
self.metadata.feature_names[i] if i < len(self.metadata.feature_names) else f"feature_{i}"
|
|
135
|
+
)
|
|
136
|
+
self.metadata.statistics[feature_name] = {
|
|
137
|
+
"min": float(np.min(self._data[:, i])),
|
|
138
|
+
"max": float(np.max(self._data[:, i])),
|
|
139
|
+
"mean": float(np.mean(self._data[:, i])),
|
|
140
|
+
"std": float(np.std(self._data[:, i])),
|
|
141
|
+
}
|
|
142
|
+
except ImportError:
|
|
143
|
+
pass
|
|
144
|
+
|
|
145
|
+
@property
|
|
146
|
+
def data(self) -> Any:
|
|
147
|
+
"""Get the feature data."""
|
|
148
|
+
return self._data
|
|
149
|
+
|
|
150
|
+
@property
|
|
151
|
+
def feature_names(self) -> list[str]:
|
|
152
|
+
"""Get feature names."""
|
|
153
|
+
return self.metadata.feature_names
|
|
154
|
+
|
|
155
|
+
@property
|
|
156
|
+
def num_features(self) -> int:
|
|
157
|
+
"""Get number of features."""
|
|
158
|
+
return self.metadata.num_features
|
|
159
|
+
|
|
160
|
+
@property
|
|
161
|
+
def num_samples(self) -> int:
|
|
162
|
+
"""Get number of samples."""
|
|
163
|
+
return self.metadata.num_samples
|
|
164
|
+
|
|
165
|
+
@property
|
|
166
|
+
def feature_types(self) -> dict[str, str]:
|
|
167
|
+
"""Get feature types."""
|
|
168
|
+
return self.metadata.feature_types
|
|
169
|
+
|
|
170
|
+
@property
|
|
171
|
+
def statistics(self) -> dict[str, dict[str, float]]:
|
|
172
|
+
"""Get feature statistics."""
|
|
173
|
+
return self.metadata.statistics
|
|
174
|
+
|
|
175
|
+
@property
|
|
176
|
+
def transformations(self) -> list[str]:
|
|
177
|
+
"""Get list of transformations applied."""
|
|
178
|
+
return self.metadata.transformations
|
|
179
|
+
|
|
180
|
+
@property
|
|
181
|
+
def source_dataset(self) -> str | None:
|
|
182
|
+
"""Get source dataset name."""
|
|
183
|
+
return self.metadata.source_dataset
|
|
184
|
+
|
|
185
|
+
@classmethod
|
|
186
|
+
def create(
|
|
187
|
+
cls,
|
|
188
|
+
data: Any,
|
|
189
|
+
name: str | None = None,
|
|
190
|
+
feature_names: list[str] | None = None,
|
|
191
|
+
transformations: list[str] | None = None,
|
|
192
|
+
source_dataset: str | None = None,
|
|
193
|
+
**kwargs,
|
|
194
|
+
) -> "FeatureSet":
|
|
195
|
+
"""Factory method to create a FeatureSet.
|
|
196
|
+
|
|
197
|
+
Args:
|
|
198
|
+
data: The feature matrix
|
|
199
|
+
name: Name of the feature set (auto-generated if not provided)
|
|
200
|
+
feature_names: List of feature names
|
|
201
|
+
transformations: List of transformations applied
|
|
202
|
+
source_dataset: Name of source dataset
|
|
203
|
+
**kwargs: Additional metadata
|
|
204
|
+
|
|
205
|
+
Returns:
|
|
206
|
+
New FeatureSet instance
|
|
207
|
+
"""
|
|
208
|
+
if name is None:
|
|
209
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
210
|
+
name = f"features_{timestamp}"
|
|
211
|
+
|
|
212
|
+
return cls(
|
|
213
|
+
name=name,
|
|
214
|
+
data=data,
|
|
215
|
+
feature_names=feature_names,
|
|
216
|
+
transformations=transformations,
|
|
217
|
+
source_dataset=source_dataset,
|
|
218
|
+
**kwargs,
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
def select_features(self, feature_names: list[str]) -> "FeatureSet":
|
|
222
|
+
"""Select a subset of features.
|
|
223
|
+
|
|
224
|
+
Args:
|
|
225
|
+
feature_names: List of feature names to select
|
|
226
|
+
|
|
227
|
+
Returns:
|
|
228
|
+
New FeatureSet with selected features
|
|
229
|
+
"""
|
|
230
|
+
if self._data is None:
|
|
231
|
+
raise ValueError("Cannot select features from FeatureSet without data")
|
|
232
|
+
|
|
233
|
+
try:
|
|
234
|
+
import pandas as pd
|
|
235
|
+
|
|
236
|
+
if isinstance(self._data, pd.DataFrame):
|
|
237
|
+
selected_data = self._data[feature_names]
|
|
238
|
+
return FeatureSet.create(
|
|
239
|
+
data=selected_data,
|
|
240
|
+
name=f"{self.name}_selected",
|
|
241
|
+
feature_names=feature_names,
|
|
242
|
+
transformations=self.transformations,
|
|
243
|
+
source_dataset=self.name,
|
|
244
|
+
)
|
|
245
|
+
except ImportError:
|
|
246
|
+
pass
|
|
247
|
+
|
|
248
|
+
try:
|
|
249
|
+
import numpy as np
|
|
250
|
+
|
|
251
|
+
if isinstance(self._data, np.ndarray):
|
|
252
|
+
# Map feature names to indices
|
|
253
|
+
indices = [self.feature_names.index(fn) for fn in feature_names if fn in self.feature_names]
|
|
254
|
+
selected_data = self._data[:, indices]
|
|
255
|
+
return FeatureSet.create(
|
|
256
|
+
data=selected_data,
|
|
257
|
+
name=f"{self.name}_selected",
|
|
258
|
+
feature_names=feature_names,
|
|
259
|
+
transformations=self.transformations,
|
|
260
|
+
source_dataset=self.name,
|
|
261
|
+
)
|
|
262
|
+
except ImportError:
|
|
263
|
+
pass
|
|
264
|
+
|
|
265
|
+
raise TypeError("Unsupported data type for feature selection")
|
|
266
|
+
|
|
267
|
+
def get_feature_statistics(self, feature_name: str) -> dict[str, float] | None:
|
|
268
|
+
"""Get statistics for a specific feature.
|
|
269
|
+
|
|
270
|
+
Args:
|
|
271
|
+
feature_name: Name of the feature
|
|
272
|
+
|
|
273
|
+
Returns:
|
|
274
|
+
Dictionary of statistics or None if not found
|
|
275
|
+
"""
|
|
276
|
+
return self.statistics.get(feature_name)
|
|
277
|
+
|
|
278
|
+
def to_dict(self) -> dict[str, Any]:
|
|
279
|
+
"""Convert FeatureSet to dictionary.
|
|
280
|
+
|
|
281
|
+
Returns:
|
|
282
|
+
Dictionary representation (excluding data)
|
|
283
|
+
"""
|
|
284
|
+
return {
|
|
285
|
+
"id": self.id,
|
|
286
|
+
"name": self.name,
|
|
287
|
+
"type": self.type,
|
|
288
|
+
"feature_names": self.feature_names,
|
|
289
|
+
"num_features": self.num_features,
|
|
290
|
+
"num_samples": self.num_samples,
|
|
291
|
+
"feature_types": self.feature_types,
|
|
292
|
+
"statistics": self.statistics,
|
|
293
|
+
"transformations": self.transformations,
|
|
294
|
+
"source_dataset": self.source_dataset,
|
|
295
|
+
"created_at": self.created_at.isoformat(),
|
|
296
|
+
"tags": self.tags,
|
|
297
|
+
"properties": self.properties,
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
def __repr__(self) -> str:
|
|
301
|
+
return f"FeatureSet(name='{self.name}', num_features={self.num_features}, num_samples={self.num_samples})"
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Metrics Asset - Represents experiment metrics and evaluation results."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
from flowyml.assets.base import Asset
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Metrics(Asset):
|
|
8
|
+
"""Metrics asset for experiment tracking.
|
|
9
|
+
|
|
10
|
+
Example:
|
|
11
|
+
>>> metrics = Metrics(name="training_metrics", data={"accuracy": 0.95, "loss": 0.05, "f1_score": 0.93})
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
def __init__(
|
|
15
|
+
self,
|
|
16
|
+
name: str,
|
|
17
|
+
version: str | None = None,
|
|
18
|
+
data: dict[str, Any] | None = None,
|
|
19
|
+
parent: Asset | None = None,
|
|
20
|
+
tags: dict[str, str] | None = None,
|
|
21
|
+
properties: dict[str, Any] | None = None,
|
|
22
|
+
**metrics,
|
|
23
|
+
):
|
|
24
|
+
# Merge data and kwargs metrics
|
|
25
|
+
all_metrics = data or {}
|
|
26
|
+
all_metrics.update(metrics)
|
|
27
|
+
|
|
28
|
+
super().__init__(
|
|
29
|
+
name=name,
|
|
30
|
+
version=version,
|
|
31
|
+
data=all_metrics,
|
|
32
|
+
parent=parent,
|
|
33
|
+
tags=tags,
|
|
34
|
+
properties=properties,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Store metrics in properties for easy access
|
|
38
|
+
self.metadata.properties.update(all_metrics)
|
|
39
|
+
|
|
40
|
+
def get_metric(self, name: str, default: Any = None) -> Any:
|
|
41
|
+
"""Get a specific metric value."""
|
|
42
|
+
if self.data and name in self.data:
|
|
43
|
+
return self.data[name]
|
|
44
|
+
return self.metadata.properties.get(name, default)
|
|
45
|
+
|
|
46
|
+
def add_metric(self, name: str, value: Any) -> None:
|
|
47
|
+
"""Add a new metric."""
|
|
48
|
+
if self.data is None:
|
|
49
|
+
self.data = {}
|
|
50
|
+
self.data[name] = value
|
|
51
|
+
self.metadata.properties[name] = value
|
|
52
|
+
|
|
53
|
+
def get_all_metrics(self) -> dict[str, Any]:
|
|
54
|
+
"""Get all metrics."""
|
|
55
|
+
return self.data or {}
|
|
56
|
+
|
|
57
|
+
def compare_with(self, other: "Metrics") -> dict[str, dict[str, Any]]:
|
|
58
|
+
"""Compare metrics with another Metrics asset.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
Dictionary with comparison results
|
|
62
|
+
"""
|
|
63
|
+
self_metrics = self.get_all_metrics()
|
|
64
|
+
other_metrics = other.get_all_metrics()
|
|
65
|
+
|
|
66
|
+
comparison = {}
|
|
67
|
+
all_keys = set(self_metrics.keys()) | set(other_metrics.keys())
|
|
68
|
+
|
|
69
|
+
for key in all_keys:
|
|
70
|
+
self_val = self_metrics.get(key)
|
|
71
|
+
other_val = other_metrics.get(key)
|
|
72
|
+
|
|
73
|
+
comparison[key] = {
|
|
74
|
+
"self": self_val,
|
|
75
|
+
"other": other_val,
|
|
76
|
+
"diff": self_val - other_val
|
|
77
|
+
if (
|
|
78
|
+
self_val is not None
|
|
79
|
+
and other_val is not None
|
|
80
|
+
and isinstance(self_val, (int, float))
|
|
81
|
+
and isinstance(other_val, (int, float))
|
|
82
|
+
)
|
|
83
|
+
else None,
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
return comparison
|
|
87
|
+
|
|
88
|
+
@classmethod
|
|
89
|
+
def create(
|
|
90
|
+
cls,
|
|
91
|
+
name: str | None = None,
|
|
92
|
+
parent: Asset | None = None,
|
|
93
|
+
**metrics,
|
|
94
|
+
) -> "Metrics":
|
|
95
|
+
"""Factory method to create metrics.
|
|
96
|
+
|
|
97
|
+
Example:
|
|
98
|
+
>>> metrics = Metrics.create(accuracy=0.95, loss=0.05, training_time="2h 15m")
|
|
99
|
+
"""
|
|
100
|
+
return cls(
|
|
101
|
+
name=name or "metrics",
|
|
102
|
+
data=metrics,
|
|
103
|
+
parent=parent,
|
|
104
|
+
)
|
flowyml/assets/model.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Model Asset - Represents ML models with metadata and lineage."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
from flowyml.assets.base import Asset
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Model(Asset):
|
|
8
|
+
"""Model asset with training metadata and lineage.
|
|
9
|
+
|
|
10
|
+
Example:
|
|
11
|
+
>>> model = Model(
|
|
12
|
+
... name="resnet50_v1",
|
|
13
|
+
... version="v1.0.0",
|
|
14
|
+
... data=trained_model,
|
|
15
|
+
... architecture="resnet50",
|
|
16
|
+
... framework="pytorch",
|
|
17
|
+
... properties={"params": 25_557_032},
|
|
18
|
+
... )
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
def __init__(
|
|
22
|
+
self,
|
|
23
|
+
name: str,
|
|
24
|
+
version: str | None = None,
|
|
25
|
+
data: Any = None,
|
|
26
|
+
architecture: str | None = None,
|
|
27
|
+
framework: str | None = None,
|
|
28
|
+
input_shape: tuple | None = None,
|
|
29
|
+
output_shape: tuple | None = None,
|
|
30
|
+
trained_on: Asset | None = None,
|
|
31
|
+
parent: Asset | None = None,
|
|
32
|
+
tags: dict[str, str] | None = None,
|
|
33
|
+
properties: dict[str, Any] | None = None,
|
|
34
|
+
):
|
|
35
|
+
super().__init__(
|
|
36
|
+
name=name,
|
|
37
|
+
version=version,
|
|
38
|
+
data=data,
|
|
39
|
+
parent=parent,
|
|
40
|
+
tags=tags,
|
|
41
|
+
properties=properties,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
self.architecture = architecture
|
|
45
|
+
self.framework = framework
|
|
46
|
+
self.input_shape = input_shape
|
|
47
|
+
self.output_shape = output_shape
|
|
48
|
+
|
|
49
|
+
# Track training dataset
|
|
50
|
+
if trained_on:
|
|
51
|
+
self.parents.append(trained_on)
|
|
52
|
+
trained_on.children.append(self)
|
|
53
|
+
|
|
54
|
+
# Add model-specific properties
|
|
55
|
+
if architecture:
|
|
56
|
+
self.metadata.properties["architecture"] = architecture
|
|
57
|
+
if framework:
|
|
58
|
+
self.metadata.properties["framework"] = framework
|
|
59
|
+
if input_shape:
|
|
60
|
+
self.metadata.properties["input_shape"] = input_shape
|
|
61
|
+
if output_shape:
|
|
62
|
+
self.metadata.properties["output_shape"] = output_shape
|
|
63
|
+
|
|
64
|
+
def get_training_datasets(self):
|
|
65
|
+
"""Get all datasets this model was trained on."""
|
|
66
|
+
from flowyml.assets.dataset import Dataset
|
|
67
|
+
|
|
68
|
+
return [p for p in self.parents if isinstance(p, Dataset)]
|
|
69
|
+
|
|
70
|
+
def get_parameters_count(self) -> int | None:
|
|
71
|
+
"""Get number of model parameters if available."""
|
|
72
|
+
return self.metadata.properties.get("params") or self.metadata.properties.get("parameters")
|
|
73
|
+
|
|
74
|
+
def get_architecture_info(self) -> dict[str, Any]:
|
|
75
|
+
"""Get architecture information."""
|
|
76
|
+
return {
|
|
77
|
+
"architecture": self.architecture,
|
|
78
|
+
"framework": self.framework,
|
|
79
|
+
"input_shape": self.input_shape,
|
|
80
|
+
"output_shape": self.output_shape,
|
|
81
|
+
"parameters": self.get_parameters_count(),
|
|
82
|
+
}
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""Asset Registry - Central registry for all pipeline assets."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
|
|
7
|
+
from flowyml.assets.base import Asset
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AssetRegistry:
|
|
11
|
+
"""Central registry for managing and querying assets."""
|
|
12
|
+
|
|
13
|
+
def __init__(self, registry_dir: str = ".flowyml/assets"):
|
|
14
|
+
self.registry_dir = Path(registry_dir)
|
|
15
|
+
self.registry_dir.mkdir(parents=True, exist_ok=True)
|
|
16
|
+
|
|
17
|
+
self.assets: dict[str, Asset] = {} # id -> asset
|
|
18
|
+
self.assets_by_name: dict[str, list[Asset]] = {} # name -> [assets]
|
|
19
|
+
self.assets_by_type: dict[str, set[Asset]] = {} # type -> {assets}
|
|
20
|
+
|
|
21
|
+
self._load_registry()
|
|
22
|
+
|
|
23
|
+
def register(self, asset: Asset) -> None:
|
|
24
|
+
"""Register an asset."""
|
|
25
|
+
self.assets[asset.id] = asset
|
|
26
|
+
|
|
27
|
+
# Index by name
|
|
28
|
+
if asset.name not in self.assets_by_name:
|
|
29
|
+
self.assets_by_name[asset.name] = []
|
|
30
|
+
self.assets_by_name[asset.name].append(asset)
|
|
31
|
+
|
|
32
|
+
# Index by type
|
|
33
|
+
asset_type = asset.metadata.asset_type
|
|
34
|
+
if asset_type not in self.assets_by_type:
|
|
35
|
+
self.assets_by_type[asset_type] = set()
|
|
36
|
+
self.assets_by_type[asset_type].add(asset)
|
|
37
|
+
|
|
38
|
+
self._save_registry()
|
|
39
|
+
|
|
40
|
+
def get(self, asset_id: str) -> Asset | None:
|
|
41
|
+
"""Get asset by ID."""
|
|
42
|
+
return self.assets.get(asset_id)
|
|
43
|
+
|
|
44
|
+
def get_by_name(
|
|
45
|
+
self,
|
|
46
|
+
name: str,
|
|
47
|
+
version: str | None = None,
|
|
48
|
+
) -> Asset | None:
|
|
49
|
+
"""Get asset by name and optionally version.
|
|
50
|
+
|
|
51
|
+
Returns the latest version if version not specified.
|
|
52
|
+
"""
|
|
53
|
+
assets = self.assets_by_name.get(name, [])
|
|
54
|
+
|
|
55
|
+
if not assets:
|
|
56
|
+
return None
|
|
57
|
+
|
|
58
|
+
if version:
|
|
59
|
+
for asset in assets:
|
|
60
|
+
if asset.version == version:
|
|
61
|
+
return asset
|
|
62
|
+
return None
|
|
63
|
+
|
|
64
|
+
# Return latest version
|
|
65
|
+
return max(assets, key=lambda a: a.metadata.created_at)
|
|
66
|
+
|
|
67
|
+
def list_by_type(self, asset_type: str) -> list[Asset]:
|
|
68
|
+
"""List all assets of a specific type."""
|
|
69
|
+
return list(self.assets_by_type.get(asset_type, set()))
|
|
70
|
+
|
|
71
|
+
def list_all(self) -> list[Asset]:
|
|
72
|
+
"""List all registered assets."""
|
|
73
|
+
return list(self.assets.values())
|
|
74
|
+
|
|
75
|
+
def search(
|
|
76
|
+
self,
|
|
77
|
+
name: str | None = None,
|
|
78
|
+
asset_type: str | None = None,
|
|
79
|
+
tags: dict[str, str] | None = None,
|
|
80
|
+
created_after: datetime | None = None,
|
|
81
|
+
) -> list[Asset]:
|
|
82
|
+
"""Search for assets matching criteria.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
name: Filter by name (substring match)
|
|
86
|
+
asset_type: Filter by asset type
|
|
87
|
+
tags: Filter by tags (must match all)
|
|
88
|
+
created_after: Filter by creation date
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
List of matching assets
|
|
92
|
+
"""
|
|
93
|
+
results = list(self.assets.values())
|
|
94
|
+
|
|
95
|
+
if name:
|
|
96
|
+
results = [a for a in results if name.lower() in a.name.lower()]
|
|
97
|
+
|
|
98
|
+
if asset_type:
|
|
99
|
+
results = [a for a in results if a.metadata.asset_type == asset_type]
|
|
100
|
+
|
|
101
|
+
if tags:
|
|
102
|
+
results = [a for a in results if all(a.metadata.tags.get(k) == v for k, v in tags.items())]
|
|
103
|
+
|
|
104
|
+
if created_after:
|
|
105
|
+
results = [a for a in results if a.metadata.created_at > created_after]
|
|
106
|
+
|
|
107
|
+
return results
|
|
108
|
+
|
|
109
|
+
def get_lineage_graph(self, asset_id: str) -> dict:
|
|
110
|
+
"""Get full lineage graph for an asset."""
|
|
111
|
+
asset = self.get(asset_id)
|
|
112
|
+
if not asset:
|
|
113
|
+
return {}
|
|
114
|
+
return asset.get_lineage()
|
|
115
|
+
|
|
116
|
+
def _save_registry(self) -> None:
|
|
117
|
+
"""Save registry metadata to disk."""
|
|
118
|
+
registry_file = self.registry_dir / "registry.json"
|
|
119
|
+
|
|
120
|
+
data = {
|
|
121
|
+
"assets": {asset_id: asset.to_dict() for asset_id, asset in self.assets.items()},
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
with open(registry_file, "w") as f:
|
|
125
|
+
json.dump(data, f, indent=2)
|
|
126
|
+
|
|
127
|
+
def _load_registry(self) -> None:
|
|
128
|
+
"""Load registry metadata from disk."""
|
|
129
|
+
registry_file = self.registry_dir / "registry.json"
|
|
130
|
+
|
|
131
|
+
if not registry_file.exists():
|
|
132
|
+
return
|
|
133
|
+
|
|
134
|
+
try:
|
|
135
|
+
with open(registry_file) as f:
|
|
136
|
+
json.load(f)
|
|
137
|
+
|
|
138
|
+
# Note: This is simplified - in production, we'd need to
|
|
139
|
+
# deserialize the actual asset objects with their data
|
|
140
|
+
# For now, we just load the metadata
|
|
141
|
+
|
|
142
|
+
except Exception:
|
|
143
|
+
pass
|
|
144
|
+
|
|
145
|
+
def clear(self) -> None:
|
|
146
|
+
"""Clear the registry."""
|
|
147
|
+
self.assets.clear()
|
|
148
|
+
self.assets_by_name.clear()
|
|
149
|
+
self.assets_by_type.clear()
|
|
150
|
+
self._save_registry()
|
|
151
|
+
|
|
152
|
+
def stats(self) -> dict[str, int]:
|
|
153
|
+
"""Get registry statistics."""
|
|
154
|
+
return {
|
|
155
|
+
"total_assets": len(self.assets),
|
|
156
|
+
"by_type": {asset_type: len(assets) for asset_type, assets in self.assets_by_type.items()},
|
|
157
|
+
}
|