flowyml 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. flowyml/__init__.py +207 -0
  2. flowyml/assets/__init__.py +22 -0
  3. flowyml/assets/artifact.py +40 -0
  4. flowyml/assets/base.py +209 -0
  5. flowyml/assets/dataset.py +100 -0
  6. flowyml/assets/featureset.py +301 -0
  7. flowyml/assets/metrics.py +104 -0
  8. flowyml/assets/model.py +82 -0
  9. flowyml/assets/registry.py +157 -0
  10. flowyml/assets/report.py +315 -0
  11. flowyml/cli/__init__.py +5 -0
  12. flowyml/cli/experiment.py +232 -0
  13. flowyml/cli/init.py +256 -0
  14. flowyml/cli/main.py +327 -0
  15. flowyml/cli/run.py +75 -0
  16. flowyml/cli/stack_cli.py +532 -0
  17. flowyml/cli/ui.py +33 -0
  18. flowyml/core/__init__.py +68 -0
  19. flowyml/core/advanced_cache.py +274 -0
  20. flowyml/core/approval.py +64 -0
  21. flowyml/core/cache.py +203 -0
  22. flowyml/core/checkpoint.py +148 -0
  23. flowyml/core/conditional.py +373 -0
  24. flowyml/core/context.py +155 -0
  25. flowyml/core/error_handling.py +419 -0
  26. flowyml/core/executor.py +354 -0
  27. flowyml/core/graph.py +185 -0
  28. flowyml/core/parallel.py +452 -0
  29. flowyml/core/pipeline.py +764 -0
  30. flowyml/core/project.py +253 -0
  31. flowyml/core/resources.py +424 -0
  32. flowyml/core/scheduler.py +630 -0
  33. flowyml/core/scheduler_config.py +32 -0
  34. flowyml/core/step.py +201 -0
  35. flowyml/core/step_grouping.py +292 -0
  36. flowyml/core/templates.py +226 -0
  37. flowyml/core/versioning.py +217 -0
  38. flowyml/integrations/__init__.py +1 -0
  39. flowyml/integrations/keras.py +134 -0
  40. flowyml/monitoring/__init__.py +1 -0
  41. flowyml/monitoring/alerts.py +57 -0
  42. flowyml/monitoring/data.py +102 -0
  43. flowyml/monitoring/llm.py +160 -0
  44. flowyml/monitoring/monitor.py +57 -0
  45. flowyml/monitoring/notifications.py +246 -0
  46. flowyml/registry/__init__.py +5 -0
  47. flowyml/registry/model_registry.py +491 -0
  48. flowyml/registry/pipeline_registry.py +55 -0
  49. flowyml/stacks/__init__.py +27 -0
  50. flowyml/stacks/base.py +77 -0
  51. flowyml/stacks/bridge.py +288 -0
  52. flowyml/stacks/components.py +155 -0
  53. flowyml/stacks/gcp.py +499 -0
  54. flowyml/stacks/local.py +112 -0
  55. flowyml/stacks/migration.py +97 -0
  56. flowyml/stacks/plugin_config.py +78 -0
  57. flowyml/stacks/plugins.py +401 -0
  58. flowyml/stacks/registry.py +226 -0
  59. flowyml/storage/__init__.py +26 -0
  60. flowyml/storage/artifacts.py +246 -0
  61. flowyml/storage/materializers/__init__.py +20 -0
  62. flowyml/storage/materializers/base.py +133 -0
  63. flowyml/storage/materializers/keras.py +185 -0
  64. flowyml/storage/materializers/numpy.py +94 -0
  65. flowyml/storage/materializers/pandas.py +142 -0
  66. flowyml/storage/materializers/pytorch.py +135 -0
  67. flowyml/storage/materializers/sklearn.py +110 -0
  68. flowyml/storage/materializers/tensorflow.py +152 -0
  69. flowyml/storage/metadata.py +931 -0
  70. flowyml/tracking/__init__.py +1 -0
  71. flowyml/tracking/experiment.py +211 -0
  72. flowyml/tracking/leaderboard.py +191 -0
  73. flowyml/tracking/runs.py +145 -0
  74. flowyml/ui/__init__.py +15 -0
  75. flowyml/ui/backend/Dockerfile +31 -0
  76. flowyml/ui/backend/__init__.py +0 -0
  77. flowyml/ui/backend/auth.py +163 -0
  78. flowyml/ui/backend/main.py +187 -0
  79. flowyml/ui/backend/routers/__init__.py +0 -0
  80. flowyml/ui/backend/routers/assets.py +45 -0
  81. flowyml/ui/backend/routers/execution.py +179 -0
  82. flowyml/ui/backend/routers/experiments.py +49 -0
  83. flowyml/ui/backend/routers/leaderboard.py +118 -0
  84. flowyml/ui/backend/routers/notifications.py +72 -0
  85. flowyml/ui/backend/routers/pipelines.py +110 -0
  86. flowyml/ui/backend/routers/plugins.py +192 -0
  87. flowyml/ui/backend/routers/projects.py +85 -0
  88. flowyml/ui/backend/routers/runs.py +66 -0
  89. flowyml/ui/backend/routers/schedules.py +222 -0
  90. flowyml/ui/backend/routers/traces.py +84 -0
  91. flowyml/ui/frontend/Dockerfile +20 -0
  92. flowyml/ui/frontend/README.md +315 -0
  93. flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
  94. flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
  95. flowyml/ui/frontend/dist/index.html +16 -0
  96. flowyml/ui/frontend/index.html +15 -0
  97. flowyml/ui/frontend/nginx.conf +26 -0
  98. flowyml/ui/frontend/package-lock.json +3545 -0
  99. flowyml/ui/frontend/package.json +33 -0
  100. flowyml/ui/frontend/postcss.config.js +6 -0
  101. flowyml/ui/frontend/src/App.jsx +21 -0
  102. flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
  103. flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
  104. flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
  105. flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
  106. flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
  107. flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
  108. flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
  109. flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
  110. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
  111. flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
  112. flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
  113. flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
  114. flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
  115. flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
  116. flowyml/ui/frontend/src/components/Layout.jsx +108 -0
  117. flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
  118. flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
  119. flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
  120. flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
  121. flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
  122. flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
  123. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
  124. flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
  125. flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
  126. flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
  127. flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
  128. flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
  129. flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
  130. flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
  131. flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
  132. flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
  133. flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
  134. flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
  135. flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
  136. flowyml/ui/frontend/src/index.css +11 -0
  137. flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
  138. flowyml/ui/frontend/src/main.jsx +10 -0
  139. flowyml/ui/frontend/src/router/index.jsx +39 -0
  140. flowyml/ui/frontend/src/services/pluginService.js +90 -0
  141. flowyml/ui/frontend/src/utils/api.js +47 -0
  142. flowyml/ui/frontend/src/utils/cn.js +6 -0
  143. flowyml/ui/frontend/tailwind.config.js +31 -0
  144. flowyml/ui/frontend/vite.config.js +21 -0
  145. flowyml/ui/utils.py +77 -0
  146. flowyml/utils/__init__.py +67 -0
  147. flowyml/utils/config.py +308 -0
  148. flowyml/utils/debug.py +240 -0
  149. flowyml/utils/environment.py +346 -0
  150. flowyml/utils/git.py +319 -0
  151. flowyml/utils/logging.py +61 -0
  152. flowyml/utils/performance.py +314 -0
  153. flowyml/utils/stack_config.py +296 -0
  154. flowyml/utils/validation.py +270 -0
  155. flowyml-1.1.0.dist-info/METADATA +372 -0
  156. flowyml-1.1.0.dist-info/RECORD +159 -0
  157. flowyml-1.1.0.dist-info/WHEEL +4 -0
  158. flowyml-1.1.0.dist-info/entry_points.txt +3 -0
  159. flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
@@ -0,0 +1,301 @@
1
+ """FeatureSet asset for feature engineering outputs."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from datetime import datetime
5
+ from typing import Any
6
+
7
+ from flowyml.assets.base import Asset, AssetMetadata
8
+
9
+
10
@dataclass
class FeatureSetMetadata(AssetMetadata):
    """Metadata fields that a FeatureSet adds on top of ``AssetMetadata``.

    Attributes:
        feature_names: Names of the features.
        num_features: Number of features.
        num_samples: Number of samples (rows).
        feature_types: Mapping of feature name to a type/dtype string.
        statistics: Mapping of feature name to a mapping of statistic
            name to value.
        transformations: Names of the transformations that were applied.
        source_dataset: Name of the dataset the features came from, if any.
    """

    # Mutable defaults go through default_factory so instances never share them.
    feature_names: list[str] = field(default_factory=list)
    num_features: int = 0
    num_samples: int = 0
    feature_types: dict[str, str] = field(default_factory=dict)
    statistics: dict[str, dict[str, float]] = field(default_factory=dict)
    transformations: list[str] = field(default_factory=list)
    source_dataset: str | None = None
21
+
22
+
23
class FeatureSet(Asset):
    """Asset representing a set of engineered features.

    FeatureSets are created through feature engineering pipelines and contain
    transformed data ready for model training.

    Example:
        ```python
        from flowyml import FeatureSet

        # Create a feature set
        features = FeatureSet.create(
            data=feature_matrix,
            feature_names=["age_scaled", "income_log", "category_encoded"],
            num_samples=10000,
            transformations=["StandardScaler", "LogTransform", "OneHotEncoder"],
            source_dataset="customers_v1",
        )

        # Access feature information
        print(features.num_features)  # 3
        print(features.feature_names)
        print(features.statistics)
        ```
    """

    # Upper bound on how many ndarray columns get per-feature statistics.
    _MAX_ARRAY_STAT_FEATURES = 100

    def __init__(
        self,
        name: str,
        data: Any = None,
        feature_names: list[str] | None = None,
        num_samples: int = 0,
        transformations: list[str] | None = None,
        source_dataset: str | None = None,
        **kwargs,
    ):
        """Initialize a FeatureSet.

        Args:
            name: Name of the feature set
            data: The feature matrix (DataFrame, array, etc.)
            feature_names: List of feature names
            num_samples: Number of samples in the feature set
            transformations: List of transformations applied
            source_dataset: Name of source dataset
            **kwargs: Additional metadata, forwarded to ``FeatureSetMetadata``
        """
        # Copy the list arguments so the metadata never aliases a list the
        # caller (or another FeatureSet) may keep mutating.
        names = list(feature_names) if feature_names else []
        applied = list(transformations) if transformations else []

        # NOTE(review): the Metrics and Model assets call Asset.__init__ with
        # version/data/parent/tags/properties; confirm that the base classes
        # accept the `type=` and `metadata=` keywords used here.
        metadata = FeatureSetMetadata(
            name=name,
            type="featureset",
            feature_names=names,
            num_features=len(names),
            num_samples=num_samples,
            transformations=applied,
            source_dataset=source_dataset,
            **kwargs,
        )

        super().__init__(name=name, type="featureset", metadata=metadata)
        self._data = data

        # Extract feature metadata if data provided
        if data is not None:
            self._extract_feature_metadata()

    def _extract_feature_metadata(self) -> None:
        """Fill in metadata from ``self._data`` when it is a DataFrame or ndarray.

        pandas and numpy are optional: when one is not importable the
        corresponding branch is skipped. Any other data type is left alone.
        """
        # Only the import itself is guarded, so an ImportError raised while
        # processing the data is not silently swallowed.
        try:
            import pandas as pd
        except ImportError:
            pd = None
        if pd is not None and isinstance(self._data, pd.DataFrame):
            self._extract_from_dataframe(self._data)
            return

        try:
            import numpy as np
        except ImportError:
            return
        if isinstance(self._data, np.ndarray):
            self._extract_from_ndarray(self._data)

    def _extract_from_dataframe(self, frame: Any) -> None:
        """Derive names, dtypes, sample count and statistics from a DataFrame."""
        meta = self.metadata

        # Explicitly supplied feature names take precedence over the columns.
        if not meta.feature_names:
            meta.feature_names = frame.columns.tolist()
            meta.num_features = len(frame.columns)

        meta.feature_types = {col: str(dtype) for col, dtype in frame.dtypes.items()}

        # An explicitly supplied (non-zero) sample count is kept as-is.
        if not meta.num_samples:
            meta.num_samples = len(frame)

        # Statistics are only computed for numerical columns.
        for col in frame.select_dtypes(include=["number"]).columns:
            series = frame[col]
            meta.statistics[col] = {
                "min": float(series.min()),
                "max": float(series.max()),
                "mean": float(series.mean()),
                "std": float(series.std()),
                "missing": int(series.isna().sum()),
            }

    def _extract_from_ndarray(self, array: Any) -> None:
        """Derive dimensions and per-column statistics from a numpy array."""
        import numpy as np

        # Arrays without a column axis cannot be sliced per feature; the
        # metadata passed to the constructor is left untouched in that case.
        if array.ndim < 2:
            return

        meta = self.metadata
        meta.num_samples = array.shape[0]
        meta.num_features = array.shape[1]

        if not np.issubdtype(array.dtype, np.number):
            return

        known_names = meta.feature_names
        for i in range(min(meta.num_features, self._MAX_ARRAY_STAT_FEATURES)):
            # Columns beyond the supplied names get a positional fallback name.
            label = known_names[i] if i < len(known_names) else f"feature_{i}"
            column = array[:, i]
            meta.statistics[label] = {
                "min": float(np.min(column)),
                "max": float(np.max(column)),
                "mean": float(np.mean(column)),
                "std": float(np.std(column)),
            }

    @property
    def data(self) -> Any:
        """Get the feature data."""
        return self._data

    @property
    def feature_names(self) -> list[str]:
        """Get feature names."""
        return self.metadata.feature_names

    @property
    def num_features(self) -> int:
        """Get number of features."""
        return self.metadata.num_features

    @property
    def num_samples(self) -> int:
        """Get number of samples."""
        return self.metadata.num_samples

    @property
    def feature_types(self) -> dict[str, str]:
        """Get feature types."""
        return self.metadata.feature_types

    @property
    def statistics(self) -> dict[str, dict[str, float]]:
        """Get feature statistics."""
        return self.metadata.statistics

    @property
    def transformations(self) -> list[str]:
        """Get list of transformations applied."""
        return self.metadata.transformations

    @property
    def source_dataset(self) -> str | None:
        """Get source dataset name."""
        return self.metadata.source_dataset

    @classmethod
    def create(
        cls,
        data: Any,
        name: str | None = None,
        feature_names: list[str] | None = None,
        transformations: list[str] | None = None,
        source_dataset: str | None = None,
        **kwargs,
    ) -> "FeatureSet":
        """Factory method to create a FeatureSet.

        Args:
            data: The feature matrix
            name: Name of the feature set (auto-generated if not provided)
            feature_names: List of feature names
            transformations: List of transformations applied
            source_dataset: Name of source dataset
            **kwargs: Additional metadata

        Returns:
            New FeatureSet instance
        """
        if name is None:
            # Auto-generated names carry a second-resolution local timestamp.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            name = f"features_{timestamp}"

        return cls(
            name=name,
            data=data,
            feature_names=feature_names,
            transformations=transformations,
            source_dataset=source_dataset,
            **kwargs,
        )

    def select_features(self, feature_names: list[str]) -> "FeatureSet":
        """Select a subset of features.

        For DataFrame data the selection is delegated to pandas column
        indexing. For ndarray data, requested names that are not among this
        set's feature names are skipped, and the returned FeatureSet lists
        only the names that were actually selected.

        Args:
            feature_names: List of feature names to select

        Returns:
            New FeatureSet with selected features

        Raises:
            ValueError: If this FeatureSet holds no data.
            TypeError: If the data is neither a DataFrame nor an ndarray.
        """
        if self._data is None:
            raise ValueError("Cannot select features from FeatureSet without data")

        try:
            import pandas as pd
        except ImportError:
            pd = None
        if pd is not None and isinstance(self._data, pd.DataFrame):
            return self._derive_selection(self._data[feature_names], feature_names)

        try:
            import numpy as np
        except ImportError:
            np = None
        if np is not None and isinstance(self._data, np.ndarray):
            # Map each known name to its first column position in one pass
            # (same result as list.index, without the repeated scans).
            first_position: dict[str, int] = {}
            for position, known in enumerate(self.feature_names):
                first_position.setdefault(known, position)

            # Keep names and columns aligned: only names that resolve to a
            # column are carried over to the new FeatureSet.
            kept = [fn for fn in feature_names if fn in first_position]
            columns = [first_position[fn] for fn in kept]
            return self._derive_selection(self._data[:, columns], kept)

        raise TypeError("Unsupported data type for feature selection")

    def _derive_selection(self, selected_data: Any, selected_names: list[str]) -> "FeatureSet":
        """Build the ``<name>_selected`` FeatureSet for already-sliced data."""
        return FeatureSet.create(
            data=selected_data,
            name=f"{self.name}_selected",
            feature_names=selected_names,
            transformations=self.transformations,
            source_dataset=self.name,
        )

    def get_feature_statistics(self, feature_name: str) -> dict[str, float] | None:
        """Get statistics for a specific feature.

        Args:
            feature_name: Name of the feature

        Returns:
            Dictionary of statistics or None if not found
        """
        return self.statistics.get(feature_name)

    def to_dict(self) -> dict[str, Any]:
        """Convert FeatureSet to dictionary.

        Returns:
            Dictionary representation (excluding data)
        """
        return {
            "id": self.id,
            "name": self.name,
            "type": self.type,
            "feature_names": self.feature_names,
            "num_features": self.num_features,
            "num_samples": self.num_samples,
            "feature_types": self.feature_types,
            "statistics": self.statistics,
            "transformations": self.transformations,
            "source_dataset": self.source_dataset,
            "created_at": self.created_at.isoformat(),
            "tags": self.tags,
            "properties": self.properties,
        }

    def __repr__(self) -> str:
        return f"FeatureSet(name='{self.name}', num_features={self.num_features}, num_samples={self.num_samples})"
@@ -0,0 +1,104 @@
1
+ """Metrics Asset - Represents experiment metrics and evaluation results."""
2
+
3
+ from typing import Any
4
+ from flowyml.assets.base import Asset
5
+
6
+
7
class Metrics(Asset):
    """Metrics asset for experiment tracking.

    Metric values are stored as the asset's ``data`` dict and mirrored into
    ``metadata.properties``.

    Example:
        >>> metrics = Metrics(name="training_metrics", data={"accuracy": 0.95, "loss": 0.05, "f1_score": 0.93})
    """

    def __init__(
        self,
        name: str,
        version: str | None = None,
        data: dict[str, Any] | None = None,
        parent: Asset | None = None,
        tags: dict[str, str] | None = None,
        properties: dict[str, Any] | None = None,
        **metrics,
    ):
        """Initialize the metrics asset.

        Args:
            name: Name of the asset.
            version: Optional version string.
            data: Metric name -> value mapping. It is copied, never mutated.
            parent: Optional parent asset.
            tags: Optional tags.
            properties: Optional extra properties.
            **metrics: Additional metrics; these override same-named keys
                from ``data``.
        """
        # Merge into a fresh dict: updating `data` in place would leak the
        # keyword metrics back into the caller's dictionary.
        all_metrics = {**(data or {}), **metrics}

        super().__init__(
            name=name,
            version=version,
            data=all_metrics,
            parent=parent,
            tags=tags,
            properties=properties,
        )

        # Store metrics in properties for easy access
        self.metadata.properties.update(all_metrics)

    def get_metric(self, name: str, default: Any = None) -> Any:
        """Get a specific metric value.

        Looks in ``data`` first, then falls back to ``metadata.properties``,
        and finally to ``default``.
        """
        if self.data and name in self.data:
            return self.data[name]
        return self.metadata.properties.get(name, default)

    def add_metric(self, name: str, value: Any) -> None:
        """Add a new metric (or overwrite an existing one)."""
        if self.data is None:
            self.data = {}
        self.data[name] = value
        self.metadata.properties[name] = value

    def get_all_metrics(self) -> dict[str, Any]:
        """Get all metrics (an empty dict when none are stored)."""
        return self.data or {}

    def compare_with(self, other: "Metrics") -> dict[str, dict[str, Any]]:
        """Compare metrics with another Metrics asset.

        Args:
            other: The Metrics asset to compare against.

        Returns:
            Dictionary mapping every metric name found in either asset to a
            dict with keys ``"self"``, ``"other"`` and ``"diff"``. ``"diff"``
            is ``self - other`` when both values are ``int``/``float`` and
            ``None`` otherwise (including when a metric is missing on one side).
        """
        self_metrics = self.get_all_metrics()
        other_metrics = other.get_all_metrics()

        # Union of keys in a stable order: this asset's metrics first, then
        # the ones only present in `other`.
        all_keys = dict.fromkeys([*self_metrics, *other_metrics])

        comparison = {}
        for key in all_keys:
            self_val = self_metrics.get(key)
            other_val = other_metrics.get(key)

            # isinstance already rejects None, so no separate None check.
            both_numeric = isinstance(self_val, (int, float)) and isinstance(other_val, (int, float))
            comparison[key] = {
                "self": self_val,
                "other": other_val,
                "diff": self_val - other_val if both_numeric else None,
            }

        return comparison

    @classmethod
    def create(
        cls,
        name: str | None = None,
        parent: Asset | None = None,
        **metrics,
    ) -> "Metrics":
        """Factory method to create metrics.

        Args:
            name: Asset name; defaults to ``"metrics"``.
            parent: Optional parent asset.
            **metrics: Metric name/value pairs.

        Example:
            >>> metrics = Metrics.create(accuracy=0.95, loss=0.05, training_time="2h 15m")
        """
        return cls(
            name=name or "metrics",
            data=metrics,
            parent=parent,
        )
@@ -0,0 +1,82 @@
1
+ """Model Asset - Represents ML models with metadata and lineage."""
2
+
3
+ from typing import Any
4
+ from flowyml.assets.base import Asset
5
+
6
+
7
class Model(Asset):
    """Model asset with training metadata and lineage.

    Example:
        >>> model = Model(
        ...     name="resnet50_v1",
        ...     version="v1.0.0",
        ...     data=trained_model,
        ...     architecture="resnet50",
        ...     framework="pytorch",
        ...     properties={"params": 25_557_032},
        ... )
    """

    def __init__(
        self,
        name: str,
        version: str | None = None,
        data: Any = None,
        architecture: str | None = None,
        framework: str | None = None,
        input_shape: tuple | None = None,
        output_shape: tuple | None = None,
        trained_on: Asset | None = None,
        parent: Asset | None = None,
        tags: dict[str, str] | None = None,
        properties: dict[str, Any] | None = None,
    ):
        """Initialize the model asset.

        Args:
            name: Name of the model.
            version: Optional version string.
            data: The model object itself.
            architecture: Architecture name, mirrored into properties when set.
            framework: Framework name, mirrored into properties when set.
            input_shape: Input shape, mirrored into properties when not None.
            output_shape: Output shape, mirrored into properties when not None.
            trained_on: Asset the model was trained on; it is appended to this
                model's parents and this model to its children.
            parent: Optional parent asset.
            tags: Optional tags.
            properties: Optional extra properties.
        """
        super().__init__(
            name=name,
            version=version,
            data=data,
            parent=parent,
            tags=tags,
            properties=properties,
        )

        self.architecture = architecture
        self.framework = framework
        self.input_shape = input_shape
        self.output_shape = output_shape

        # Track training dataset. Compare against None explicitly so an asset
        # that happens to evaluate as falsy is still linked into the lineage.
        if trained_on is not None:
            self.parents.append(trained_on)
            trained_on.children.append(self)

        # Add model-specific properties
        model_properties = self.metadata.properties
        if architecture:
            model_properties["architecture"] = architecture
        if framework:
            model_properties["framework"] = framework
        # `()` is a legitimate (scalar) shape, so only None means "not given".
        if input_shape is not None:
            model_properties["input_shape"] = input_shape
        if output_shape is not None:
            model_properties["output_shape"] = output_shape

    def get_training_datasets(self):
        """Get all datasets this model was trained on.

        Returns:
            The parents of this model that are ``Dataset`` instances.
        """
        # Imported here rather than at module level, as in the original code.
        from flowyml.assets.dataset import Dataset

        return [p for p in self.parents if isinstance(p, Dataset)]

    def get_parameters_count(self) -> int | None:
        """Get number of model parameters if available.

        Reads the ``"params"`` property and falls back to ``"parameters"``
        only when ``"params"`` is absent (or None), so a stored count of 0 is
        returned as 0.
        """
        model_properties = self.metadata.properties
        count = model_properties.get("params")
        if count is not None:
            return count
        return model_properties.get("parameters")

    def get_architecture_info(self) -> dict[str, Any]:
        """Get architecture information.

        Returns:
            Dict with ``architecture``, ``framework``, ``input_shape``,
            ``output_shape`` and ``parameters`` entries.
        """
        return {
            "architecture": self.architecture,
            "framework": self.framework,
            "input_shape": self.input_shape,
            "output_shape": self.output_shape,
            "parameters": self.get_parameters_count(),
        }
@@ -0,0 +1,157 @@
1
+ """Asset Registry - Central registry for all pipeline assets."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from datetime import datetime
6
+
7
+ from flowyml.assets.base import Asset
8
+
9
+
10
class AssetRegistry:
    """Central registry for managing and querying assets.

    Assets are kept in memory, indexed by id, by name and by asset type.
    Every mutation writes the assets' ``to_dict()`` output to
    ``<registry_dir>/registry.json``.
    """

    def __init__(self, registry_dir: str = ".flowyml/assets"):
        """Create the registry directory if needed and set up empty indexes.

        Args:
            registry_dir: Directory that holds ``registry.json``.
        """
        self.registry_dir = Path(registry_dir)
        self.registry_dir.mkdir(parents=True, exist_ok=True)

        self.assets: dict[str, Asset] = {}  # id -> asset
        self.assets_by_name: dict[str, list[Asset]] = {}  # name -> [assets]
        self.assets_by_type: dict[str, set[Asset]] = {}  # type -> {assets}

        self._load_registry()

    def register(self, asset: Asset) -> None:
        """Register an asset, replacing any asset already stored under its id.

        Args:
            asset: Asset to register. It must be hashable, because the type
                index stores assets in a set.
        """
        # Drop the previous holder of this id from the secondary indexes
        # first; otherwise re-registering would leave duplicate or stale
        # entries behind.
        previous = self.assets.get(asset.id)
        if previous is not None:
            self._unindex(previous)

        self.assets[asset.id] = asset
        self.assets_by_name.setdefault(asset.name, []).append(asset)
        self.assets_by_type.setdefault(asset.metadata.asset_type, set()).add(asset)

        self._save_registry()

    def _unindex(self, asset: Asset) -> None:
        """Remove ``asset`` (matched by identity) from the name and type indexes."""
        same_name = self.assets_by_name.get(asset.name)
        if same_name is not None:
            remaining = [a for a in same_name if a is not asset]
            if remaining:
                self.assets_by_name[asset.name] = remaining
            else:
                del self.assets_by_name[asset.name]

        asset_type = asset.metadata.asset_type
        same_type = self.assets_by_type.get(asset_type)
        if same_type is not None:
            same_type.discard(asset)
            if not same_type:
                del self.assets_by_type[asset_type]

    def get(self, asset_id: str) -> Asset | None:
        """Get asset by ID, or None when the id is unknown."""
        return self.assets.get(asset_id)

    def get_by_name(
        self,
        name: str,
        version: str | None = None,
    ) -> Asset | None:
        """Get asset by name and optionally version.

        Returns the most recently created asset (by ``metadata.created_at``)
        if version not specified, and None when nothing matches.
        """
        assets = self.assets_by_name.get(name, [])

        if not assets:
            return None

        if version:
            return next((a for a in assets if a.version == version), None)

        # Return latest version
        return max(assets, key=lambda a: a.metadata.created_at)

    def list_by_type(self, asset_type: str) -> list[Asset]:
        """List all assets of a specific type."""
        return list(self.assets_by_type.get(asset_type, set()))

    def list_all(self) -> list[Asset]:
        """List all registered assets."""
        return list(self.assets.values())

    def search(
        self,
        name: str | None = None,
        asset_type: str | None = None,
        tags: dict[str, str] | None = None,
        created_after: datetime | None = None,
    ) -> list[Asset]:
        """Search for assets matching criteria.

        Criteria that are None (or empty) are ignored; the rest are ANDed.

        Args:
            name: Filter by name (case-insensitive substring match)
            asset_type: Filter by asset type
            tags: Filter by tags (must match all)
            created_after: Filter by creation date (strictly after)

        Returns:
            List of matching assets
        """
        results = list(self.assets.values())

        if name:
            needle = name.lower()
            results = [a for a in results if needle in a.name.lower()]

        if asset_type:
            results = [a for a in results if a.metadata.asset_type == asset_type]

        if tags:
            results = [a for a in results if all(a.metadata.tags.get(k) == v for k, v in tags.items())]

        if created_after:
            results = [a for a in results if a.metadata.created_at > created_after]

        return results

    def get_lineage_graph(self, asset_id: str) -> dict:
        """Get full lineage graph for an asset (empty dict for unknown ids)."""
        asset = self.get(asset_id)
        if not asset:
            return {}
        return asset.get_lineage()

    def _save_registry(self) -> None:
        """Save registry metadata to disk."""
        registry_file = self.registry_dir / "registry.json"

        data = {
            "assets": {asset_id: asset.to_dict() for asset_id, asset in self.assets.items()},
        }

        with open(registry_file, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    def _load_registry(self) -> None:
        """Load registry metadata from disk.

        Currently this only reads and parses the file; the parsed content is
        not turned back into asset objects, so the in-memory indexes stay
        empty after construction.
        """
        registry_file = self.registry_dir / "registry.json"

        if not registry_file.exists():
            return

        try:
            with open(registry_file, encoding="utf-8") as f:
                json.load(f)

            # Note: This is simplified - in production, we'd need to
            # deserialize the actual asset objects with their data
            # For now, we just load the metadata

        except (OSError, ValueError):
            # Best effort: an unreadable or malformed registry file must not
            # prevent the registry from starting. json.JSONDecodeError and
            # UnicodeDecodeError are both ValueError subclasses.
            pass

    def clear(self) -> None:
        """Clear the registry and persist the now-empty state."""
        self.assets.clear()
        self.assets_by_name.clear()
        self.assets_by_type.clear()
        self._save_registry()

    def stats(self) -> dict[str, int | dict[str, int]]:
        """Get registry statistics.

        Returns:
            Dict with ``"total_assets"`` (an int) and ``"by_type"`` (asset
            type -> number of assets of that type).
        """
        return {
            "total_assets": len(self.assets),
            "by_type": {asset_type: len(assets) for asset_type, assets in self.assets_by_type.items()},
        }