flowyml 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. flowyml/__init__.py +207 -0
  2. flowyml/assets/__init__.py +22 -0
  3. flowyml/assets/artifact.py +40 -0
  4. flowyml/assets/base.py +209 -0
  5. flowyml/assets/dataset.py +100 -0
  6. flowyml/assets/featureset.py +301 -0
  7. flowyml/assets/metrics.py +104 -0
  8. flowyml/assets/model.py +82 -0
  9. flowyml/assets/registry.py +157 -0
  10. flowyml/assets/report.py +315 -0
  11. flowyml/cli/__init__.py +5 -0
  12. flowyml/cli/experiment.py +232 -0
  13. flowyml/cli/init.py +256 -0
  14. flowyml/cli/main.py +327 -0
  15. flowyml/cli/run.py +75 -0
  16. flowyml/cli/stack_cli.py +532 -0
  17. flowyml/cli/ui.py +33 -0
  18. flowyml/core/__init__.py +68 -0
  19. flowyml/core/advanced_cache.py +274 -0
  20. flowyml/core/approval.py +64 -0
  21. flowyml/core/cache.py +203 -0
  22. flowyml/core/checkpoint.py +148 -0
  23. flowyml/core/conditional.py +373 -0
  24. flowyml/core/context.py +155 -0
  25. flowyml/core/error_handling.py +419 -0
  26. flowyml/core/executor.py +354 -0
  27. flowyml/core/graph.py +185 -0
  28. flowyml/core/parallel.py +452 -0
  29. flowyml/core/pipeline.py +764 -0
  30. flowyml/core/project.py +253 -0
  31. flowyml/core/resources.py +424 -0
  32. flowyml/core/scheduler.py +630 -0
  33. flowyml/core/scheduler_config.py +32 -0
  34. flowyml/core/step.py +201 -0
  35. flowyml/core/step_grouping.py +292 -0
  36. flowyml/core/templates.py +226 -0
  37. flowyml/core/versioning.py +217 -0
  38. flowyml/integrations/__init__.py +1 -0
  39. flowyml/integrations/keras.py +134 -0
  40. flowyml/monitoring/__init__.py +1 -0
  41. flowyml/monitoring/alerts.py +57 -0
  42. flowyml/monitoring/data.py +102 -0
  43. flowyml/monitoring/llm.py +160 -0
  44. flowyml/monitoring/monitor.py +57 -0
  45. flowyml/monitoring/notifications.py +246 -0
  46. flowyml/registry/__init__.py +5 -0
  47. flowyml/registry/model_registry.py +491 -0
  48. flowyml/registry/pipeline_registry.py +55 -0
  49. flowyml/stacks/__init__.py +27 -0
  50. flowyml/stacks/base.py +77 -0
  51. flowyml/stacks/bridge.py +288 -0
  52. flowyml/stacks/components.py +155 -0
  53. flowyml/stacks/gcp.py +499 -0
  54. flowyml/stacks/local.py +112 -0
  55. flowyml/stacks/migration.py +97 -0
  56. flowyml/stacks/plugin_config.py +78 -0
  57. flowyml/stacks/plugins.py +401 -0
  58. flowyml/stacks/registry.py +226 -0
  59. flowyml/storage/__init__.py +26 -0
  60. flowyml/storage/artifacts.py +246 -0
  61. flowyml/storage/materializers/__init__.py +20 -0
  62. flowyml/storage/materializers/base.py +133 -0
  63. flowyml/storage/materializers/keras.py +185 -0
  64. flowyml/storage/materializers/numpy.py +94 -0
  65. flowyml/storage/materializers/pandas.py +142 -0
  66. flowyml/storage/materializers/pytorch.py +135 -0
  67. flowyml/storage/materializers/sklearn.py +110 -0
  68. flowyml/storage/materializers/tensorflow.py +152 -0
  69. flowyml/storage/metadata.py +931 -0
  70. flowyml/tracking/__init__.py +1 -0
  71. flowyml/tracking/experiment.py +211 -0
  72. flowyml/tracking/leaderboard.py +191 -0
  73. flowyml/tracking/runs.py +145 -0
  74. flowyml/ui/__init__.py +15 -0
  75. flowyml/ui/backend/Dockerfile +31 -0
  76. flowyml/ui/backend/__init__.py +0 -0
  77. flowyml/ui/backend/auth.py +163 -0
  78. flowyml/ui/backend/main.py +187 -0
  79. flowyml/ui/backend/routers/__init__.py +0 -0
  80. flowyml/ui/backend/routers/assets.py +45 -0
  81. flowyml/ui/backend/routers/execution.py +179 -0
  82. flowyml/ui/backend/routers/experiments.py +49 -0
  83. flowyml/ui/backend/routers/leaderboard.py +118 -0
  84. flowyml/ui/backend/routers/notifications.py +72 -0
  85. flowyml/ui/backend/routers/pipelines.py +110 -0
  86. flowyml/ui/backend/routers/plugins.py +192 -0
  87. flowyml/ui/backend/routers/projects.py +85 -0
  88. flowyml/ui/backend/routers/runs.py +66 -0
  89. flowyml/ui/backend/routers/schedules.py +222 -0
  90. flowyml/ui/backend/routers/traces.py +84 -0
  91. flowyml/ui/frontend/Dockerfile +20 -0
  92. flowyml/ui/frontend/README.md +315 -0
  93. flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
  94. flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
  95. flowyml/ui/frontend/dist/index.html +16 -0
  96. flowyml/ui/frontend/index.html +15 -0
  97. flowyml/ui/frontend/nginx.conf +26 -0
  98. flowyml/ui/frontend/package-lock.json +3545 -0
  99. flowyml/ui/frontend/package.json +33 -0
  100. flowyml/ui/frontend/postcss.config.js +6 -0
  101. flowyml/ui/frontend/src/App.jsx +21 -0
  102. flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
  103. flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
  104. flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
  105. flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
  106. flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
  107. flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
  108. flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
  109. flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
  110. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
  111. flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
  112. flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
  113. flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
  114. flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
  115. flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
  116. flowyml/ui/frontend/src/components/Layout.jsx +108 -0
  117. flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
  118. flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
  119. flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
  120. flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
  121. flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
  122. flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
  123. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
  124. flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
  125. flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
  126. flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
  127. flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
  128. flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
  129. flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
  130. flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
  131. flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
  132. flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
  133. flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
  134. flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
  135. flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
  136. flowyml/ui/frontend/src/index.css +11 -0
  137. flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
  138. flowyml/ui/frontend/src/main.jsx +10 -0
  139. flowyml/ui/frontend/src/router/index.jsx +39 -0
  140. flowyml/ui/frontend/src/services/pluginService.js +90 -0
  141. flowyml/ui/frontend/src/utils/api.js +47 -0
  142. flowyml/ui/frontend/src/utils/cn.js +6 -0
  143. flowyml/ui/frontend/tailwind.config.js +31 -0
  144. flowyml/ui/frontend/vite.config.js +21 -0
  145. flowyml/ui/utils.py +77 -0
  146. flowyml/utils/__init__.py +67 -0
  147. flowyml/utils/config.py +308 -0
  148. flowyml/utils/debug.py +240 -0
  149. flowyml/utils/environment.py +346 -0
  150. flowyml/utils/git.py +319 -0
  151. flowyml/utils/logging.py +61 -0
  152. flowyml/utils/performance.py +314 -0
  153. flowyml/utils/stack_config.py +296 -0
  154. flowyml/utils/validation.py +270 -0
  155. flowyml-1.1.0.dist-info/METADATA +372 -0
  156. flowyml-1.1.0.dist-info/RECORD +159 -0
  157. flowyml-1.1.0.dist-info/WHEEL +4 -0
  158. flowyml-1.1.0.dist-info/entry_points.txt +3 -0
  159. flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
flowyml/__init__.py ADDED
@@ -0,0 +1,207 @@
1
+ """🌊 flowyml - Next-Generation ML Pipeline Framework.
2
+
3
+ flowyml is a developer-first ML pipeline orchestration framework that combines
4
+ the simplicity of Metaflow with the power of ZenML and the elegance of
5
+ asset-centric design.
6
+ """
7
+
8
from importlib.metadata import PackageNotFoundError, version as _dist_version

try:
    # Read the version from the installed distribution metadata so that
    # ``flowyml.__version__`` always agrees with the released package instead
    # of drifting from it (the previous hard-coded value was "0.1.0" inside
    # the 1.1.0 wheel).
    __version__ = _dist_version("flowyml")
except PackageNotFoundError:
    # No installed metadata (e.g. running straight from a source checkout):
    # fall back to the version of this release.
    __version__ = "1.1.0"
__author__ = "flowyml Team"
10
+
11
+ # Core imports
12
+ from flowyml.core.context import Context, context
13
+ from flowyml.core.step import step, Step
14
+ from flowyml.core.pipeline import Pipeline
15
+ from flowyml.core.executor import Executor, LocalExecutor
16
+ from flowyml.core.cache import CacheStrategy
17
+ from flowyml.core.conditional import Condition, ConditionalBranch, Switch, when, unless
18
+ from flowyml.core.parallel import ParallelExecutor, DataParallelExecutor, BatchExecutor, parallel_map
19
+ from flowyml.core.error_handling import (
20
+ CircuitBreaker,
21
+ ExponentialBackoff,
22
+ RetryConfig,
23
+ FallbackHandler,
24
+ retry,
25
+ on_failure,
26
+ )
27
+
28
+ # Asset imports
29
+ from flowyml.assets.base import Asset
30
+ from flowyml.assets.dataset import Dataset
31
+ from flowyml.assets.model import Model
32
+ from flowyml.assets.metrics import Metrics
33
+ from flowyml.assets.artifact import Artifact
34
+ from flowyml.assets.featureset import FeatureSet
35
+ from flowyml.assets.report import Report
36
+ from flowyml.assets.registry import AssetRegistry
37
+
38
+ # Stack imports
39
+ from flowyml.stacks.base import Stack
40
+ from flowyml.stacks.local import LocalStack
41
+
42
+ # Tracking imports
43
+ from flowyml.tracking.experiment import Experiment
44
+ from flowyml.tracking.runs import Run
45
+
46
+ # Registry imports
47
+ from flowyml.registry.model_registry import ModelRegistry, ModelVersion, ModelStage
48
+
49
+ # Storage imports (for advanced usage)
50
+ from flowyml.storage import (
51
+ ArtifactStore,
52
+ LocalArtifactStore,
53
+ MetadataStore,
54
+ SQLiteMetadataStore,
55
+ materializer_registry,
56
+ )
57
+
58
+ # Monitoring & Integrations
59
+ from flowyml.monitoring.llm import trace_llm, tracer
60
+ from flowyml.monitoring.data import detect_drift, compute_stats
61
+ from flowyml.monitoring.notifications import (
62
+ NotificationManager,
63
+ configure_notifications,
64
+ get_notifier,
65
+ ConsoleNotifier,
66
+ SlackNotifier,
67
+ EmailNotifier,
68
+ )
69
+ from flowyml.integrations.keras import FlowymlKerasCallback
70
+
71
+ # Advanced Features
72
+ from flowyml.core.scheduler import PipelineScheduler
73
+ from flowyml.core.approval import approval, ApprovalStep
74
+ from flowyml.core.checkpoint import PipelineCheckpoint, checkpoint_enabled_pipeline
75
+ from flowyml.core.templates import create_from_template, list_templates, TEMPLATES
76
+ from flowyml.tracking.leaderboard import ModelLeaderboard, compare_runs
77
+ from flowyml.core.versioning import VersionedPipeline, PipelineVersion
78
+ from flowyml.core.project import Project, ProjectManager
79
+ from flowyml.core.advanced_cache import (
80
+ ContentBasedCache,
81
+ SharedCache,
82
+ SmartCache,
83
+ memoize,
84
+ )
85
+ from flowyml.utils.debug import (
86
+ StepDebugger,
87
+ PipelineDebugger,
88
+ debug_step,
89
+ trace_step,
90
+ profile_step,
91
+ inspect_step,
92
+ )
93
+ from flowyml.utils.performance import (
94
+ LazyValue,
95
+ lazy_property,
96
+ IncrementalComputation,
97
+ GPUResourceManager,
98
+ optimize_dataframe,
99
+ batch_iterator,
100
+ )
101
+ from flowyml.registry.pipeline_registry import pipeline_registry, register_pipeline
102
+
103
# Public API of the top-level ``flowyml`` package.  Every name appears exactly
# once ("ParallelExecutor" was previously listed a second time under
# "Performance").
__all__ = [
    # Core
    "Context",
    "context",
    "step",
    "Step",
    "Pipeline",
    "Executor",
    "LocalExecutor",
    "CacheStrategy",
    # Conditional & Control Flow
    "Condition",
    "ConditionalBranch",
    "Switch",
    "when",
    "unless",
    # Parallel Execution
    "ParallelExecutor",
    "DataParallelExecutor",
    "BatchExecutor",
    "parallel_map",
    # Error Handling
    "CircuitBreaker",
    "ExponentialBackoff",
    "RetryConfig",
    "FallbackHandler",
    "retry",
    "on_failure",
    # Assets
    "Asset",
    "Dataset",
    "Model",
    "Metrics",
    "Artifact",
    "FeatureSet",
    "Report",
    "AssetRegistry",
    # Stacks
    "Stack",
    "LocalStack",
    # Tracking
    "Experiment",
    "Run",
    # Registry
    "ModelRegistry",
    "ModelVersion",
    "ModelStage",
    # Storage
    "ArtifactStore",
    "LocalArtifactStore",
    "MetadataStore",
    "SQLiteMetadataStore",
    "materializer_registry",
    # Monitoring & Integrations
    "trace_llm",
    "tracer",
    "detect_drift",
    "compute_stats",
    "FlowymlKerasCallback",
    # Advanced Features
    "PipelineScheduler",
    "approval",
    "ApprovalStep",
    "PipelineCheckpoint",
    "checkpoint_enabled_pipeline",
    "create_from_template",
    "list_templates",
    "TEMPLATES",
    "ModelLeaderboard",
    "compare_runs",
    "NotificationManager",
    "configure_notifications",
    "get_notifier",
    "ConsoleNotifier",
    "SlackNotifier",
    "EmailNotifier",
    # Versioning & Projects
    "VersionedPipeline",
    "PipelineVersion",
    "Project",
    "ProjectManager",
    # Advanced Caching
    "ContentBasedCache",
    "SharedCache",
    "SmartCache",
    "memoize",
    # Debugging
    "StepDebugger",
    "PipelineDebugger",
    "debug_step",
    "trace_step",
    "profile_step",
    "inspect_step",
    # Performance
    "LazyValue",
    "lazy_property",
    "IncrementalComputation",
    "GPUResourceManager",
    "optimize_dataframe",
    "batch_iterator",
    # Pipeline Registry
    "pipeline_registry",
    "register_pipeline",
]
@@ -0,0 +1,22 @@
1
+ """Asset-centric design for ML pipelines."""
2
+
3
+ from flowyml.assets.base import Asset, AssetMetadata
4
+ from flowyml.assets.dataset import Dataset
5
+ from flowyml.assets.model import Model
6
+ from flowyml.assets.metrics import Metrics
7
+ from flowyml.assets.artifact import Artifact
8
+ from flowyml.assets.featureset import FeatureSet
9
+ from flowyml.assets.report import Report
10
+ from flowyml.assets.registry import AssetRegistry
11
+
12
# Names re-exported by ``flowyml.assets``: the base asset types first, then
# the concrete asset kinds, then the registry.
__all__ = [
    "Asset", "AssetMetadata",
    "Dataset", "Model", "Metrics", "Artifact", "FeatureSet", "Report",
    "AssetRegistry",
]
@@ -0,0 +1,40 @@
1
+ """Artifact Asset - Represents generic artifacts (configs, checkpoints, etc)."""
2
+
3
+ from typing import Any
4
+ from flowyml.assets.base import Asset
5
+
6
+
7
class Artifact(Asset):
    """Generic artifact asset for configs, checkpoints, reports, etc.

    On top of the common ``Asset`` fields an artifact carries an
    ``artifact_type`` label and an optional ``file_path``.  Both are also
    recorded in ``metadata.properties`` (``file_path`` only when it is truthy).

    Args:
        name: Asset name.
        artifact_type: Free-form label describing the artifact kind.
        version: Asset version; the base class supplies a default when omitted.
        data: The wrapped object, if any.
        file_path: Optional location of the artifact on disk.
        parent: Parent asset used for lineage tracking.
        tags: Optional tags forwarded to the base class.
        properties: Optional properties; copied, never modified in place.

    Example:
        >>> config = Artifact(name="training_config", artifact_type="config", data={"lr": 0.001, "epochs": 10})
    """

    def __init__(
        self,
        name: str,
        artifact_type: str = "generic",
        version: str | None = None,
        data: Any = None,
        file_path: str | None = None,
        parent: Asset | None = None,
        tags: dict[str, str] | None = None,
        properties: dict[str, Any] | None = None,
    ):
        # Pass a private copy of ``properties``: the artifact-specific keys
        # written below would otherwise end up in the caller's dictionary.
        super().__init__(
            name=name,
            version=version,
            data=data,
            parent=parent,
            tags=tags,
            properties=dict(properties) if properties else None,
        )

        self.artifact_type = artifact_type
        self.file_path = file_path

        # Mirror the artifact-specific fields into the metadata properties.
        self.metadata.properties["artifact_type"] = artifact_type
        if file_path:
            self.metadata.properties["file_path"] = file_path
flowyml/assets/base.py ADDED
@@ -0,0 +1,209 @@
1
+ """Base Asset - Foundation for all ML assets in flowyml."""
2
+
3
+ import hashlib
4
+ import json
5
+ from typing import Any, Optional
6
+ from datetime import datetime
7
+ from dataclasses import dataclass, field
8
+ from uuid import uuid4
9
+
10
+
11
@dataclass
class AssetMetadata:
    """Metadata for an asset.

    The three container fields default to fresh empty containers per instance.
    """

    asset_id: str
    name: str
    version: str
    asset_type: str
    created_at: datetime
    created_by: str
    parent_ids: list[str] = field(default_factory=list)
    tags: dict[str, str] = field(default_factory=dict)
    properties: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Convert to a plain dictionary.

        ``created_at`` is rendered with ``datetime.isoformat()``.  The
        ``parent_ids``, ``tags`` and ``properties`` containers are shallow
        copies, so editing the returned dictionary cannot alter this metadata
        object (values stored inside ``properties`` are still shared).

        Returns:
            Dictionary with one key per metadata field.
        """
        return {
            "asset_id": self.asset_id,
            "name": self.name,
            "version": self.version,
            "asset_type": self.asset_type,
            "created_at": self.created_at.isoformat(),
            "created_by": self.created_by,
            "parent_ids": list(self.parent_ids),
            "tags": dict(self.tags),
            "properties": dict(self.properties),
        }
38
+
39
+
40
+ class Asset:
41
+ """Base class for all ML assets (datasets, models, features, etc).
42
+
43
+ Assets are first-class objects in flowyml pipelines with full lineage tracking.
44
+ """
45
+
46
+ def __init__(
47
+ self,
48
+ name: str,
49
+ version: str | None = None,
50
+ data: Any = None,
51
+ parent: Optional["Asset"] = None,
52
+ tags: dict[str, str] | None = None,
53
+ properties: dict[str, Any] | None = None,
54
+ ):
55
+ self.name = name
56
+ self.version = version or "v1.0.0"
57
+ self.data = data
58
+ self.asset_id = str(uuid4())
59
+
60
+ # Metadata
61
+ self.metadata = AssetMetadata(
62
+ asset_id=self.asset_id,
63
+ name=name,
64
+ version=self.version,
65
+ asset_type=self.__class__.__name__,
66
+ created_at=datetime.now(),
67
+ created_by="flowyml",
68
+ parent_ids=[parent.asset_id] if parent else [],
69
+ tags=tags or {},
70
+ properties=properties or {},
71
+ )
72
+
73
+ # Lineage tracking
74
+ self.parents: list[Asset] = [parent] if parent else []
75
+ self.children: list[Asset] = []
76
+
77
+ if parent:
78
+ parent.children.append(self)
79
+
80
+ @classmethod
81
+ def create(
82
+ cls,
83
+ data: Any,
84
+ name: str | None = None,
85
+ version: str | None = None,
86
+ parent: Optional["Asset"] = None,
87
+ **kwargs: Any,
88
+ ) -> "Asset":
89
+ """Factory method to create an asset.
90
+
91
+ Args:
92
+ data: The actual data/object
93
+ name: Asset name
94
+ version: Asset version
95
+ parent: Parent asset for lineage
96
+ **kwargs: Additional metadata
97
+
98
+ Returns:
99
+ New asset instance
100
+ """
101
+ asset_name = name or f"{cls.__name__}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
102
+
103
+ # Extract tags and properties if passed explicitly
104
+ tags = kwargs.pop("tags", {})
105
+ props = kwargs.pop("properties", {})
106
+ # Merge remaining kwargs into properties
107
+ props.update(kwargs)
108
+
109
+ return cls(
110
+ name=asset_name,
111
+ version=version,
112
+ data=data,
113
+ parent=parent,
114
+ tags=tags,
115
+ properties=props,
116
+ )
117
+
118
+ def get_hash(self) -> str:
119
+ """Generate hash of asset for caching/versioning."""
120
+ content = json.dumps(
121
+ {
122
+ "name": self.name,
123
+ "version": self.version,
124
+ "type": self.metadata.asset_type,
125
+ "created_at": self.metadata.created_at.isoformat(),
126
+ },
127
+ sort_keys=True,
128
+ )
129
+ return hashlib.sha256(content.encode()).hexdigest()[:16]
130
+
131
+ def get_lineage(self, depth: int = -1) -> dict[str, Any]:
132
+ """Get asset lineage.
133
+
134
+ Args:
135
+ depth: How many levels to traverse (-1 for all)
136
+
137
+ Returns:
138
+ Lineage tree as nested dict
139
+ """
140
+ lineage = {
141
+ "asset": {
142
+ "asset_id": self.asset_id,
143
+ "name": self.name,
144
+ "type": self.metadata.asset_type,
145
+ "version": self.version,
146
+ },
147
+ "parents": [],
148
+ "children": [],
149
+ }
150
+
151
+ if depth != 0:
152
+ next_depth = depth - 1 if depth > 0 else -1
153
+ lineage["parents"] = [p.get_lineage(next_depth) for p in self.parents]
154
+ lineage["children"] = [c.get_lineage(next_depth) for c in self.children]
155
+
156
+ return lineage
157
+
158
+ def get_all_ancestors(self) -> set["Asset"]:
159
+ """Get all ancestor assets."""
160
+ ancestors = set()
161
+
162
+ def traverse(asset) -> None:
163
+ for parent in asset.parents:
164
+ if parent not in ancestors:
165
+ ancestors.add(parent)
166
+ traverse(parent)
167
+
168
+ traverse(self)
169
+ return ancestors
170
+
171
+ def get_all_descendants(self) -> set["Asset"]:
172
+ """Get all descendant assets."""
173
+ descendants = set()
174
+
175
+ def traverse(asset) -> None:
176
+ for child in asset.children:
177
+ if child not in descendants:
178
+ descendants.add(child)
179
+ traverse(child)
180
+
181
+ traverse(self)
182
+ return descendants
183
+
184
+ def add_tag(self, key: str, value: str) -> None:
185
+ """Add a tag to the asset."""
186
+ self.metadata.tags[key] = value
187
+
188
+ def add_property(self, key: str, value: Any) -> None:
189
+ """Add a property to the asset."""
190
+ self.metadata.properties[key] = value
191
+
192
+ def to_dict(self) -> dict[str, Any]:
193
+ """Convert asset to dictionary."""
194
+ return {
195
+ "metadata": self.metadata.to_dict(),
196
+ "lineage": {
197
+ "parents": [p.asset_id for p in self.parents],
198
+ "children": [c.asset_id for c in self.children],
199
+ },
200
+ }
201
+
202
+ def __repr__(self) -> str:
203
+ return f"{self.__class__.__name__}(name='{self.name}', version='{self.version}')"
204
+
205
+ def __hash__(self):
206
+ return hash(self.asset_id)
207
+
208
+ def __eq__(self, other):
209
+ return isinstance(other, Asset) and self.asset_id == other.asset_id
@@ -0,0 +1,100 @@
1
+ """Dataset Asset - Represents ML datasets with schema validation."""
2
+
3
+ from typing import Any
4
+ from flowyml.assets.base import Asset
5
+
6
+
7
class Dataset(Asset):
    """Dataset asset with schema and lineage tracking.

    In addition to the common ``Asset`` fields a dataset keeps an optional
    ``schema`` object and an optional ``location`` string.  When truthy, they
    are also recorded in ``metadata.properties`` (the schema as ``str(schema)``).

    Args:
        name: Asset name.
        version: Asset version; the base class supplies a default when omitted.
        data: The wrapped dataset object.
        schema: Optional schema description.
        location: Optional storage location.
        parent: Parent asset used for lineage tracking.
        tags: Optional tags forwarded to the base class.
        properties: Optional properties; copied, never modified in place.

    Example:
        >>> raw_data = Dataset(
        ...     name="imagenet_train",
        ...     version="v2.0",
        ...     data=train_dataset,
        ...     properties={"size": "150GB", "samples": 1_281_167},
        ... )
    """

    def __init__(
        self,
        name: str,
        version: str | None = None,
        data: Any = None,
        schema: Any | None = None,
        location: str | None = None,
        parent: Asset | None = None,
        tags: dict[str, str] | None = None,
        properties: dict[str, Any] | None = None,
    ):
        # Pass a private copy of ``properties``: the dataset-specific keys
        # written below would otherwise end up in the caller's dictionary.
        super().__init__(
            name=name,
            version=version,
            data=data,
            parent=parent,
            tags=tags,
            properties=dict(properties) if properties else None,
        )

        self.schema = schema
        self.location = location

        # Add dataset-specific properties
        if schema:
            self.metadata.properties["schema"] = str(schema)
        if location:
            self.metadata.properties["location"] = location

    @property
    def size(self) -> int | str | None:
        """Return the ``size`` property exactly as it was supplied, or ``None``."""
        return self.metadata.properties.get("size")

    @property
    def num_samples(self) -> int | None:
        """Return the sample count from the ``samples`` or ``num_samples`` property.

        ``samples`` takes precedence whenever it is present, including when it
        is ``0``; ``num_samples`` is consulted only if ``samples`` is missing
        or ``None``.
        """
        props = self.metadata.properties
        samples = props.get("samples")
        return samples if samples is not None else props.get("num_samples")

    def validate_schema(self) -> bool:
        """Validate data against schema (placeholder).

        No validation is implemented yet, so this currently returns ``True``
        in every case.
        """
        if self.schema is None or self.data is None:
            return True
        # Schema validation would go here
        return True

    def split(self, train_ratio: float = 0.8, name_prefix: str | None = None) -> tuple["Dataset", "Dataset"]:
        """Split dataset into train/test.

        NOTE: this is still a placeholder.  ``train_ratio`` is not applied and
        both returned datasets wrap the same, complete ``data`` object.  What
        the call does provide is the lineage link (both results have this
        dataset as parent) and a ``split`` tag.  Properties and ``location``
        are not carried over to the results.

        Args:
            train_ratio: Ratio for training split (currently unused)
            name_prefix: Prefix for split dataset names; defaults to this
                dataset's name

        Returns:
            Tuple of (train_dataset, test_dataset)
        """
        prefix = name_prefix or self.name

        # Placeholder - actual splitting logic would depend on data type
        _ = train_ratio  # Unused in placeholder
        train_data = self.data  # Would actually split the data
        test_data = self.data

        train_dataset = Dataset(
            name=f"{prefix}_train",
            version=self.version,
            data=train_data,
            schema=self.schema,
            parent=self,
            tags={**self.metadata.tags, "split": "train"},
        )

        test_dataset = Dataset(
            name=f"{prefix}_test",
            version=self.version,
            data=test_data,
            schema=self.schema,
            parent=self,
            tags={**self.metadata.tags, "split": "test"},
        )

        return train_dataset, test_dataset