flowyml 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/__init__.py +207 -0
- flowyml/assets/__init__.py +22 -0
- flowyml/assets/artifact.py +40 -0
- flowyml/assets/base.py +209 -0
- flowyml/assets/dataset.py +100 -0
- flowyml/assets/featureset.py +301 -0
- flowyml/assets/metrics.py +104 -0
- flowyml/assets/model.py +82 -0
- flowyml/assets/registry.py +157 -0
- flowyml/assets/report.py +315 -0
- flowyml/cli/__init__.py +5 -0
- flowyml/cli/experiment.py +232 -0
- flowyml/cli/init.py +256 -0
- flowyml/cli/main.py +327 -0
- flowyml/cli/run.py +75 -0
- flowyml/cli/stack_cli.py +532 -0
- flowyml/cli/ui.py +33 -0
- flowyml/core/__init__.py +68 -0
- flowyml/core/advanced_cache.py +274 -0
- flowyml/core/approval.py +64 -0
- flowyml/core/cache.py +203 -0
- flowyml/core/checkpoint.py +148 -0
- flowyml/core/conditional.py +373 -0
- flowyml/core/context.py +155 -0
- flowyml/core/error_handling.py +419 -0
- flowyml/core/executor.py +354 -0
- flowyml/core/graph.py +185 -0
- flowyml/core/parallel.py +452 -0
- flowyml/core/pipeline.py +764 -0
- flowyml/core/project.py +253 -0
- flowyml/core/resources.py +424 -0
- flowyml/core/scheduler.py +630 -0
- flowyml/core/scheduler_config.py +32 -0
- flowyml/core/step.py +201 -0
- flowyml/core/step_grouping.py +292 -0
- flowyml/core/templates.py +226 -0
- flowyml/core/versioning.py +217 -0
- flowyml/integrations/__init__.py +1 -0
- flowyml/integrations/keras.py +134 -0
- flowyml/monitoring/__init__.py +1 -0
- flowyml/monitoring/alerts.py +57 -0
- flowyml/monitoring/data.py +102 -0
- flowyml/monitoring/llm.py +160 -0
- flowyml/monitoring/monitor.py +57 -0
- flowyml/monitoring/notifications.py +246 -0
- flowyml/registry/__init__.py +5 -0
- flowyml/registry/model_registry.py +491 -0
- flowyml/registry/pipeline_registry.py +55 -0
- flowyml/stacks/__init__.py +27 -0
- flowyml/stacks/base.py +77 -0
- flowyml/stacks/bridge.py +288 -0
- flowyml/stacks/components.py +155 -0
- flowyml/stacks/gcp.py +499 -0
- flowyml/stacks/local.py +112 -0
- flowyml/stacks/migration.py +97 -0
- flowyml/stacks/plugin_config.py +78 -0
- flowyml/stacks/plugins.py +401 -0
- flowyml/stacks/registry.py +226 -0
- flowyml/storage/__init__.py +26 -0
- flowyml/storage/artifacts.py +246 -0
- flowyml/storage/materializers/__init__.py +20 -0
- flowyml/storage/materializers/base.py +133 -0
- flowyml/storage/materializers/keras.py +185 -0
- flowyml/storage/materializers/numpy.py +94 -0
- flowyml/storage/materializers/pandas.py +142 -0
- flowyml/storage/materializers/pytorch.py +135 -0
- flowyml/storage/materializers/sklearn.py +110 -0
- flowyml/storage/materializers/tensorflow.py +152 -0
- flowyml/storage/metadata.py +931 -0
- flowyml/tracking/__init__.py +1 -0
- flowyml/tracking/experiment.py +211 -0
- flowyml/tracking/leaderboard.py +191 -0
- flowyml/tracking/runs.py +145 -0
- flowyml/ui/__init__.py +15 -0
- flowyml/ui/backend/Dockerfile +31 -0
- flowyml/ui/backend/__init__.py +0 -0
- flowyml/ui/backend/auth.py +163 -0
- flowyml/ui/backend/main.py +187 -0
- flowyml/ui/backend/routers/__init__.py +0 -0
- flowyml/ui/backend/routers/assets.py +45 -0
- flowyml/ui/backend/routers/execution.py +179 -0
- flowyml/ui/backend/routers/experiments.py +49 -0
- flowyml/ui/backend/routers/leaderboard.py +118 -0
- flowyml/ui/backend/routers/notifications.py +72 -0
- flowyml/ui/backend/routers/pipelines.py +110 -0
- flowyml/ui/backend/routers/plugins.py +192 -0
- flowyml/ui/backend/routers/projects.py +85 -0
- flowyml/ui/backend/routers/runs.py +66 -0
- flowyml/ui/backend/routers/schedules.py +222 -0
- flowyml/ui/backend/routers/traces.py +84 -0
- flowyml/ui/frontend/Dockerfile +20 -0
- flowyml/ui/frontend/README.md +315 -0
- flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
- flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
- flowyml/ui/frontend/dist/index.html +16 -0
- flowyml/ui/frontend/index.html +15 -0
- flowyml/ui/frontend/nginx.conf +26 -0
- flowyml/ui/frontend/package-lock.json +3545 -0
- flowyml/ui/frontend/package.json +33 -0
- flowyml/ui/frontend/postcss.config.js +6 -0
- flowyml/ui/frontend/src/App.jsx +21 -0
- flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
- flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
- flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
- flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
- flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
- flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
- flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
- flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
- flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
- flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
- flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
- flowyml/ui/frontend/src/components/Layout.jsx +108 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
- flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
- flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
- flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
- flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
- flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
- flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
- flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
- flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
- flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
- flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
- flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
- flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
- flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
- flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
- flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
- flowyml/ui/frontend/src/index.css +11 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
- flowyml/ui/frontend/src/main.jsx +10 -0
- flowyml/ui/frontend/src/router/index.jsx +39 -0
- flowyml/ui/frontend/src/services/pluginService.js +90 -0
- flowyml/ui/frontend/src/utils/api.js +47 -0
- flowyml/ui/frontend/src/utils/cn.js +6 -0
- flowyml/ui/frontend/tailwind.config.js +31 -0
- flowyml/ui/frontend/vite.config.js +21 -0
- flowyml/ui/utils.py +77 -0
- flowyml/utils/__init__.py +67 -0
- flowyml/utils/config.py +308 -0
- flowyml/utils/debug.py +240 -0
- flowyml/utils/environment.py +346 -0
- flowyml/utils/git.py +319 -0
- flowyml/utils/logging.py +61 -0
- flowyml/utils/performance.py +314 -0
- flowyml/utils/stack_config.py +296 -0
- flowyml/utils/validation.py +270 -0
- flowyml-1.1.0.dist-info/METADATA +372 -0
- flowyml-1.1.0.dist-info/RECORD +159 -0
- flowyml-1.1.0.dist-info/WHEEL +4 -0
- flowyml-1.1.0.dist-info/entry_points.txt +3 -0
- flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
flowyml/__init__.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""🌊 flowyml - Next-Generation ML Pipeline Framework.
|
|
2
|
+
|
|
3
|
+
flowyml is a developer-first ML pipeline orchestration framework that combines
|
|
4
|
+
the simplicity of Metaflow with the power of ZenML and the elegance of
|
|
5
|
+
asset-centric design.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
# Runtime version string; must match the version of the published
# distribution (this wheel is flowyml 1.1.0).
__version__ = "1.1.0"
__author__ = "flowyml Team"
|
|
10
|
+
|
|
11
|
+
# Core imports
|
|
12
|
+
from flowyml.core.context import Context, context
|
|
13
|
+
from flowyml.core.step import step, Step
|
|
14
|
+
from flowyml.core.pipeline import Pipeline
|
|
15
|
+
from flowyml.core.executor import Executor, LocalExecutor
|
|
16
|
+
from flowyml.core.cache import CacheStrategy
|
|
17
|
+
from flowyml.core.conditional import Condition, ConditionalBranch, Switch, when, unless
|
|
18
|
+
from flowyml.core.parallel import ParallelExecutor, DataParallelExecutor, BatchExecutor, parallel_map
|
|
19
|
+
from flowyml.core.error_handling import (
|
|
20
|
+
CircuitBreaker,
|
|
21
|
+
ExponentialBackoff,
|
|
22
|
+
RetryConfig,
|
|
23
|
+
FallbackHandler,
|
|
24
|
+
retry,
|
|
25
|
+
on_failure,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Asset imports
|
|
29
|
+
from flowyml.assets.base import Asset
|
|
30
|
+
from flowyml.assets.dataset import Dataset
|
|
31
|
+
from flowyml.assets.model import Model
|
|
32
|
+
from flowyml.assets.metrics import Metrics
|
|
33
|
+
from flowyml.assets.artifact import Artifact
|
|
34
|
+
from flowyml.assets.featureset import FeatureSet
|
|
35
|
+
from flowyml.assets.report import Report
|
|
36
|
+
from flowyml.assets.registry import AssetRegistry
|
|
37
|
+
|
|
38
|
+
# Stack imports
|
|
39
|
+
from flowyml.stacks.base import Stack
|
|
40
|
+
from flowyml.stacks.local import LocalStack
|
|
41
|
+
|
|
42
|
+
# Tracking imports
|
|
43
|
+
from flowyml.tracking.experiment import Experiment
|
|
44
|
+
from flowyml.tracking.runs import Run
|
|
45
|
+
|
|
46
|
+
# Registry imports
|
|
47
|
+
from flowyml.registry.model_registry import ModelRegistry, ModelVersion, ModelStage
|
|
48
|
+
|
|
49
|
+
# Storage imports (for advanced usage)
|
|
50
|
+
from flowyml.storage import (
|
|
51
|
+
ArtifactStore,
|
|
52
|
+
LocalArtifactStore,
|
|
53
|
+
MetadataStore,
|
|
54
|
+
SQLiteMetadataStore,
|
|
55
|
+
materializer_registry,
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
# Monitoring & Integrations
|
|
59
|
+
from flowyml.monitoring.llm import trace_llm, tracer
|
|
60
|
+
from flowyml.monitoring.data import detect_drift, compute_stats
|
|
61
|
+
from flowyml.monitoring.notifications import (
|
|
62
|
+
NotificationManager,
|
|
63
|
+
configure_notifications,
|
|
64
|
+
get_notifier,
|
|
65
|
+
ConsoleNotifier,
|
|
66
|
+
SlackNotifier,
|
|
67
|
+
EmailNotifier,
|
|
68
|
+
)
|
|
69
|
+
from flowyml.integrations.keras import FlowymlKerasCallback
|
|
70
|
+
|
|
71
|
+
# Advanced Features
|
|
72
|
+
from flowyml.core.scheduler import PipelineScheduler
|
|
73
|
+
from flowyml.core.approval import approval, ApprovalStep
|
|
74
|
+
from flowyml.core.checkpoint import PipelineCheckpoint, checkpoint_enabled_pipeline
|
|
75
|
+
from flowyml.core.templates import create_from_template, list_templates, TEMPLATES
|
|
76
|
+
from flowyml.tracking.leaderboard import ModelLeaderboard, compare_runs
|
|
77
|
+
from flowyml.core.versioning import VersionedPipeline, PipelineVersion
|
|
78
|
+
from flowyml.core.project import Project, ProjectManager
|
|
79
|
+
from flowyml.core.advanced_cache import (
|
|
80
|
+
ContentBasedCache,
|
|
81
|
+
SharedCache,
|
|
82
|
+
SmartCache,
|
|
83
|
+
memoize,
|
|
84
|
+
)
|
|
85
|
+
from flowyml.utils.debug import (
|
|
86
|
+
StepDebugger,
|
|
87
|
+
PipelineDebugger,
|
|
88
|
+
debug_step,
|
|
89
|
+
trace_step,
|
|
90
|
+
profile_step,
|
|
91
|
+
inspect_step,
|
|
92
|
+
)
|
|
93
|
+
from flowyml.utils.performance import (
|
|
94
|
+
LazyValue,
|
|
95
|
+
lazy_property,
|
|
96
|
+
IncrementalComputation,
|
|
97
|
+
GPUResourceManager,
|
|
98
|
+
optimize_dataframe,
|
|
99
|
+
batch_iterator,
|
|
100
|
+
)
|
|
101
|
+
from flowyml.registry.pipeline_registry import pipeline_registry, register_pipeline
|
|
102
|
+
|
|
103
|
+
# Public API of the top-level ``flowyml`` package. Every name listed here is
# imported above; each name appears exactly once.
__all__ = [
    # Core
    "Context",
    "context",
    "step",
    "Step",
    "Pipeline",
    "Executor",
    "LocalExecutor",
    "CacheStrategy",
    # Conditional & Control Flow
    "Condition",
    "ConditionalBranch",
    "Switch",
    "when",
    "unless",
    # Parallel Execution
    "ParallelExecutor",
    "DataParallelExecutor",
    "BatchExecutor",
    "parallel_map",
    # Error Handling
    "CircuitBreaker",
    "ExponentialBackoff",
    "RetryConfig",
    "FallbackHandler",
    "retry",
    "on_failure",
    # Assets
    "Asset",
    "Dataset",
    "Model",
    "Metrics",
    "Artifact",
    "FeatureSet",
    "Report",
    "AssetRegistry",
    # Stacks
    "Stack",
    "LocalStack",
    # Tracking
    "Experiment",
    "Run",
    # Model Registry
    "ModelRegistry",
    "ModelVersion",
    "ModelStage",
    # Storage
    "ArtifactStore",
    "LocalArtifactStore",
    "MetadataStore",
    "SQLiteMetadataStore",
    "materializer_registry",
    # Monitoring & Integrations
    "trace_llm",
    "tracer",
    "detect_drift",
    "compute_stats",
    "FlowymlKerasCallback",
    # Advanced Features
    "PipelineScheduler",
    "approval",
    "ApprovalStep",
    "PipelineCheckpoint",
    "checkpoint_enabled_pipeline",
    "create_from_template",
    "list_templates",
    "TEMPLATES",
    "ModelLeaderboard",
    "compare_runs",
    "NotificationManager",
    "configure_notifications",
    "get_notifier",
    "ConsoleNotifier",
    "SlackNotifier",
    "EmailNotifier",
    # Versioning & Projects
    "VersionedPipeline",
    "PipelineVersion",
    "Project",
    "ProjectManager",
    # Advanced Caching
    "ContentBasedCache",
    "SharedCache",
    "SmartCache",
    "memoize",
    # Debugging
    "StepDebugger",
    "PipelineDebugger",
    "debug_step",
    "trace_step",
    "profile_step",
    "inspect_step",
    # Performance ("ParallelExecutor" is already exported under Parallel Execution)
    "LazyValue",
    "lazy_property",
    "IncrementalComputation",
    "GPUResourceManager",
    "optimize_dataframe",
    "batch_iterator",
    # Pipeline Registry
    "pipeline_registry",
    "register_pipeline",
]
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Asset-centric design for ML pipelines."""
|
|
2
|
+
|
|
3
|
+
from flowyml.assets.base import Asset, AssetMetadata
|
|
4
|
+
from flowyml.assets.dataset import Dataset
|
|
5
|
+
from flowyml.assets.model import Model
|
|
6
|
+
from flowyml.assets.metrics import Metrics
|
|
7
|
+
from flowyml.assets.artifact import Artifact
|
|
8
|
+
from flowyml.assets.featureset import FeatureSet
|
|
9
|
+
from flowyml.assets.report import Report
|
|
10
|
+
from flowyml.assets.registry import AssetRegistry
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"Asset",
|
|
14
|
+
"AssetMetadata",
|
|
15
|
+
"Dataset",
|
|
16
|
+
"Model",
|
|
17
|
+
"Metrics",
|
|
18
|
+
"Artifact",
|
|
19
|
+
"FeatureSet",
|
|
20
|
+
"Report",
|
|
21
|
+
"AssetRegistry",
|
|
22
|
+
]
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Artifact Asset - Represents generic artifacts (configs, checkpoints, etc)."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
from flowyml.assets.base import Asset
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Artifact(Asset):
    """Generic artifact asset for configs, checkpoints, reports, etc.

    In addition to the base ``Asset`` fields, an artifact records an
    ``artifact_type`` label and an optional ``file_path``; both are also
    mirrored into ``metadata.properties``.

    Example:
        >>> config = Artifact(name="training_config", artifact_type="config", data={"lr": 0.001, "epochs": 10})
    """

    def __init__(
        self,
        name: str,
        artifact_type: str = "generic",
        version: str | None = None,
        data: Any = None,
        file_path: str | None = None,
        parent: Asset | None = None,
        tags: dict[str, str] | None = None,
        properties: dict[str, Any] | None = None,
    ):
        """Create an artifact.

        Args:
            name: Artifact name.
            artifact_type: Free-form category label (defaults to "generic").
            version: Version string, forwarded to ``Asset``.
            data: The wrapped object, forwarded to ``Asset``.
            file_path: Optional path recorded on the artifact.
            parent: Optional parent asset for lineage.
            tags: Optional tags, forwarded to ``Asset``.
            properties: Optional extra properties. The mapping is copied, so
                the caller's dict is never modified.
        """
        super().__init__(
            name=name,
            version=version,
            data=data,
            parent=parent,
            tags=tags,
            # Copy before handing over: the artifact-specific keys written
            # below must not leak into the dict the caller passed in.
            properties=dict(properties) if properties else None,
        )

        self.artifact_type = artifact_type
        self.file_path = file_path

        # Add artifact-specific properties
        self.metadata.properties["artifact_type"] = artifact_type
        if file_path:
            self.metadata.properties["file_path"] = file_path
|
flowyml/assets/base.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""Base Asset - Foundation for all ML assets in flowyml."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from uuid import uuid4
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class AssetMetadata:
    """Descriptive record stored alongside an asset.

    Holds the identity fields (id, name, version, type), creation info, the
    ids of parent assets, and free-form ``tags`` / ``properties`` mappings.
    """

    asset_id: str
    name: str
    version: str
    asset_type: str
    created_at: datetime
    created_by: str
    parent_ids: list[str] = field(default_factory=list)
    tags: dict[str, str] = field(default_factory=dict)
    properties: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the metadata as a plain dict.

        ``created_at`` is rendered with ``isoformat()``; every other field is
        returned as stored (containers are not copied).
        """
        field_order = (
            "asset_id",
            "name",
            "version",
            "asset_type",
            "created_at",
            "created_by",
            "parent_ids",
            "tags",
            "properties",
        )
        serialized: dict[str, Any] = {}
        for attr in field_order:
            value = getattr(self, attr)
            if attr == "created_at":
                value = value.isoformat()
            serialized[attr] = value
        return serialized
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class Asset:
|
|
41
|
+
"""Base class for all ML assets (datasets, models, features, etc).
|
|
42
|
+
|
|
43
|
+
Assets are first-class objects in flowyml pipelines with full lineage tracking.
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
def __init__(
|
|
47
|
+
self,
|
|
48
|
+
name: str,
|
|
49
|
+
version: str | None = None,
|
|
50
|
+
data: Any = None,
|
|
51
|
+
parent: Optional["Asset"] = None,
|
|
52
|
+
tags: dict[str, str] | None = None,
|
|
53
|
+
properties: dict[str, Any] | None = None,
|
|
54
|
+
):
|
|
55
|
+
self.name = name
|
|
56
|
+
self.version = version or "v1.0.0"
|
|
57
|
+
self.data = data
|
|
58
|
+
self.asset_id = str(uuid4())
|
|
59
|
+
|
|
60
|
+
# Metadata
|
|
61
|
+
self.metadata = AssetMetadata(
|
|
62
|
+
asset_id=self.asset_id,
|
|
63
|
+
name=name,
|
|
64
|
+
version=self.version,
|
|
65
|
+
asset_type=self.__class__.__name__,
|
|
66
|
+
created_at=datetime.now(),
|
|
67
|
+
created_by="flowyml",
|
|
68
|
+
parent_ids=[parent.asset_id] if parent else [],
|
|
69
|
+
tags=tags or {},
|
|
70
|
+
properties=properties or {},
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# Lineage tracking
|
|
74
|
+
self.parents: list[Asset] = [parent] if parent else []
|
|
75
|
+
self.children: list[Asset] = []
|
|
76
|
+
|
|
77
|
+
if parent:
|
|
78
|
+
parent.children.append(self)
|
|
79
|
+
|
|
80
|
+
@classmethod
|
|
81
|
+
def create(
|
|
82
|
+
cls,
|
|
83
|
+
data: Any,
|
|
84
|
+
name: str | None = None,
|
|
85
|
+
version: str | None = None,
|
|
86
|
+
parent: Optional["Asset"] = None,
|
|
87
|
+
**kwargs: Any,
|
|
88
|
+
) -> "Asset":
|
|
89
|
+
"""Factory method to create an asset.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
data: The actual data/object
|
|
93
|
+
name: Asset name
|
|
94
|
+
version: Asset version
|
|
95
|
+
parent: Parent asset for lineage
|
|
96
|
+
**kwargs: Additional metadata
|
|
97
|
+
|
|
98
|
+
Returns:
|
|
99
|
+
New asset instance
|
|
100
|
+
"""
|
|
101
|
+
asset_name = name or f"{cls.__name__}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
|
102
|
+
|
|
103
|
+
# Extract tags and properties if passed explicitly
|
|
104
|
+
tags = kwargs.pop("tags", {})
|
|
105
|
+
props = kwargs.pop("properties", {})
|
|
106
|
+
# Merge remaining kwargs into properties
|
|
107
|
+
props.update(kwargs)
|
|
108
|
+
|
|
109
|
+
return cls(
|
|
110
|
+
name=asset_name,
|
|
111
|
+
version=version,
|
|
112
|
+
data=data,
|
|
113
|
+
parent=parent,
|
|
114
|
+
tags=tags,
|
|
115
|
+
properties=props,
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
def get_hash(self) -> str:
|
|
119
|
+
"""Generate hash of asset for caching/versioning."""
|
|
120
|
+
content = json.dumps(
|
|
121
|
+
{
|
|
122
|
+
"name": self.name,
|
|
123
|
+
"version": self.version,
|
|
124
|
+
"type": self.metadata.asset_type,
|
|
125
|
+
"created_at": self.metadata.created_at.isoformat(),
|
|
126
|
+
},
|
|
127
|
+
sort_keys=True,
|
|
128
|
+
)
|
|
129
|
+
return hashlib.sha256(content.encode()).hexdigest()[:16]
|
|
130
|
+
|
|
131
|
+
def get_lineage(self, depth: int = -1) -> dict[str, Any]:
|
|
132
|
+
"""Get asset lineage.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
depth: How many levels to traverse (-1 for all)
|
|
136
|
+
|
|
137
|
+
Returns:
|
|
138
|
+
Lineage tree as nested dict
|
|
139
|
+
"""
|
|
140
|
+
lineage = {
|
|
141
|
+
"asset": {
|
|
142
|
+
"asset_id": self.asset_id,
|
|
143
|
+
"name": self.name,
|
|
144
|
+
"type": self.metadata.asset_type,
|
|
145
|
+
"version": self.version,
|
|
146
|
+
},
|
|
147
|
+
"parents": [],
|
|
148
|
+
"children": [],
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
if depth != 0:
|
|
152
|
+
next_depth = depth - 1 if depth > 0 else -1
|
|
153
|
+
lineage["parents"] = [p.get_lineage(next_depth) for p in self.parents]
|
|
154
|
+
lineage["children"] = [c.get_lineage(next_depth) for c in self.children]
|
|
155
|
+
|
|
156
|
+
return lineage
|
|
157
|
+
|
|
158
|
+
def get_all_ancestors(self) -> set["Asset"]:
|
|
159
|
+
"""Get all ancestor assets."""
|
|
160
|
+
ancestors = set()
|
|
161
|
+
|
|
162
|
+
def traverse(asset) -> None:
|
|
163
|
+
for parent in asset.parents:
|
|
164
|
+
if parent not in ancestors:
|
|
165
|
+
ancestors.add(parent)
|
|
166
|
+
traverse(parent)
|
|
167
|
+
|
|
168
|
+
traverse(self)
|
|
169
|
+
return ancestors
|
|
170
|
+
|
|
171
|
+
def get_all_descendants(self) -> set["Asset"]:
|
|
172
|
+
"""Get all descendant assets."""
|
|
173
|
+
descendants = set()
|
|
174
|
+
|
|
175
|
+
def traverse(asset) -> None:
|
|
176
|
+
for child in asset.children:
|
|
177
|
+
if child not in descendants:
|
|
178
|
+
descendants.add(child)
|
|
179
|
+
traverse(child)
|
|
180
|
+
|
|
181
|
+
traverse(self)
|
|
182
|
+
return descendants
|
|
183
|
+
|
|
184
|
+
def add_tag(self, key: str, value: str) -> None:
|
|
185
|
+
"""Add a tag to the asset."""
|
|
186
|
+
self.metadata.tags[key] = value
|
|
187
|
+
|
|
188
|
+
def add_property(self, key: str, value: Any) -> None:
|
|
189
|
+
"""Add a property to the asset."""
|
|
190
|
+
self.metadata.properties[key] = value
|
|
191
|
+
|
|
192
|
+
def to_dict(self) -> dict[str, Any]:
|
|
193
|
+
"""Convert asset to dictionary."""
|
|
194
|
+
return {
|
|
195
|
+
"metadata": self.metadata.to_dict(),
|
|
196
|
+
"lineage": {
|
|
197
|
+
"parents": [p.asset_id for p in self.parents],
|
|
198
|
+
"children": [c.asset_id for c in self.children],
|
|
199
|
+
},
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
def __repr__(self) -> str:
|
|
203
|
+
return f"{self.__class__.__name__}(name='{self.name}', version='{self.version}')"
|
|
204
|
+
|
|
205
|
+
def __hash__(self):
|
|
206
|
+
return hash(self.asset_id)
|
|
207
|
+
|
|
208
|
+
def __eq__(self, other):
|
|
209
|
+
return isinstance(other, Asset) and self.asset_id == other.asset_id
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""Dataset Asset - Represents ML datasets with schema validation."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
from flowyml.assets.base import Asset
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Dataset(Asset):
    """Dataset asset with schema and lineage tracking.

    Adds an optional ``schema`` and ``location`` to the base ``Asset``; when
    given, both are also mirrored into ``metadata.properties`` (the schema as
    ``str(schema)``).

    Example:
        >>> raw_data = Dataset(
        ...     name="imagenet_train",
        ...     version="v2.0",
        ...     data=train_dataset,
        ...     properties={"size": "150GB", "samples": 1_281_167},
        ... )
    """

    def __init__(
        self,
        name: str,
        version: str | None = None,
        data: Any = None,
        schema: Any | None = None,
        location: str | None = None,
        parent: Asset | None = None,
        tags: dict[str, str] | None = None,
        properties: dict[str, Any] | None = None,
    ):
        """Create a dataset.

        Args:
            name: Dataset name.
            version: Version string, forwarded to ``Asset``.
            data: The wrapped data object, forwarded to ``Asset``.
            schema: Optional schema object describing the data.
            location: Optional location string recorded on the dataset.
            parent: Optional parent asset for lineage.
            tags: Optional tags, forwarded to ``Asset``.
            properties: Optional extra properties. The mapping is copied, so
                the caller's dict is never modified.
        """
        super().__init__(
            name=name,
            version=version,
            data=data,
            parent=parent,
            tags=tags,
            # Copy before handing over: the dataset-specific keys written
            # below must not leak into the dict the caller passed in.
            properties=dict(properties) if properties else None,
        )

        self.schema = schema
        self.location = location

        # Add dataset-specific properties
        if schema:
            self.metadata.properties["schema"] = str(schema)
        if location:
            self.metadata.properties["location"] = location

    @property
    def size(self) -> int | None:
        """Return the value stored under the "size" property, or None if absent."""
        return self.metadata.properties.get("size")

    @property
    def num_samples(self) -> int | None:
        """Return the sample count from the "samples" or "num_samples" property.

        "samples" takes precedence whenever it is set (including 0);
        "num_samples" is used only when "samples" is missing or None.
        """
        props = self.metadata.properties
        samples = props.get("samples")
        # Explicit None check: a count of 0 is a valid answer and must not
        # be treated as "missing".
        if samples is not None:
            return samples
        return props.get("num_samples")

    def validate_schema(self) -> bool:
        """Validate data against schema (placeholder).

        No validation is implemented yet; this always returns True.
        """
        if self.schema is None or self.data is None:
            return True
        # Schema validation would go here
        return True

    def split(self, train_ratio: float = 0.8, name_prefix: str | None = None) -> tuple["Dataset", "Dataset"]:
        """Split dataset into train/test.

        Placeholder implementation: no data is actually partitioned. Both
        returned datasets reference the same ``data`` object as this one and
        ``train_ratio`` is currently ignored. The results are children of
        this dataset, share its version and schema, and carry its tags plus
        a "split" tag of "train" / "test".

        Args:
            train_ratio: Ratio for training split (unused by the placeholder)
            name_prefix: Prefix for split dataset names; defaults to this
                dataset's name

        Returns:
            Tuple of (train_dataset, test_dataset)
        """
        prefix = name_prefix or self.name

        # Placeholder - actual splitting logic would depend on data type
        _ = train_ratio  # Unused in placeholder
        train_data = self.data  # Would actually split the data
        test_data = self.data

        train_dataset = Dataset(
            name=f"{prefix}_train",
            version=self.version,
            data=train_data,
            schema=self.schema,
            parent=self,
            tags={**self.metadata.tags, "split": "train"},
        )

        test_dataset = Dataset(
            name=f"{prefix}_test",
            version=self.version,
            data=test_data,
            schema=self.schema,
            parent=self,
            tags={**self.metadata.tags, "split": "test"},
        )

        return train_dataset, test_dataset
|