flowyml 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/__init__.py +3 -0
- flowyml/assets/base.py +10 -0
- flowyml/assets/metrics.py +6 -0
- flowyml/cli/main.py +108 -2
- flowyml/cli/run.py +9 -2
- flowyml/core/execution_status.py +52 -0
- flowyml/core/hooks.py +106 -0
- flowyml/core/observability.py +210 -0
- flowyml/core/orchestrator.py +274 -0
- flowyml/core/pipeline.py +193 -231
- flowyml/core/project.py +34 -2
- flowyml/core/remote_orchestrator.py +109 -0
- flowyml/core/resources.py +22 -5
- flowyml/core/retry_policy.py +80 -0
- flowyml/core/step.py +18 -1
- flowyml/core/submission_result.py +53 -0
- flowyml/integrations/keras.py +95 -22
- flowyml/monitoring/alerts.py +2 -2
- flowyml/stacks/__init__.py +15 -0
- flowyml/stacks/aws.py +599 -0
- flowyml/stacks/azure.py +295 -0
- flowyml/stacks/components.py +24 -2
- flowyml/stacks/gcp.py +158 -11
- flowyml/stacks/local.py +5 -0
- flowyml/storage/artifacts.py +15 -5
- flowyml/storage/materializers/__init__.py +2 -0
- flowyml/storage/materializers/cloudpickle.py +74 -0
- flowyml/storage/metadata.py +166 -5
- flowyml/ui/backend/main.py +41 -1
- flowyml/ui/backend/routers/assets.py +356 -15
- flowyml/ui/backend/routers/client.py +46 -0
- flowyml/ui/backend/routers/execution.py +13 -2
- flowyml/ui/backend/routers/experiments.py +48 -12
- flowyml/ui/backend/routers/metrics.py +213 -0
- flowyml/ui/backend/routers/pipelines.py +63 -7
- flowyml/ui/backend/routers/projects.py +33 -7
- flowyml/ui/backend/routers/runs.py +150 -8
- flowyml/ui/frontend/dist/assets/index-DcYwrn2j.css +1 -0
- flowyml/ui/frontend/dist/assets/index-Dlz_ygOL.js +592 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/src/App.jsx +4 -1
- flowyml/ui/frontend/src/app/assets/page.jsx +260 -230
- flowyml/ui/frontend/src/app/dashboard/page.jsx +38 -7
- flowyml/ui/frontend/src/app/experiments/page.jsx +61 -314
- flowyml/ui/frontend/src/app/observability/page.jsx +277 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +79 -402
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectArtifactsList.jsx +151 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +145 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHeader.jsx +45 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHierarchy.jsx +467 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +253 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectPipelinesList.jsx +105 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRelations.jsx +189 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRunsList.jsx +136 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectTabs.jsx +95 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/page.jsx +326 -0
- flowyml/ui/frontend/src/app/projects/page.jsx +13 -3
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +79 -10
- flowyml/ui/frontend/src/app/runs/page.jsx +82 -424
- flowyml/ui/frontend/src/app/settings/page.jsx +1 -0
- flowyml/ui/frontend/src/app/tokens/page.jsx +62 -16
- flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +373 -0
- flowyml/ui/frontend/src/components/AssetLineageGraph.jsx +291 -0
- flowyml/ui/frontend/src/components/AssetStatsDashboard.jsx +302 -0
- flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +477 -0
- flowyml/ui/frontend/src/components/ExperimentDetailsPanel.jsx +227 -0
- flowyml/ui/frontend/src/components/NavigationTree.jsx +401 -0
- flowyml/ui/frontend/src/components/PipelineDetailsPanel.jsx +239 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +67 -3
- flowyml/ui/frontend/src/components/ProjectSelector.jsx +115 -0
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +298 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +48 -1
- flowyml/ui/frontend/src/components/plugins/ZenMLIntegration.jsx +106 -0
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +52 -26
- flowyml/ui/frontend/src/components/ui/DataView.jsx +35 -17
- flowyml/ui/frontend/src/components/ui/ErrorBoundary.jsx +118 -0
- flowyml/ui/frontend/src/contexts/ProjectContext.jsx +2 -2
- flowyml/ui/frontend/src/contexts/ToastContext.jsx +116 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +5 -1
- flowyml/ui/frontend/src/router/index.jsx +4 -0
- flowyml/ui/frontend/src/utils/date.js +10 -0
- flowyml/ui/frontend/src/utils/downloads.js +11 -0
- flowyml/utils/config.py +6 -0
- flowyml/utils/stack_config.py +45 -3
- {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/METADATA +42 -4
- {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/RECORD +89 -52
- {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/licenses/LICENSE +1 -1
- flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +0 -448
- flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +0 -1
- {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/WHEEL +0 -0
- {flowyml-1.2.0.dist-info → flowyml-1.3.0.dist-info}/entry_points.txt +0 -0
flowyml/__init__.py
CHANGED
|
@@ -38,6 +38,7 @@ from flowyml.assets.registry import AssetRegistry
|
|
|
38
38
|
# Stack imports
|
|
39
39
|
from flowyml.stacks.base import Stack
|
|
40
40
|
from flowyml.stacks.local import LocalStack
|
|
41
|
+
from flowyml.stacks.components import ResourceConfig, DockerConfig
|
|
41
42
|
|
|
42
43
|
# Tracking imports
|
|
43
44
|
from flowyml.tracking.experiment import Experiment
|
|
@@ -140,6 +141,8 @@ __all__ = [
|
|
|
140
141
|
# Stacks
|
|
141
142
|
"Stack",
|
|
142
143
|
"LocalStack",
|
|
144
|
+
"ResourceConfig",
|
|
145
|
+
"DockerConfig",
|
|
143
146
|
# Tracking
|
|
144
147
|
"Experiment",
|
|
145
148
|
"Run",
|
flowyml/assets/base.py
CHANGED
|
@@ -77,6 +77,16 @@ class Asset:
|
|
|
77
77
|
if parent:
|
|
78
78
|
parent.children.append(self)
|
|
79
79
|
|
|
80
|
+
@property
|
|
81
|
+
def properties(self) -> dict[str, Any]:
|
|
82
|
+
"""Expose mutable properties stored in metadata."""
|
|
83
|
+
return self.metadata.properties
|
|
84
|
+
|
|
85
|
+
@property
|
|
86
|
+
def tags(self) -> dict[str, str]:
|
|
87
|
+
"""Expose mutable tags stored in metadata."""
|
|
88
|
+
return self.metadata.tags
|
|
89
|
+
|
|
80
90
|
@classmethod
|
|
81
91
|
def create(
|
|
82
92
|
cls,
|
flowyml/assets/metrics.py
CHANGED
|
@@ -89,7 +89,10 @@ class Metrics(Asset):
|
|
|
89
89
|
def create(
|
|
90
90
|
cls,
|
|
91
91
|
name: str | None = None,
|
|
92
|
+
version: str | None = None,
|
|
92
93
|
parent: Asset | None = None,
|
|
94
|
+
tags: dict[str, str] | None = None,
|
|
95
|
+
properties: dict[str, Any] | None = None,
|
|
93
96
|
**metrics,
|
|
94
97
|
) -> "Metrics":
|
|
95
98
|
"""Factory method to create metrics.
|
|
@@ -99,6 +102,9 @@ class Metrics(Asset):
|
|
|
99
102
|
"""
|
|
100
103
|
return cls(
|
|
101
104
|
name=name or "metrics",
|
|
105
|
+
version=version,
|
|
102
106
|
data=metrics,
|
|
103
107
|
parent=parent,
|
|
108
|
+
tags=tags,
|
|
109
|
+
properties=properties,
|
|
104
110
|
)
|
flowyml/cli/main.py
CHANGED
|
@@ -48,9 +48,11 @@ def init(name: str, template: str, directory: str) -> None:
|
|
|
48
48
|
@click.option("--stack", default="local", help="Stack to use for execution")
|
|
49
49
|
@click.option("--context", "-c", multiple=True, help="Context parameters (key=value)")
|
|
50
50
|
@click.option("--debug", is_flag=True, help="Enable debug mode")
|
|
51
|
-
|
|
51
|
+
@click.option("--retry", type=int, help="Number of retries for the pipeline")
|
|
52
|
+
def run(pipeline_name: str, stack: str, context: tuple, debug: bool, retry: int | None) -> None:
|
|
52
53
|
"""Run a pipeline."""
|
|
53
54
|
from flowyml.cli.run import run_pipeline
|
|
55
|
+
from flowyml.core.retry_policy import OrchestratorRetryPolicy
|
|
54
56
|
|
|
55
57
|
# Parse context parameters
|
|
56
58
|
ctx_params = {}
|
|
@@ -60,8 +62,13 @@ def run(pipeline_name: str, stack: str, context: tuple, debug: bool) -> None:
|
|
|
60
62
|
|
|
61
63
|
click.echo(f"Running pipeline '{pipeline_name}' on stack '{stack}'...")
|
|
62
64
|
|
|
65
|
+
kwargs = {}
|
|
66
|
+
if retry:
|
|
67
|
+
kwargs["retry_policy"] = OrchestratorRetryPolicy(max_attempts=retry)
|
|
68
|
+
click.echo(f" Retry policy enabled: max_attempts={retry}")
|
|
69
|
+
|
|
63
70
|
try:
|
|
64
|
-
result = run_pipeline(pipeline_name, stack, ctx_params, debug)
|
|
71
|
+
result = run_pipeline(pipeline_name, stack, ctx_params, debug, **kwargs)
|
|
65
72
|
click.echo("✓ Pipeline completed successfully")
|
|
66
73
|
click.echo(f" Run ID: {result.get('run_id', 'N/A')}")
|
|
67
74
|
click.echo(f" Duration: {result.get('duration', 'N/A')}")
|
|
@@ -70,6 +77,105 @@ def run(pipeline_name: str, stack: str, context: tuple, debug: bool) -> None:
|
|
|
70
77
|
raise click.Abort()
|
|
71
78
|
|
|
72
79
|
|
|
80
|
+
@cli.group()
def schedule() -> None:
    """Schedule management commands."""
    # Intentionally empty: this function only anchors the ``schedule``
    # sub-command group; the sub-commands attach to it through
    # ``@schedule.command(...)``.
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@schedule.command("create")
@click.argument("pipeline_name")
@click.argument("schedule_type", type=click.Choice(["cron", "interval", "daily", "hourly"]))
@click.argument("value")
@click.option("--stack", default="local", help="Stack to use for execution")
def create_schedule(pipeline_name: str, schedule_type: str, value: str, stack: str) -> None:
    """Create a new schedule for a pipeline.

    VALUE format depends on SCHEDULE_TYPE:
    - cron: "*/5 * * * *"
    - interval: seconds (e.g. 60)
    - daily: "HH:MM" (e.g. 14:30)
    - hourly: minute (e.g. 30)
    """
    from flowyml.core.scheduler import PipelineScheduler
    from flowyml.cli.run import run_pipeline

    # The CLI is stateless, so the scheduled job is a closure that re-runs the
    # pipeline through the same code path as ``flowyml run``. This only
    # registers the definition with the local scheduler; a distributed setup
    # would submit it to a scheduler service instead.
    scheduler = PipelineScheduler()

    def job_func() -> None:
        run_pipeline(pipeline_name, stack, {}, False)

    try:
        if schedule_type == "cron":
            scheduler.schedule_cron(pipeline_name, job_func, value)
        elif schedule_type == "interval":
            scheduler.schedule_interval(pipeline_name, job_func, seconds=int(value))
        elif schedule_type == "daily":
            if ":" not in value:
                raise ValueError("Daily value must be HH:MM")
            h, m = map(int, value.split(":"))
            scheduler.schedule_daily(pipeline_name, job_func, hour=h, minute=m)
        elif schedule_type == "hourly":
            scheduler.schedule_hourly(pipeline_name, job_func, minute=int(value))
    except Exception as e:
        click.echo(f"✗ Error creating schedule: {e}", err=True)
        # Exit non-zero, like the ``run`` command does on failure, so shell
        # scripts and CI can detect that no schedule was created.
        raise click.Abort() from e
    else:
        click.echo(f"✓ Schedule created for '{pipeline_name}' ({schedule_type}={value})")
        click.echo(" Note: Ensure the scheduler service is running to execute this schedule.")
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@schedule.command("list")
def list_schedules() -> None:
    """List all active schedules."""
    from flowyml.core.scheduler import PipelineScheduler

    jobs = PipelineScheduler().get_jobs()

    if jobs:
        click.echo(f"Found {len(jobs)} schedules:\n")
        # One entry per job: identifier and name, then its next run time,
        # followed by a blank separator line.
        for job in jobs:
            click.echo(f" {job.id} - {job.name}")
            click.echo(f" Next run: {job.next_run_time}")
            click.echo()
    else:
        click.echo("No active schedules found.")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@schedule.command("start")
def start_scheduler() -> None:
    """Start the scheduler service (blocking)."""
    import time
    from flowyml.core.scheduler import PipelineScheduler

    click.echo("🚀 Starting Scheduler Service...")
    scheduler = PipelineScheduler()

    # Seconds to sleep between polls of the scheduler.
    poll_interval = 1

    try:
        # NOTE: job definitions are not reloaded here. Only jobs already known
        # to this scheduler instance run, which may be none after a restart.
        # Making schedules persistent would require serializing the job
        # definitions and re-registering them on startup.
        click.echo(" Scheduler running. Press Ctrl+C to stop.")
        while True:
            scheduler.run_pending()
            time.sleep(poll_interval)
    except KeyboardInterrupt:
        click.echo("\n🛑 Scheduler stopped.")
|
|
177
|
+
|
|
178
|
+
|
|
73
179
|
@cli.group()
|
|
74
180
|
def ui() -> None:
|
|
75
181
|
"""UI server commands."""
|
flowyml/cli/run.py
CHANGED
|
@@ -6,7 +6,13 @@ from pathlib import Path
|
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
def run_pipeline(
|
|
9
|
+
def run_pipeline(
|
|
10
|
+
pipeline_name: str,
|
|
11
|
+
stack: str,
|
|
12
|
+
context_params: dict[str, Any],
|
|
13
|
+
debug: bool,
|
|
14
|
+
**kwargs,
|
|
15
|
+
) -> dict[str, Any]:
|
|
10
16
|
"""Run a pipeline by name.
|
|
11
17
|
|
|
12
18
|
Args:
|
|
@@ -14,6 +20,7 @@ def run_pipeline(pipeline_name: str, stack: str, context_params: dict[str, Any],
|
|
|
14
20
|
stack: Stack to use for execution
|
|
15
21
|
context_params: Context parameters to override
|
|
16
22
|
debug: Enable debug mode
|
|
23
|
+
**kwargs: Additional arguments passed to pipeline.run
|
|
17
24
|
|
|
18
25
|
Returns:
|
|
19
26
|
Dictionary with run results
|
|
@@ -65,7 +72,7 @@ def run_pipeline(pipeline_name: str, stack: str, context_params: dict[str, Any],
|
|
|
65
72
|
pipeline.set_stack(stack)
|
|
66
73
|
|
|
67
74
|
# Run pipeline
|
|
68
|
-
result = pipeline.run(debug=debug)
|
|
75
|
+
result = pipeline.run(debug=debug, **kwargs)
|
|
69
76
|
|
|
70
77
|
return {
|
|
71
78
|
"run_id": result.run_id,
|
flowyml/core/execution_status.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Execution status tracking for pipeline runs."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ExecutionStatus(str, Enum):
    """Lifecycle state of a pipeline or step execution.

    Members are also plain strings, so they compare equal to their
    lowercase value (for example ``ExecutionStatus.RUNNING == "running"``).
    """

    # Pre-execution states
    INITIALIZING = "initializing"
    PROVISIONING = "provisioning"

    # Active execution states
    RUNNING = "running"

    # Terminal success states
    COMPLETED = "completed"
    CACHED = "cached"

    # Terminal failure states
    FAILED = "failed"
    STOPPED = "stopped"
    CANCELLED = "cancelled"

    # Intermediate states
    STOPPING = "stopping"
    CANCELLING = "cancelling"

    @property
    def is_successful(self) -> bool:
        """Return True for the terminal success states (completed or cached)."""
        return self is ExecutionStatus.COMPLETED or self is ExecutionStatus.CACHED

    @property
    def is_failed(self) -> bool:
        """Return True for the terminal failure states (failed, stopped, cancelled)."""
        return self in (
            ExecutionStatus.FAILED,
            ExecutionStatus.STOPPED,
            ExecutionStatus.CANCELLED,
        )

    @property
    def is_finished(self) -> bool:
        """Return True when the execution is in any terminal state."""
        # The terminal states are exactly the success states plus the failure states.
        return self.is_successful or self.is_failed
|
flowyml/core/hooks.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Lifecycle hooks for pipelines and steps."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, TYPE_CHECKING
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from flowyml.core.pipeline import Pipeline, PipelineResult
|
|
9
|
+
from flowyml.core.step import Step
|
|
10
|
+
from flowyml.core.executor import ExecutionResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class HookRegistry:
    """Registry for pipeline and step lifecycle hooks.

    Hooks are plain callables stored in four lists, one per lifecycle event.
    Running an event calls each registered hook in registration order. A hook
    that raises is reported with a printed warning and does not stop the
    remaining hooks.
    """

    # Pipeline-level hooks
    on_pipeline_start: list[Callable[["Pipeline"], None]] = field(default_factory=list)
    on_pipeline_end: list[Callable[["Pipeline", "PipelineResult"], None]] = field(default_factory=list)

    # Step-level hooks
    on_step_start: list[Callable[["Step", dict[str, Any]], None]] = field(default_factory=list)
    on_step_end: list[Callable[["Step", "ExecutionResult"], None]] = field(default_factory=list)

    def register_pipeline_start_hook(self, hook: Callable[["Pipeline"], None]) -> None:
        """Register a hook to run at pipeline start."""
        self.on_pipeline_start.append(hook)

    def register_pipeline_end_hook(self, hook: Callable[["Pipeline", "PipelineResult"], None]) -> None:
        """Register a hook to run at pipeline end."""
        self.on_pipeline_end.append(hook)

    def register_step_start_hook(self, hook: Callable[["Step", dict[str, Any]], None]) -> None:
        """Register a hook to run before step execution."""
        self.on_step_start.append(hook)

    def register_step_end_hook(self, hook: Callable[["Step", "ExecutionResult"], None]) -> None:
        """Register a hook to run after step execution."""
        self.on_step_end.append(hook)

    @staticmethod
    def _run_hooks(hooks: list[Callable[..., None]], label: str, *args: Any) -> None:
        """Call every hook in ``hooks`` with ``args``, isolating failures.

        Args:
            hooks: The hook list to dispatch.
            label: Event name used in the warning text (e.g. ``"Step end"``).
            *args: Positional arguments forwarded to each hook.
        """
        # Iterate over a snapshot: a hook may register further hooks while it
        # runs, and mutating the list being iterated would run them in the
        # same pass (or never terminate if a hook re-registers itself).
        for hook in tuple(hooks):
            try:
                hook(*args)
            except Exception as e:
                # Deliberately best-effort: a broken hook must not fail the run.
                print(f"Warning: {label} hook failed: {e}")

    def run_pipeline_start_hooks(self, pipeline: "Pipeline") -> None:
        """Execute all pipeline start hooks."""
        self._run_hooks(self.on_pipeline_start, "Pipeline start", pipeline)

    def run_pipeline_end_hooks(self, pipeline: "Pipeline", result: "PipelineResult") -> None:
        """Execute all pipeline end hooks."""
        self._run_hooks(self.on_pipeline_end, "Pipeline end", pipeline, result)

    def run_step_start_hooks(self, step: "Step", inputs: dict[str, Any]) -> None:
        """Execute all step start hooks."""
        self._run_hooks(self.on_step_start, "Step start", step, inputs)

    def run_step_end_hooks(self, step: "Step", result: "ExecutionResult") -> None:
        """Execute all step end hooks."""
        self._run_hooks(self.on_step_end, "Step end", step, result)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Global hook registry
|
|
75
|
+
_global_hooks = HookRegistry()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def get_global_hooks() -> HookRegistry:
    """Return the module-level ``HookRegistry`` instance.

    This is the same registry the ``on_*`` decorator helpers in this module
    register their hooks with.
    """
    return _global_hooks
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def on_pipeline_start(func: Callable[["Pipeline"], None]) -> Callable[["Pipeline"], None]:
    """Decorator that adds ``func`` to the global pipeline-start hooks.

    The function is returned unchanged, so it stays directly callable.
    """
    registry = get_global_hooks()
    registry.register_pipeline_start_hook(func)
    return func
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def on_pipeline_end(
    func: Callable[["Pipeline", "PipelineResult"], None],
) -> Callable[["Pipeline", "PipelineResult"], None]:
    """Decorator that adds ``func`` to the global pipeline-end hooks.

    The function is returned unchanged, so it stays directly callable.
    """
    registry = get_global_hooks()
    registry.register_pipeline_end_hook(func)
    return func
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def on_step_start(func: Callable[["Step", dict[str, Any]], None]) -> Callable[["Step", dict[str, Any]], None]:
    """Decorator that adds ``func`` to the global step-start hooks.

    The function is returned unchanged, so it stays directly callable.
    """
    registry = get_global_hooks()
    registry.register_step_start_hook(func)
    return func
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def on_step_end(func: Callable[["Step", "ExecutionResult"], None]) -> Callable[["Step", "ExecutionResult"], None]:
    """Decorator that adds ``func`` to the global step-end hooks.

    The function is returned unchanged, so it stays directly callable.
    """
    registry = get_global_hooks()
    registry.register_step_end_hook(func)
    return func
|
flowyml/core/observability.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Observability hooks for monitoring and metrics collection."""
|
|
2
|
+
|
|
3
|
+
from typing import Protocol, Any, Optional, TYPE_CHECKING
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from flowyml.core.pipeline import Pipeline, PipelineResult
|
|
9
|
+
from flowyml.core.step import Step
|
|
10
|
+
from flowyml.core.executor import ExecutionResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class MetricEvent:
    """Common fields shared by every metric event."""

    # Creation time; defaults to ``datetime.now()`` with no tz argument,
    # i.e. a naive datetime.
    timestamp: datetime = field(default_factory=datetime.now)
    # Free-form string labels attached to the event.
    tags: dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the event as a dict with an ISO-8601 ``timestamp`` string.

        The ``tags`` value is the event's own dict, not a copy.
        """
        iso_timestamp = self.timestamp.isoformat()
        return dict(timestamp=iso_timestamp, tags=self.tags)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class PipelineMetricEvent(MetricEvent):
    """Pipeline-level metric event."""

    pipeline_name: str = ""
    run_id: str = ""
    # Left as None when the value is not supplied (e.g. on a start event).
    duration_seconds: Optional[float] = None
    success: Optional[bool] = None

    def to_dict(self) -> dict[str, Any]:
        """Return the base event dict extended with the pipeline fields."""
        payload = super().to_dict()
        payload["pipeline_name"] = self.pipeline_name
        payload["run_id"] = self.run_id
        payload["duration_seconds"] = self.duration_seconds
        payload["success"] = self.success
        return payload
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class StepMetricEvent(MetricEvent):
    """Step-level metric event."""

    step_name: str = ""
    pipeline_name: str = ""
    run_id: str = ""
    # Left as None when the value is not supplied (e.g. on a start event).
    duration_seconds: Optional[float] = None
    success: Optional[bool] = None
    cached: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Return the base event dict extended with the step fields."""
        payload = super().to_dict()
        payload["step_name"] = self.step_name
        payload["pipeline_name"] = self.pipeline_name
        payload["run_id"] = self.run_id
        payload["duration_seconds"] = self.duration_seconds
        payload["success"] = self.success
        payload["cached"] = self.cached
        return payload
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class MetricsCollector(Protocol):
    """Structural interface for metrics collectors.

    Any object providing these four methods satisfies the protocol; no
    inheritance is required.
    """

    def record_pipeline_start(self, pipeline: "Pipeline", run_id: str) -> None:
        """Record that ``pipeline`` started under ``run_id``."""

    def record_pipeline_end(self, pipeline: "Pipeline", result: "PipelineResult") -> None:
        """Record that ``pipeline`` finished with ``result``."""

    def record_step_start(self, step: "Step", pipeline_name: str, run_id: str) -> None:
        """Record that ``step`` started within the given pipeline run."""

    def record_step_end(self, step: "Step", result: "ExecutionResult", pipeline_name: str, run_id: str) -> None:
        """Record that ``step`` finished with ``result`` within the given pipeline run."""
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class ConsoleMetricsCollector:
    """Metrics collector that prints each event dict to stdout, for debugging."""

    def record_pipeline_start(self, pipeline: "Pipeline", run_id: str) -> None:
        """Print a pipeline-start event for ``pipeline`` and ``run_id``."""
        event = PipelineMetricEvent(pipeline_name=pipeline.name, run_id=run_id)
        print(f"📊 Pipeline Started: {event.to_dict()}")

    def record_pipeline_end(self, pipeline: "Pipeline", result: "PipelineResult") -> None:
        """Print a pipeline-end event with the run id, duration and success flag from ``result``."""
        event = PipelineMetricEvent(
            pipeline_name=pipeline.name,
            run_id=result.run_id,
            duration_seconds=result.duration_seconds,
            success=result.success,
        )
        print(f"📊 Pipeline Ended: {event.to_dict()}")

    def record_step_start(self, step: "Step", pipeline_name: str, run_id: str) -> None:
        """Print a step-start event for ``step``."""
        event = StepMetricEvent(step_name=step.name, pipeline_name=pipeline_name, run_id=run_id)
        print(f"📊 Step Started: {event.to_dict()}")

    def record_step_end(self, step: "Step", result: "ExecutionResult", pipeline_name: str, run_id: str) -> None:
        """Print a step-end event for ``step``."""
        # ``duration_seconds`` and ``cached`` are read defensively and fall
        # back to None / False when the result object lacks them.
        elapsed = getattr(result, "duration_seconds", None)
        was_cached = getattr(result, "cached", False)
        event = StepMetricEvent(
            step_name=step.name,
            pipeline_name=pipeline_name,
            run_id=run_id,
            duration_seconds=elapsed,
            success=result.success,
            cached=was_cached,
        )
        print(f"📊 Step Ended: {event.to_dict()}")
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class PrometheusMetricsCollector:
    """Prometheus metrics collector (requires prometheus_client).

    Exposes counters for pipeline starts, pipeline completions (labelled by
    status) and step cache hits, plus histograms for pipeline and step
    durations.

    Raises:
        ImportError: If ``prometheus_client`` is not installed.
    """

    def __init__(self):
        # Keep the ``try`` body to the import alone so that only a missing
        # dependency is reported as such.
        try:
            from prometheus_client import Counter, Histogram
        except ImportError as err:
            raise ImportError("prometheus_client required for PrometheusMetricsCollector") from err

        # NOTE(review): these metrics go to prometheus_client's default
        # registry; creating a second collector in the same process presumably
        # fails on duplicate metric names — confirm before instantiating twice.
        self.pipeline_starts = Counter(
            "flowyml_pipeline_starts_total",
            "Total pipeline starts",
            ["pipeline_name"],
        )
        self.pipeline_completions = Counter(
            "flowyml_pipeline_completions_total",
            "Total pipeline completions",
            ["pipeline_name", "status"],
        )
        self.pipeline_duration = Histogram(
            "flowyml_pipeline_duration_seconds",
            "Pipeline duration in seconds",
            ["pipeline_name"],
        )
        self.step_duration = Histogram(
            "flowyml_step_duration_seconds",
            "Step duration in seconds",
            ["pipeline_name", "step_name"],
        )
        self.step_cache_hits = Counter(
            "flowyml_step_cache_hits_total",
            "Total step cache hits",
            ["pipeline_name", "step_name"],
        )

    def record_pipeline_start(self, pipeline: "Pipeline", run_id: str) -> None:
        """Increment the start counter for ``pipeline``; ``run_id`` is unused."""
        self.pipeline_starts.labels(pipeline_name=pipeline.name).inc()

    def record_pipeline_end(self, pipeline: "Pipeline", result: "PipelineResult") -> None:
        """Count the completion by status and observe the duration when known."""
        status = "success" if result.success else "failure"
        self.pipeline_completions.labels(pipeline_name=pipeline.name, status=status).inc()

        # Compare against None rather than truthiness: a 0.0-second run is a
        # valid observation and must not be dropped from the histogram.
        if result.duration_seconds is not None:
            self.pipeline_duration.labels(pipeline_name=pipeline.name).observe(result.duration_seconds)

    def record_step_start(self, step: "Step", pipeline_name: str, run_id: str) -> None:
        """Do nothing; only step completion is tracked for Prometheus."""
        pass  # No-op for Prometheus (only track completion)

    def record_step_end(self, step: "Step", result: "ExecutionResult", pipeline_name: str, run_id: str) -> None:
        """Observe the step duration when known and count cache hits."""
        duration = getattr(result, "duration_seconds", None)
        # As for pipelines, 0.0 is a real measurement (e.g. an instant step).
        if duration is not None:
            self.step_duration.labels(
                pipeline_name=pipeline_name,
                step_name=step.name,
            ).observe(duration)

        if getattr(result, "cached", False):
            self.step_cache_hits.labels(
                pipeline_name=pipeline_name,
                step_name=step.name,
            ).inc()
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# Global metrics collector
|
|
199
|
+
_metrics_collector: Optional[MetricsCollector] = None
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def set_metrics_collector(collector: MetricsCollector) -> None:
    """Install ``collector`` as the module-wide metrics collector.

    Replaces whatever collector was set before.
    """
    global _metrics_collector
    _metrics_collector = collector
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def get_metrics_collector() -> Optional[MetricsCollector]:
    """Return the module-wide metrics collector, or None if none has been set."""
    return _metrics_collector
|