flowyml 1.2.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. flowyml/__init__.py +3 -0
  2. flowyml/assets/base.py +10 -0
  3. flowyml/assets/metrics.py +6 -0
  4. flowyml/cli/main.py +108 -2
  5. flowyml/cli/run.py +9 -2
  6. flowyml/core/execution_status.py +52 -0
  7. flowyml/core/hooks.py +106 -0
  8. flowyml/core/observability.py +210 -0
  9. flowyml/core/orchestrator.py +274 -0
  10. flowyml/core/pipeline.py +193 -231
  11. flowyml/core/project.py +34 -2
  12. flowyml/core/remote_orchestrator.py +109 -0
  13. flowyml/core/resources.py +34 -17
  14. flowyml/core/retry_policy.py +80 -0
  15. flowyml/core/scheduler.py +9 -9
  16. flowyml/core/scheduler_config.py +2 -3
  17. flowyml/core/step.py +18 -1
  18. flowyml/core/submission_result.py +53 -0
  19. flowyml/integrations/keras.py +95 -22
  20. flowyml/monitoring/alerts.py +2 -2
  21. flowyml/stacks/__init__.py +15 -0
  22. flowyml/stacks/aws.py +599 -0
  23. flowyml/stacks/azure.py +295 -0
  24. flowyml/stacks/bridge.py +9 -9
  25. flowyml/stacks/components.py +24 -2
  26. flowyml/stacks/gcp.py +158 -11
  27. flowyml/stacks/local.py +5 -0
  28. flowyml/stacks/plugins.py +2 -2
  29. flowyml/stacks/registry.py +21 -0
  30. flowyml/storage/artifacts.py +15 -5
  31. flowyml/storage/materializers/__init__.py +2 -0
  32. flowyml/storage/materializers/base.py +33 -0
  33. flowyml/storage/materializers/cloudpickle.py +74 -0
  34. flowyml/storage/metadata.py +3 -881
  35. flowyml/storage/remote.py +590 -0
  36. flowyml/storage/sql.py +911 -0
  37. flowyml/ui/backend/dependencies.py +28 -0
  38. flowyml/ui/backend/main.py +43 -80
  39. flowyml/ui/backend/routers/assets.py +483 -17
  40. flowyml/ui/backend/routers/client.py +46 -0
  41. flowyml/ui/backend/routers/execution.py +13 -2
  42. flowyml/ui/backend/routers/experiments.py +97 -14
  43. flowyml/ui/backend/routers/metrics.py +168 -0
  44. flowyml/ui/backend/routers/pipelines.py +77 -12
  45. flowyml/ui/backend/routers/projects.py +33 -7
  46. flowyml/ui/backend/routers/runs.py +221 -12
  47. flowyml/ui/backend/routers/schedules.py +5 -21
  48. flowyml/ui/backend/routers/stats.py +14 -0
  49. flowyml/ui/backend/routers/traces.py +37 -53
  50. flowyml/ui/frontend/dist/assets/index-DcYwrn2j.css +1 -0
  51. flowyml/ui/frontend/dist/assets/index-Dlz_ygOL.js +592 -0
  52. flowyml/ui/frontend/dist/index.html +2 -2
  53. flowyml/ui/frontend/src/App.jsx +4 -1
  54. flowyml/ui/frontend/src/app/assets/page.jsx +260 -230
  55. flowyml/ui/frontend/src/app/dashboard/page.jsx +38 -7
  56. flowyml/ui/frontend/src/app/experiments/page.jsx +61 -314
  57. flowyml/ui/frontend/src/app/observability/page.jsx +277 -0
  58. flowyml/ui/frontend/src/app/pipelines/page.jsx +79 -402
  59. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectArtifactsList.jsx +151 -0
  60. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +145 -0
  61. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHeader.jsx +45 -0
  62. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHierarchy.jsx +467 -0
  63. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +253 -0
  64. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectPipelinesList.jsx +105 -0
  65. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRelations.jsx +189 -0
  66. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRunsList.jsx +136 -0
  67. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectTabs.jsx +95 -0
  68. flowyml/ui/frontend/src/app/projects/[projectId]/page.jsx +326 -0
  69. flowyml/ui/frontend/src/app/projects/page.jsx +13 -3
  70. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +79 -10
  71. flowyml/ui/frontend/src/app/runs/page.jsx +82 -424
  72. flowyml/ui/frontend/src/app/settings/page.jsx +1 -0
  73. flowyml/ui/frontend/src/app/tokens/page.jsx +62 -16
  74. flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +373 -0
  75. flowyml/ui/frontend/src/components/AssetLineageGraph.jsx +291 -0
  76. flowyml/ui/frontend/src/components/AssetStatsDashboard.jsx +302 -0
  77. flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +477 -0
  78. flowyml/ui/frontend/src/components/ExperimentDetailsPanel.jsx +227 -0
  79. flowyml/ui/frontend/src/components/NavigationTree.jsx +401 -0
  80. flowyml/ui/frontend/src/components/PipelineDetailsPanel.jsx +239 -0
  81. flowyml/ui/frontend/src/components/PipelineGraph.jsx +67 -3
  82. flowyml/ui/frontend/src/components/ProjectSelector.jsx +115 -0
  83. flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +298 -0
  84. flowyml/ui/frontend/src/components/header/Header.jsx +48 -1
  85. flowyml/ui/frontend/src/components/plugins/ZenMLIntegration.jsx +106 -0
  86. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +52 -26
  87. flowyml/ui/frontend/src/components/ui/DataView.jsx +35 -17
  88. flowyml/ui/frontend/src/components/ui/ErrorBoundary.jsx +118 -0
  89. flowyml/ui/frontend/src/contexts/ProjectContext.jsx +2 -2
  90. flowyml/ui/frontend/src/contexts/ToastContext.jsx +116 -0
  91. flowyml/ui/frontend/src/layouts/MainLayout.jsx +5 -1
  92. flowyml/ui/frontend/src/router/index.jsx +4 -0
  93. flowyml/ui/frontend/src/utils/date.js +10 -0
  94. flowyml/ui/frontend/src/utils/downloads.js +11 -0
  95. flowyml/utils/config.py +6 -0
  96. flowyml/utils/stack_config.py +45 -3
  97. {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/METADATA +44 -4
  98. flowyml-1.4.0.dist-info/RECORD +200 -0
  99. {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/licenses/LICENSE +1 -1
  100. flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +0 -448
  101. flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +0 -1
  102. flowyml-1.2.0.dist-info/RECORD +0 -159
  103. {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/WHEEL +0 -0
  104. {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/entry_points.txt +0 -0
flowyml/__init__.py CHANGED
@@ -38,6 +38,7 @@ from flowyml.assets.registry import AssetRegistry
38
38
  # Stack imports
39
39
  from flowyml.stacks.base import Stack
40
40
  from flowyml.stacks.local import LocalStack
41
+ from flowyml.stacks.components import ResourceConfig, DockerConfig
41
42
 
42
43
  # Tracking imports
43
44
  from flowyml.tracking.experiment import Experiment
@@ -140,6 +141,8 @@ __all__ = [
140
141
  # Stacks
141
142
  "Stack",
142
143
  "LocalStack",
144
+ "ResourceConfig",
145
+ "DockerConfig",
143
146
  # Tracking
144
147
  "Experiment",
145
148
  "Run",
flowyml/assets/base.py CHANGED
@@ -77,6 +77,16 @@ class Asset:
77
77
  if parent:
78
78
  parent.children.append(self)
79
79
 
80
+ @property
81
+ def properties(self) -> dict[str, Any]:
82
+ """Expose mutable properties stored in metadata."""
83
+ return self.metadata.properties
84
+
85
+ @property
86
+ def tags(self) -> dict[str, str]:
87
+ """Expose mutable tags stored in metadata."""
88
+ return self.metadata.tags
89
+
80
90
  @classmethod
81
91
  def create(
82
92
  cls,
flowyml/assets/metrics.py CHANGED
@@ -89,7 +89,10 @@ class Metrics(Asset):
89
89
  def create(
90
90
  cls,
91
91
  name: str | None = None,
92
+ version: str | None = None,
92
93
  parent: Asset | None = None,
94
+ tags: dict[str, str] | None = None,
95
+ properties: dict[str, Any] | None = None,
93
96
  **metrics,
94
97
  ) -> "Metrics":
95
98
  """Factory method to create metrics.
@@ -99,6 +102,9 @@ class Metrics(Asset):
99
102
  """
100
103
  return cls(
101
104
  name=name or "metrics",
105
+ version=version,
102
106
  data=metrics,
103
107
  parent=parent,
108
+ tags=tags,
109
+ properties=properties,
104
110
  )
flowyml/cli/main.py CHANGED
@@ -48,9 +48,11 @@ def init(name: str, template: str, directory: str) -> None:
48
48
  @click.option("--stack", default="local", help="Stack to use for execution")
49
49
  @click.option("--context", "-c", multiple=True, help="Context parameters (key=value)")
50
50
  @click.option("--debug", is_flag=True, help="Enable debug mode")
51
- def run(pipeline_name: str, stack: str, context: tuple, debug: bool) -> None:
51
+ @click.option("--retry", type=int, help="Number of retries for the pipeline")
52
+ def run(pipeline_name: str, stack: str, context: tuple, debug: bool, retry: int | None) -> None:
52
53
  """Run a pipeline."""
53
54
  from flowyml.cli.run import run_pipeline
55
+ from flowyml.core.retry_policy import OrchestratorRetryPolicy
54
56
 
55
57
  # Parse context parameters
56
58
  ctx_params = {}
@@ -60,8 +62,13 @@ def run(pipeline_name: str, stack: str, context: tuple, debug: bool) -> None:
60
62
 
61
63
  click.echo(f"Running pipeline '{pipeline_name}' on stack '{stack}'...")
62
64
 
65
+ kwargs = {}
66
+ if retry:
67
+ kwargs["retry_policy"] = OrchestratorRetryPolicy(max_attempts=retry)
68
+ click.echo(f" Retry policy enabled: max_attempts={retry}")
69
+
63
70
  try:
64
- result = run_pipeline(pipeline_name, stack, ctx_params, debug)
71
+ result = run_pipeline(pipeline_name, stack, ctx_params, debug, **kwargs)
65
72
  click.echo("✓ Pipeline completed successfully")
66
73
  click.echo(f" Run ID: {result.get('run_id', 'N/A')}")
67
74
  click.echo(f" Duration: {result.get('duration', 'N/A')}")
@@ -70,6 +77,105 @@ def run(pipeline_name: str, stack: str, context: tuple, debug: bool) -> None:
70
77
  raise click.Abort()
71
78
 
72
79
 
80
+ @cli.group()
81
+ def schedule() -> None:
82
+ """Schedule management commands."""
83
+ pass
84
+
85
+
86
+ @schedule.command("create")
87
+ @click.argument("pipeline_name")
88
+ @click.argument("schedule_type", type=click.Choice(["cron", "interval", "daily", "hourly"]))
89
+ @click.argument("value")
90
+ @click.option("--stack", default="local", help="Stack to use for execution")
91
+ def create_schedule(pipeline_name: str, schedule_type: str, value: str, stack: str) -> None:
92
+ """Create a new schedule for a pipeline.
93
+
94
+ VALUE format depends on SCHEDULE_TYPE:
95
+ - cron: "*/5 * * * *"
96
+ - interval: seconds (e.g. 60)
97
+ - daily: "HH:MM" (e.g. 14:30)
98
+ - hourly: minute (e.g. 30)
99
+ """
100
+ from flowyml.core.scheduler import PipelineScheduler
101
+ from flowyml.cli.run import run_pipeline
102
+
103
+ # We need a callable for the scheduler.
104
+ # Since CLI is stateless, we wrap the run_pipeline command.
105
+ # Note: In a real distributed system, this would submit to a scheduler service.
106
+ # Here we are just registering it in the local scheduler DB.
107
+
108
+ # For now, we'll just use the scheduler API to register the definition
109
+ scheduler = PipelineScheduler()
110
+
111
+ # Define a wrapper that runs the pipeline via CLI logic
112
+ def job_func():
113
+ run_pipeline(pipeline_name, stack, {}, False)
114
+
115
+ try:
116
+ if schedule_type == "cron":
117
+ scheduler.schedule_cron(pipeline_name, job_func, value)
118
+ elif schedule_type == "interval":
119
+ scheduler.schedule_interval(pipeline_name, job_func, seconds=int(value))
120
+ elif schedule_type == "daily":
121
+ if ":" in value:
122
+ h, m = map(int, value.split(":"))
123
+ scheduler.schedule_daily(pipeline_name, job_func, hour=h, minute=m)
124
+ else:
125
+ raise ValueError("Daily value must be HH:MM")
126
+ elif schedule_type == "hourly":
127
+ scheduler.schedule_hourly(pipeline_name, job_func, minute=int(value))
128
+
129
+ click.echo(f"✓ Schedule created for '{pipeline_name}' ({schedule_type}={value})")
130
+ click.echo(" Note: Ensure the scheduler service is running to execute this schedule.")
131
+ except Exception as e:
132
+ click.echo(f"✗ Error creating schedule: {e}", err=True)
133
+
134
+
135
+ @schedule.command("list")
136
+ def list_schedules() -> None:
137
+ """List all active schedules."""
138
+ from flowyml.core.scheduler import PipelineScheduler
139
+
140
+ scheduler = PipelineScheduler()
141
+ jobs = scheduler.get_jobs()
142
+
143
+ if not jobs:
144
+ click.echo("No active schedules found.")
145
+ return
146
+
147
+ click.echo(f"Found {len(jobs)} schedules:\n")
148
+ for job in jobs:
149
+ click.echo(f" {job.id} - {job.name}")
150
+ click.echo(f" Next run: {job.next_run_time}")
151
+ click.echo()
152
+
153
+
154
+ @schedule.command("start")
155
+ def start_scheduler() -> None:
156
+ """Start the scheduler service (blocking)."""
157
+ from flowyml.core.scheduler import PipelineScheduler
158
+ import time
159
+
160
+ click.echo("🚀 Starting Scheduler Service...")
161
+ scheduler = PipelineScheduler()
162
+
163
+ try:
164
+ # In a real app, this would load definitions from DB and register them
165
+ # For now, it just runs the scheduler loop for existing in-memory jobs
166
+ # (which might be empty if we restarted).
167
+ # To make this persistent, we'd need to serialize job definitions to DB.
168
+ # The current Scheduler implementation supports SQLite persistence for job state,
169
+ # but we need to re-register jobs on startup.
170
+
171
+ click.echo(" Scheduler running. Press Ctrl+C to stop.")
172
+ while True:
173
+ scheduler.run_pending()
174
+ time.sleep(1)
175
+ except KeyboardInterrupt:
176
+ click.echo("\n🛑 Scheduler stopped.")
177
+
178
+
73
179
  @cli.group()
74
180
  def ui() -> None:
75
181
  """UI server commands."""
flowyml/cli/run.py CHANGED
@@ -6,7 +6,13 @@ from pathlib import Path
6
6
  from typing import Any
7
7
 
8
8
 
9
- def run_pipeline(pipeline_name: str, stack: str, context_params: dict[str, Any], debug: bool) -> dict[str, Any]:
9
+ def run_pipeline(
10
+ pipeline_name: str,
11
+ stack: str,
12
+ context_params: dict[str, Any],
13
+ debug: bool,
14
+ **kwargs,
15
+ ) -> dict[str, Any]:
10
16
  """Run a pipeline by name.
11
17
 
12
18
  Args:
@@ -14,6 +20,7 @@ def run_pipeline(pipeline_name: str, stack: str, context_params: dict[str, Any],
14
20
  stack: Stack to use for execution
15
21
  context_params: Context parameters to override
16
22
  debug: Enable debug mode
23
+ **kwargs: Additional arguments passed to pipeline.run
17
24
 
18
25
  Returns:
19
26
  Dictionary with run results
@@ -65,7 +72,7 @@ def run_pipeline(pipeline_name: str, stack: str, context_params: dict[str, Any],
65
72
  pipeline.set_stack(stack)
66
73
 
67
74
  # Run pipeline
68
- result = pipeline.run(debug=debug)
75
+ result = pipeline.run(debug=debug, **kwargs)
69
76
 
70
77
  return {
71
78
  "run_id": result.run_id,
flowyml/core/execution_status.py ADDED
@@ -0,0 +1,52 @@
1
+ """Execution status tracking for pipeline runs."""
2
+
3
+ from enum import Enum
4
+
5
+
6
+ class ExecutionStatus(str, Enum):
7
+ """Status of a pipeline or step execution."""
8
+
9
+ # Pre-execution states
10
+ INITIALIZING = "initializing"
11
+ PROVISIONING = "provisioning"
12
+
13
+ # Active execution states
14
+ RUNNING = "running"
15
+
16
+ # Terminal success states
17
+ COMPLETED = "completed"
18
+ CACHED = "cached"
19
+
20
+ # Terminal failure states
21
+ FAILED = "failed"
22
+ STOPPED = "stopped"
23
+ CANCELLED = "cancelled"
24
+
25
+ # Intermediate states
26
+ STOPPING = "stopping"
27
+ CANCELLING = "cancelling"
28
+
29
+ @property
30
+ def is_finished(self) -> bool:
31
+ """Check if execution is in a terminal state."""
32
+ return self in {
33
+ ExecutionStatus.COMPLETED,
34
+ ExecutionStatus.CACHED,
35
+ ExecutionStatus.FAILED,
36
+ ExecutionStatus.STOPPED,
37
+ ExecutionStatus.CANCELLED,
38
+ }
39
+
40
+ @property
41
+ def is_successful(self) -> bool:
42
+ """Check if execution completed successfully."""
43
+ return self in {ExecutionStatus.COMPLETED, ExecutionStatus.CACHED}
44
+
45
+ @property
46
+ def is_failed(self) -> bool:
47
+ """Check if execution failed."""
48
+ return self in {
49
+ ExecutionStatus.FAILED,
50
+ ExecutionStatus.STOPPED,
51
+ ExecutionStatus.CANCELLED,
52
+ }
flowyml/core/hooks.py ADDED
@@ -0,0 +1,106 @@
1
+ """Lifecycle hooks for pipelines and steps."""
2
+
3
+ from typing import Any, TYPE_CHECKING
4
+ from collections.abc import Callable
5
+ from dataclasses import dataclass, field
6
+
7
+ if TYPE_CHECKING:
8
+ from flowyml.core.pipeline import Pipeline, PipelineResult
9
+ from flowyml.core.step import Step
10
+ from flowyml.core.executor import ExecutionResult
11
+
12
+
13
+ @dataclass
14
+ class HookRegistry:
15
+ """Registry for pipeline and step lifecycle hooks."""
16
+
17
+ # Pipeline-level hooks
18
+ on_pipeline_start: list[Callable[["Pipeline"], None]] = field(default_factory=list)
19
+ on_pipeline_end: list[Callable[["Pipeline", "PipelineResult"], None]] = field(default_factory=list)
20
+
21
+ # Step-level hooks
22
+ on_step_start: list[Callable[["Step", dict[str, Any]], None]] = field(default_factory=list)
23
+ on_step_end: list[Callable[["Step", "ExecutionResult"], None]] = field(default_factory=list)
24
+
25
+ def register_pipeline_start_hook(self, hook: Callable[["Pipeline"], None]) -> None:
26
+ """Register a hook to run at pipeline start."""
27
+ self.on_pipeline_start.append(hook)
28
+
29
+ def register_pipeline_end_hook(self, hook: Callable[["Pipeline", "PipelineResult"], None]) -> None:
30
+ """Register a hook to run at pipeline end."""
31
+ self.on_pipeline_end.append(hook)
32
+
33
+ def register_step_start_hook(self, hook: Callable[["Step", dict[str, Any]], None]) -> None:
34
+ """Register a hook to run before step execution."""
35
+ self.on_step_start.append(hook)
36
+
37
+ def register_step_end_hook(self, hook: Callable[["Step", "ExecutionResult"], None]) -> None:
38
+ """Register a hook to run after step execution."""
39
+ self.on_step_end.append(hook)
40
+
41
+ def run_pipeline_start_hooks(self, pipeline: "Pipeline") -> None:
42
+ """Execute all pipeline start hooks."""
43
+ for hook in self.on_pipeline_start:
44
+ try:
45
+ hook(pipeline)
46
+ except Exception as e:
47
+ print(f"Warning: Pipeline start hook failed: {e}")
48
+
49
+ def run_pipeline_end_hooks(self, pipeline: "Pipeline", result: "PipelineResult") -> None:
50
+ """Execute all pipeline end hooks."""
51
+ for hook in self.on_pipeline_end:
52
+ try:
53
+ hook(pipeline, result)
54
+ except Exception as e:
55
+ print(f"Warning: Pipeline end hook failed: {e}")
56
+
57
+ def run_step_start_hooks(self, step: "Step", inputs: dict[str, Any]) -> None:
58
+ """Execute all step start hooks."""
59
+ for hook in self.on_step_start:
60
+ try:
61
+ hook(step, inputs)
62
+ except Exception as e:
63
+ print(f"Warning: Step start hook failed: {e}")
64
+
65
+ def run_step_end_hooks(self, step: "Step", result: "ExecutionResult") -> None:
66
+ """Execute all step end hooks."""
67
+ for hook in self.on_step_end:
68
+ try:
69
+ hook(step, result)
70
+ except Exception as e:
71
+ print(f"Warning: Step end hook failed: {e}")
72
+
73
+
74
+ # Global hook registry
75
+ _global_hooks = HookRegistry()
76
+
77
+
78
+ def get_global_hooks() -> HookRegistry:
79
+ """Get the global hook registry."""
80
+ return _global_hooks
81
+
82
+
83
+ def on_pipeline_start(func: Callable[["Pipeline"], None]) -> Callable[["Pipeline"], None]:
84
+ """Decorator to register a pipeline start hook."""
85
+ _global_hooks.register_pipeline_start_hook(func)
86
+ return func
87
+
88
+
89
+ def on_pipeline_end(
90
+ func: Callable[["Pipeline", "PipelineResult"], None],
91
+ ) -> Callable[["Pipeline", "PipelineResult"], None]:
92
+ """Decorator to register a pipeline end hook."""
93
+ _global_hooks.register_pipeline_end_hook(func)
94
+ return func
95
+
96
+
97
+ def on_step_start(func: Callable[["Step", dict[str, Any]], None]) -> Callable[["Step", dict[str, Any]], None]:
98
+ """Decorator to register a step start hook."""
99
+ _global_hooks.register_step_start_hook(func)
100
+ return func
101
+
102
+
103
+ def on_step_end(func: Callable[["Step", "ExecutionResult"], None]) -> Callable[["Step", "ExecutionResult"], None]:
104
+ """Decorator to register a step end hook."""
105
+ _global_hooks.register_step_end_hook(func)
106
+ return func
flowyml/core/observability.py ADDED
@@ -0,0 +1,210 @@
1
+ """Observability hooks for monitoring and metrics collection."""
2
+
3
+ from typing import Protocol, Any, TYPE_CHECKING
4
+ from datetime import datetime
5
+ from dataclasses import dataclass, field
6
+
7
+ if TYPE_CHECKING:
8
+ from flowyml.core.pipeline import Pipeline, PipelineResult
9
+ from flowyml.core.step import Step
10
+ from flowyml.core.executor import ExecutionResult
11
+
12
+
13
+ @dataclass
14
+ class MetricEvent:
15
+ """Base metric event."""
16
+
17
+ timestamp: datetime = field(default_factory=datetime.now)
18
+ tags: dict[str, str] = field(default_factory=dict)
19
+
20
+ def to_dict(self) -> dict[str, Any]:
21
+ """Convert to dictionary."""
22
+ return {
23
+ "timestamp": self.timestamp.isoformat(),
24
+ "tags": self.tags,
25
+ }
26
+
27
+
28
+ @dataclass
29
+ class PipelineMetricEvent(MetricEvent):
30
+ """Pipeline-level metric event."""
31
+
32
+ pipeline_name: str = ""
33
+ run_id: str = ""
34
+ duration_seconds: float | None = None
35
+ success: bool | None = None
36
+
37
+ def to_dict(self) -> dict[str, Any]:
38
+ base = super().to_dict()
39
+ base.update(
40
+ {
41
+ "pipeline_name": self.pipeline_name,
42
+ "run_id": self.run_id,
43
+ "duration_seconds": self.duration_seconds,
44
+ "success": self.success,
45
+ },
46
+ )
47
+ return base
48
+
49
+
50
+ @dataclass
51
+ class StepMetricEvent(MetricEvent):
52
+ """Step-level metric event."""
53
+
54
+ step_name: str = ""
55
+ pipeline_name: str = ""
56
+ run_id: str = ""
57
+ duration_seconds: float | None = None
58
+ success: bool | None = None
59
+ cached: bool = False
60
+
61
+ def to_dict(self) -> dict[str, Any]:
62
+ base = super().to_dict()
63
+ base.update(
64
+ {
65
+ "step_name": self.step_name,
66
+ "pipeline_name": self.pipeline_name,
67
+ "run_id": self.run_id,
68
+ "duration_seconds": self.duration_seconds,
69
+ "success": self.success,
70
+ "cached": self.cached,
71
+ },
72
+ )
73
+ return base
74
+
75
+
76
+ class MetricsCollector(Protocol):
77
+ """Protocol for metrics collectors."""
78
+
79
+ def record_pipeline_start(self, pipeline: "Pipeline", run_id: str) -> None:
80
+ """Record pipeline start."""
81
+ ...
82
+
83
+ def record_pipeline_end(self, pipeline: "Pipeline", result: "PipelineResult") -> None:
84
+ """Record pipeline completion."""
85
+ ...
86
+
87
+ def record_step_start(self, step: "Step", pipeline_name: str, run_id: str) -> None:
88
+ """Record step start."""
89
+ ...
90
+
91
+ def record_step_end(self, step: "Step", result: "ExecutionResult", pipeline_name: str, run_id: str) -> None:
92
+ """Record step completion."""
93
+ ...
94
+
95
+
96
+ class ConsoleMetricsCollector:
97
+ """Simple console metrics collector for debugging."""
98
+
99
+ def record_pipeline_start(self, pipeline: "Pipeline", run_id: str) -> None:
100
+ event = PipelineMetricEvent(
101
+ pipeline_name=pipeline.name,
102
+ run_id=run_id,
103
+ )
104
+ print(f"📊 Pipeline Started: {event.to_dict()}")
105
+
106
+ def record_pipeline_end(self, pipeline: "Pipeline", result: "PipelineResult") -> None:
107
+ event = PipelineMetricEvent(
108
+ pipeline_name=pipeline.name,
109
+ run_id=result.run_id,
110
+ duration_seconds=result.duration_seconds,
111
+ success=result.success,
112
+ )
113
+ print(f"📊 Pipeline Ended: {event.to_dict()}")
114
+
115
+ def record_step_start(self, step: "Step", pipeline_name: str, run_id: str) -> None:
116
+ event = StepMetricEvent(
117
+ step_name=step.name,
118
+ pipeline_name=pipeline_name,
119
+ run_id=run_id,
120
+ )
121
+ print(f"📊 Step Started: {event.to_dict()}")
122
+
123
+ def record_step_end(self, step: "Step", result: "ExecutionResult", pipeline_name: str, run_id: str) -> None:
124
+ event = StepMetricEvent(
125
+ step_name=step.name,
126
+ pipeline_name=pipeline_name,
127
+ run_id=run_id,
128
+ duration_seconds=getattr(result, "duration_seconds", None),
129
+ success=result.success,
130
+ cached=getattr(result, "cached", False),
131
+ )
132
+ print(f"📊 Step Ended: {event.to_dict()}")
133
+
134
+
135
+ class PrometheusMetricsCollector:
136
+ """Prometheus metrics collector (requires prometheus_client)."""
137
+
138
+ def __init__(self):
139
+ try:
140
+ from prometheus_client import Counter, Histogram
141
+
142
+ self.pipeline_starts = Counter(
143
+ "flowyml_pipeline_starts_total",
144
+ "Total pipeline starts",
145
+ ["pipeline_name"],
146
+ )
147
+ self.pipeline_completions = Counter(
148
+ "flowyml_pipeline_completions_total",
149
+ "Total pipeline completions",
150
+ ["pipeline_name", "status"],
151
+ )
152
+ self.pipeline_duration = Histogram(
153
+ "flowyml_pipeline_duration_seconds",
154
+ "Pipeline duration in seconds",
155
+ ["pipeline_name"],
156
+ )
157
+ self.step_duration = Histogram(
158
+ "flowyml_step_duration_seconds",
159
+ "Step duration in seconds",
160
+ ["pipeline_name", "step_name"],
161
+ )
162
+ self.step_cache_hits = Counter(
163
+ "flowyml_step_cache_hits_total",
164
+ "Total step cache hits",
165
+ ["pipeline_name", "step_name"],
166
+ )
167
+ except ImportError:
168
+ raise ImportError("prometheus_client required for PrometheusMetricsCollector")
169
+
170
+ def record_pipeline_start(self, pipeline: "Pipeline", run_id: str) -> None:
171
+ self.pipeline_starts.labels(pipeline_name=pipeline.name).inc()
172
+
173
+ def record_pipeline_end(self, pipeline: "Pipeline", result: "PipelineResult") -> None:
174
+ status = "success" if result.success else "failure"
175
+ self.pipeline_completions.labels(pipeline_name=pipeline.name, status=status).inc()
176
+
177
+ if result.duration_seconds:
178
+ self.pipeline_duration.labels(pipeline_name=pipeline.name).observe(result.duration_seconds)
179
+
180
+ def record_step_start(self, step: "Step", pipeline_name: str, run_id: str) -> None:
181
+ pass # No-op for Prometheus (only track completion)
182
+
183
+ def record_step_end(self, step: "Step", result: "ExecutionResult", pipeline_name: str, run_id: str) -> None:
184
+ duration = getattr(result, "duration_seconds", None)
185
+ if duration:
186
+ self.step_duration.labels(
187
+ pipeline_name=pipeline_name,
188
+ step_name=step.name,
189
+ ).observe(duration)
190
+
191
+ if getattr(result, "cached", False):
192
+ self.step_cache_hits.labels(
193
+ pipeline_name=pipeline_name,
194
+ step_name=step.name,
195
+ ).inc()
196
+
197
+
198
+ # Global metrics collector
199
+ _metrics_collector: MetricsCollector | None = None
200
+
201
+
202
+ def set_metrics_collector(collector: MetricsCollector) -> None:
203
+ """Set global metrics collector."""
204
+ global _metrics_collector
205
+ _metrics_collector = collector
206
+
207
+
208
+ def get_metrics_collector() -> MetricsCollector | None:
209
+ """Get global metrics collector."""
210
+ return _metrics_collector