flowyml 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/__init__.py +207 -0
- flowyml/assets/__init__.py +22 -0
- flowyml/assets/artifact.py +40 -0
- flowyml/assets/base.py +209 -0
- flowyml/assets/dataset.py +100 -0
- flowyml/assets/featureset.py +301 -0
- flowyml/assets/metrics.py +104 -0
- flowyml/assets/model.py +82 -0
- flowyml/assets/registry.py +157 -0
- flowyml/assets/report.py +315 -0
- flowyml/cli/__init__.py +5 -0
- flowyml/cli/experiment.py +232 -0
- flowyml/cli/init.py +256 -0
- flowyml/cli/main.py +327 -0
- flowyml/cli/run.py +75 -0
- flowyml/cli/stack_cli.py +532 -0
- flowyml/cli/ui.py +33 -0
- flowyml/core/__init__.py +68 -0
- flowyml/core/advanced_cache.py +274 -0
- flowyml/core/approval.py +64 -0
- flowyml/core/cache.py +203 -0
- flowyml/core/checkpoint.py +148 -0
- flowyml/core/conditional.py +373 -0
- flowyml/core/context.py +155 -0
- flowyml/core/error_handling.py +419 -0
- flowyml/core/executor.py +354 -0
- flowyml/core/graph.py +185 -0
- flowyml/core/parallel.py +452 -0
- flowyml/core/pipeline.py +764 -0
- flowyml/core/project.py +253 -0
- flowyml/core/resources.py +424 -0
- flowyml/core/scheduler.py +630 -0
- flowyml/core/scheduler_config.py +32 -0
- flowyml/core/step.py +201 -0
- flowyml/core/step_grouping.py +292 -0
- flowyml/core/templates.py +226 -0
- flowyml/core/versioning.py +217 -0
- flowyml/integrations/__init__.py +1 -0
- flowyml/integrations/keras.py +134 -0
- flowyml/monitoring/__init__.py +1 -0
- flowyml/monitoring/alerts.py +57 -0
- flowyml/monitoring/data.py +102 -0
- flowyml/monitoring/llm.py +160 -0
- flowyml/monitoring/monitor.py +57 -0
- flowyml/monitoring/notifications.py +246 -0
- flowyml/registry/__init__.py +5 -0
- flowyml/registry/model_registry.py +491 -0
- flowyml/registry/pipeline_registry.py +55 -0
- flowyml/stacks/__init__.py +27 -0
- flowyml/stacks/base.py +77 -0
- flowyml/stacks/bridge.py +288 -0
- flowyml/stacks/components.py +155 -0
- flowyml/stacks/gcp.py +499 -0
- flowyml/stacks/local.py +112 -0
- flowyml/stacks/migration.py +97 -0
- flowyml/stacks/plugin_config.py +78 -0
- flowyml/stacks/plugins.py +401 -0
- flowyml/stacks/registry.py +226 -0
- flowyml/storage/__init__.py +26 -0
- flowyml/storage/artifacts.py +246 -0
- flowyml/storage/materializers/__init__.py +20 -0
- flowyml/storage/materializers/base.py +133 -0
- flowyml/storage/materializers/keras.py +185 -0
- flowyml/storage/materializers/numpy.py +94 -0
- flowyml/storage/materializers/pandas.py +142 -0
- flowyml/storage/materializers/pytorch.py +135 -0
- flowyml/storage/materializers/sklearn.py +110 -0
- flowyml/storage/materializers/tensorflow.py +152 -0
- flowyml/storage/metadata.py +931 -0
- flowyml/tracking/__init__.py +1 -0
- flowyml/tracking/experiment.py +211 -0
- flowyml/tracking/leaderboard.py +191 -0
- flowyml/tracking/runs.py +145 -0
- flowyml/ui/__init__.py +15 -0
- flowyml/ui/backend/Dockerfile +31 -0
- flowyml/ui/backend/__init__.py +0 -0
- flowyml/ui/backend/auth.py +163 -0
- flowyml/ui/backend/main.py +187 -0
- flowyml/ui/backend/routers/__init__.py +0 -0
- flowyml/ui/backend/routers/assets.py +45 -0
- flowyml/ui/backend/routers/execution.py +179 -0
- flowyml/ui/backend/routers/experiments.py +49 -0
- flowyml/ui/backend/routers/leaderboard.py +118 -0
- flowyml/ui/backend/routers/notifications.py +72 -0
- flowyml/ui/backend/routers/pipelines.py +110 -0
- flowyml/ui/backend/routers/plugins.py +192 -0
- flowyml/ui/backend/routers/projects.py +85 -0
- flowyml/ui/backend/routers/runs.py +66 -0
- flowyml/ui/backend/routers/schedules.py +222 -0
- flowyml/ui/backend/routers/traces.py +84 -0
- flowyml/ui/frontend/Dockerfile +20 -0
- flowyml/ui/frontend/README.md +315 -0
- flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
- flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
- flowyml/ui/frontend/dist/index.html +16 -0
- flowyml/ui/frontend/index.html +15 -0
- flowyml/ui/frontend/nginx.conf +26 -0
- flowyml/ui/frontend/package-lock.json +3545 -0
- flowyml/ui/frontend/package.json +33 -0
- flowyml/ui/frontend/postcss.config.js +6 -0
- flowyml/ui/frontend/src/App.jsx +21 -0
- flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
- flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
- flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
- flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
- flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
- flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
- flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
- flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
- flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
- flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
- flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
- flowyml/ui/frontend/src/components/Layout.jsx +108 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
- flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
- flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
- flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
- flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
- flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
- flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
- flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
- flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
- flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
- flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
- flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
- flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
- flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
- flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
- flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
- flowyml/ui/frontend/src/index.css +11 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
- flowyml/ui/frontend/src/main.jsx +10 -0
- flowyml/ui/frontend/src/router/index.jsx +39 -0
- flowyml/ui/frontend/src/services/pluginService.js +90 -0
- flowyml/ui/frontend/src/utils/api.js +47 -0
- flowyml/ui/frontend/src/utils/cn.js +6 -0
- flowyml/ui/frontend/tailwind.config.js +31 -0
- flowyml/ui/frontend/vite.config.js +21 -0
- flowyml/ui/utils.py +77 -0
- flowyml/utils/__init__.py +67 -0
- flowyml/utils/config.py +308 -0
- flowyml/utils/debug.py +240 -0
- flowyml/utils/environment.py +346 -0
- flowyml/utils/git.py +319 -0
- flowyml/utils/logging.py +61 -0
- flowyml/utils/performance.py +314 -0
- flowyml/utils/stack_config.py +296 -0
- flowyml/utils/validation.py +270 -0
- flowyml-1.1.0.dist-info/METADATA +372 -0
- flowyml-1.1.0.dist-info/RECORD +159 -0
- flowyml-1.1.0.dist-info/WHEEL +4 -0
- flowyml-1.1.0.dist-info/entry_points.txt +3 -0
- flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Data monitoring and drift detection."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def compute_stats(data: list | np.ndarray) -> dict[str, float]:
|
|
8
|
+
"""Compute basic statistics for a dataset.
|
|
9
|
+
|
|
10
|
+
Args:
|
|
11
|
+
data: Input data (list or numpy array)
|
|
12
|
+
|
|
13
|
+
Returns:
|
|
14
|
+
Dictionary of statistics
|
|
15
|
+
"""
|
|
16
|
+
if isinstance(data, list):
|
|
17
|
+
data = np.array(data)
|
|
18
|
+
|
|
19
|
+
if len(data) == 0:
|
|
20
|
+
return {}
|
|
21
|
+
|
|
22
|
+
stats = {
|
|
23
|
+
"count": float(len(data)),
|
|
24
|
+
"mean": float(np.mean(data)),
|
|
25
|
+
"std": float(np.std(data)),
|
|
26
|
+
"min": float(np.min(data)),
|
|
27
|
+
"max": float(np.max(data)),
|
|
28
|
+
"median": float(np.median(data)),
|
|
29
|
+
"q25": float(np.percentile(data, 25)),
|
|
30
|
+
"q75": float(np.percentile(data, 75)),
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
return stats
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def calculate_psi(expected: np.ndarray, actual: np.ndarray, buckets: int = 10) -> float:
    """Calculate Population Stability Index (PSI) to detect drift.

    Bucket edges are taken from the percentiles of ``expected``. Both samples
    are histogrammed over those edges and the PSI is the sum over buckets of
    ``(e - a) * ln(e / a)``, where ``e`` and ``a`` are the bucket shares of
    the expected and actual samples.

    Args:
        expected: Reference distribution
        actual: Current distribution
        buckets: Number of buckets for histogram

    Returns:
        PSI value as a plain Python float

    Raises:
        ValueError: If ``buckets`` is smaller than 1 or either sample is empty.
    """
    if buckets < 1:
        raise ValueError("buckets must be a positive integer")

    expected = np.asarray(expected, dtype=float).ravel()
    actual = np.asarray(actual, dtype=float).ravel()
    if expected.size == 0 or actual.size == 0:
        raise ValueError("expected and actual must both be non-empty")

    breakpoints = np.arange(0, buckets + 1) / (buckets) * 100
    breakpoints = np.percentile(expected, breakpoints)

    # np.histogram ignores values outside the outermost edges. The edges come
    # from ``expected``, so any ``actual`` value beyond the reference range
    # would be dropped and the actual shares would no longer sum to 1.
    # Clipping assigns those values to the first/last bucket instead.
    actual_in_range = np.clip(actual, breakpoints[0], breakpoints[-1])

    expected_percents = np.histogram(expected, breakpoints)[0] / expected.size
    actual_percents = np.histogram(actual_in_range, breakpoints)[0] / actual.size

    # Empty buckets are floored to a small share so the log ratio stays finite.
    floor = 0.0001
    expected_percents = np.where(expected_percents == 0, floor, expected_percents)
    actual_percents = np.where(actual_percents == 0, floor, actual_percents)

    contributions = (expected_percents - actual_percents) * np.log(expected_percents / actual_percents)
    return float(np.sum(contributions))
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def detect_drift(
    reference_data: list | np.ndarray,
    current_data: list | np.ndarray,
    threshold: float = 0.1,
) -> dict[str, Any]:
    """Detect data drift between reference and current data.

    Args:
        reference_data: Reference dataset (e.g. training data)
        current_data: Current dataset (e.g. inference data)
        threshold: PSI threshold for drift warning

    Returns:
        Drift detection result with the keys ``drift_detected`` (bool),
        ``psi`` (float), ``threshold``, ``reference_stats`` and
        ``current_stats`` (the latter two produced by ``compute_stats``).
    """
    # asarray leaves existing arrays untouched and converts lists (and other
    # sequences) to arrays.
    reference_data = np.asarray(reference_data)
    current_data = np.asarray(current_data)

    # Convert numpy scalars to builtin types: ``psi > threshold`` on a numpy
    # float yields ``np.bool_``, which the json module cannot serialise.
    psi = float(calculate_psi(reference_data, current_data))

    return {
        "drift_detected": bool(psi > threshold),
        "psi": psi,
        "threshold": threshold,
        "reference_stats": compute_stats(reference_data),
        "current_stats": compute_stats(current_data),
    }
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""LLM Monitoring and Observability module."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
import uuid
|
|
5
|
+
from dataclasses import dataclass, field, asdict
|
|
6
|
+
from typing import Any
|
|
7
|
+
import functools
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
|
|
11
|
+
class LLMEvent:
|
|
12
|
+
"""Event representing an LLM interaction."""
|
|
13
|
+
|
|
14
|
+
event_id: str
|
|
15
|
+
trace_id: str
|
|
16
|
+
parent_id: str | None
|
|
17
|
+
event_type: str # 'llm', 'tool', 'chain', 'agent'
|
|
18
|
+
name: str
|
|
19
|
+
inputs: dict[str, Any]
|
|
20
|
+
outputs: dict[str, Any] | None = None
|
|
21
|
+
start_time: float = field(default_factory=time.time)
|
|
22
|
+
end_time: float | None = None
|
|
23
|
+
duration: float | None = None
|
|
24
|
+
status: str = "running" # 'running', 'success', 'error'
|
|
25
|
+
error: str | None = None
|
|
26
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
|
27
|
+
|
|
28
|
+
# Token usage and cost
|
|
29
|
+
prompt_tokens: int = 0
|
|
30
|
+
completion_tokens: int = 0
|
|
31
|
+
total_tokens: int = 0
|
|
32
|
+
cost: float = 0.0
|
|
33
|
+
model: str | None = None
|
|
34
|
+
|
|
35
|
+
def end(self, outputs: dict[str, Any] | None = None, error: str | None = None) -> None:
|
|
36
|
+
"""End the event."""
|
|
37
|
+
self.end_time = time.time()
|
|
38
|
+
self.duration = self.end_time - self.start_time
|
|
39
|
+
self.outputs = outputs
|
|
40
|
+
if error:
|
|
41
|
+
self.status = "error"
|
|
42
|
+
self.error = str(error)
|
|
43
|
+
else:
|
|
44
|
+
self.status = "success"
|
|
45
|
+
|
|
46
|
+
def to_dict(self) -> dict[str, Any]:
|
|
47
|
+
"""Convert to dictionary."""
|
|
48
|
+
return asdict(self)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class LLMTracer:
    """Tracer that records LLM events on a stack and persists them on completion."""

    def __init__(self):
        self.current_trace_id: str | None = None
        self.event_stack: list[LLMEvent] = []
        self._metadata_store = None

    @property
    def metadata_store(self):
        """Metadata store used to persist events; created on first access."""
        if self._metadata_store is not None:
            return self._metadata_store

        # Imported here rather than at module level, so the storage module is
        # only loaded once an event actually has to be saved.
        from flowyml.storage.metadata import SQLiteMetadataStore

        self._metadata_store = SQLiteMetadataStore()
        return self._metadata_store

    def start_trace(self, name: str = "root") -> str:
        """Begin a new trace and return its freshly generated id.

        ``name`` is accepted but not used by this implementation.
        """
        new_trace_id = str(uuid.uuid4())
        self.current_trace_id = new_trace_id
        return new_trace_id

    def start_event(
        self,
        name: str,
        event_type: str,
        inputs: dict[str, Any],
        metadata: dict[str, Any] | None = None,
        parent_id: str | None = None,
    ) -> LLMEvent:
        """Create an event, push it on the stack and return it.

        A trace is started automatically when none is active. When no
        ``parent_id`` is given, the event on top of the stack (if any)
        becomes the parent.
        """
        if not self.current_trace_id:
            self.start_trace()

        if parent_id:
            resolved_parent = parent_id
        elif self.event_stack:
            resolved_parent = self.event_stack[-1].event_id
        else:
            resolved_parent = None

        new_event = LLMEvent(
            event_id=str(uuid.uuid4()),
            trace_id=self.current_trace_id,
            parent_id=resolved_parent,
            event_type=event_type,
            name=name,
            inputs=inputs,
            metadata=metadata or {},
        )
        self.event_stack.append(new_event)
        return new_event

    def end_event(
        self,
        outputs: dict[str, Any] | None = None,
        error: str | None = None,
        metrics: dict[str, Any] | None = None,
    ):
        """Finish the most recently started event and persist it.

        Returns the finished event, or ``None`` when no event is open.
        """
        if not self.event_stack:
            return None

        finished = self.event_stack.pop()
        finished.end(outputs, error)

        if metrics:
            # Copy token usage and cost, defaulting missing entries to zero.
            numeric_defaults = (
                ("prompt_tokens", 0),
                ("completion_tokens", 0),
                ("total_tokens", 0),
                ("cost", 0.0),
            )
            for attr, default in numeric_defaults:
                setattr(finished, attr, metrics.get(attr, default))
            finished.model = metrics.get("model")

        # Save to storage
        self.metadata_store.save_trace_event(finished.to_dict())

        return finished
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# Global tracer instance
|
|
123
|
+
tracer = LLMTracer()
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def trace_llm(name: str | None = None, event_type: str = "llm"):
    """Decorator to trace LLM calls.

    Each call of the decorated function is recorded as one event on the
    module-level ``tracer``. Positional and keyword arguments are stored as
    strings, the stringified return value is stored as the output, and an
    exception raised by the function is recorded as the event's error before
    being re-raised.

    Args:
        name: Event name; defaults to the decorated function's ``__name__``.
        event_type: Event type stored on the event.

    Returns:
        A decorator that wraps the function with tracing.
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            event_name = name or func.__name__

            # Capture inputs
            inputs = {
                "args": [str(a) for a in args],
                "kwargs": {k: str(v) for k, v in kwargs.items()},
            }

            tracer.start_event(event_name, event_type, inputs)

            # Only the wrapped call sits inside the try block. If closing the
            # event were guarded as well, a failure while saving it would
            # reach the except branch and end a second (parent) event.
            try:
                result = func(*args, **kwargs)
            except Exception as exc:
                # str(exc) is empty for exceptions raised without a message;
                # fall back to the class name so the error is never blank.
                tracer.end_event(error=str(exc) or type(exc).__name__)
                raise

            # Try to extract metrics if result has them (e.g. OpenAI response)
            metrics = {}
            if hasattr(result, "usage"):  # OpenAI style
                usage = result.usage
                metrics["prompt_tokens"] = getattr(usage, "prompt_tokens", 0)
                metrics["completion_tokens"] = getattr(usage, "completion_tokens", 0)
                metrics["total_tokens"] = getattr(usage, "total_tokens", 0)

            tracer.end_event(outputs={"result": str(result)}, metrics=metrics)
            return result

        return wrapper

    return decorator
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from flowyml.monitoring.alerts import alert_manager, AlertLevel
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Monitor:
    """Common base for health monitors; subclasses provide ``check``."""

    def __init__(self, name: str):
        # Label identifying this monitor instance.
        self.name = name

    def check(self) -> bool:
        """Run the health check and return True when healthy.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SystemMonitor(Monitor):
    """Monitors system resources.

    CPU and memory usage are read through ``psutil``. A warning alert is sent
    for every resource whose usage exceeds its threshold.
    """

    # Usage percentages above which a resource counts as unhealthy.
    cpu_threshold: float = 90
    memory_threshold: float = 90

    # Sampling window in seconds for the CPU reading. psutil documents that
    # the first non-blocking call (interval of None/0) returns a meaningless
    # 0.0, so a short blocking sample is taken instead.
    cpu_sample_interval: float = 0.1

    def check(self) -> bool:
        """Check CPU and memory usage.

        Returns:
            True when both readings are within their thresholds, or when
            ``psutil`` is not installed; False otherwise.
        """
        try:
            import psutil
        except ImportError:
            return True  # Skip if psutil not installed

        cpu = psutil.cpu_percent(interval=self.cpu_sample_interval)
        mem = psutil.virtual_memory().percent

        # Evaluate both resources before returning so that high CPU usage
        # does not hide a simultaneous memory problem.
        healthy = True

        if cpu > self.cpu_threshold:
            alert_manager.send_alert(
                "High CPU Usage",
                f"CPU usage is at {cpu}%",
                AlertLevel.WARNING,
            )
            healthy = False

        if mem > self.memory_threshold:
            alert_manager.send_alert(
                "High Memory Usage",
                f"Memory usage is at {mem}%",
                AlertLevel.WARNING,
            )
            healthy = False

        return healthy
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class PipelineMonitor(Monitor):
    """Health monitor bound to one pipeline.

    The monitor is named ``pipeline-<pipeline_name>``. The actual health
    check is not implemented yet.
    """

    def __init__(self, pipeline_name: str):
        monitor_name = f"pipeline-{pipeline_name}"
        super().__init__(monitor_name)
        self.pipeline_name = pipeline_name
        # Stored for the future check logic; nothing in this class reads it yet.
        self.failed_runs_threshold = 3

    def check(self) -> bool:
        """Placeholder check that always reports healthy."""
        # Logic to check recent runs from metadata store
        # For now, placeholder
        return True
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""Notification system for pipeline events."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Any
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
import contextlib
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class Notification:
    """A notification about a pipeline event.

    Instances are passed to every ``NotificationChannel.send`` implementation.
    """

    # Short headline; used as the Slack attachment title and the email subject.
    title: str
    # Main text of the notification.
    message: str
    level: str  # 'info', 'warning', 'error', 'success'
    # Creation time of the notification.
    timestamp: datetime
    # Extra key/value context attached by the sender.
    metadata: dict[str, Any]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class NotificationChannel(ABC):
    """Abstract interface that every notification channel implements."""

    @abstractmethod
    def send(self, notification: Notification) -> bool:
        """Deliver ``notification`` through this channel.

        Implementations return a bool; the channels in this module return
        True when the notification was delivered and False otherwise.
        """
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ConsoleNotifier(NotificationChannel):
    """Print notifications to console."""

    # Icon shown in front of the message, keyed by notification level.
    _ICONS = {
        "info": "ℹ️",
        "warning": "⚠️",
        "error": "❌",
        "success": "✅",
    }
    # Icon used for levels that are not listed above.
    _DEFAULT_ICON = "📢"

    def send(self, notification: Notification) -> bool:
        """Write the notification to standard output.

        Args:
            notification: Notification to print.

        Returns:
            Always True.
        """
        icon = self._ICONS.get(notification.level, self._DEFAULT_ICON)
        level = str(notification.level).upper()
        # Previously the icon was looked up and then discarded, so nothing
        # ever reached the console.
        print(f"{icon} [{level}] {notification.title}: {notification.message}")

        return True
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class SlackNotifier(NotificationChannel):
    """Send notifications to Slack."""

    def __init__(self, webhook_url: str | None = None, timeout: float = 10.0):
        """Configure the Slack channel.

        Args:
            webhook_url: Webhook URL to post to. Falls back to the
                ``SLACK_WEBHOOK_URL`` environment variable.
            timeout: Seconds to wait for the webhook request before giving up.
        """
        self.webhook_url = webhook_url or os.getenv("SLACK_WEBHOOK_URL")
        self.timeout = timeout

    def send(self, notification: Notification) -> bool:
        """Post the notification to the webhook as a coloured attachment.

        Returns:
            True when the webhook answers with HTTP 200. False when no webhook
            URL is configured, ``requests`` cannot be imported, or the request
            fails or times out.
        """
        if not self.webhook_url:
            return False

        try:
            import requests

            color = {
                "info": "#36a64f",
                "warning": "#ff9900",
                "error": "#ff0000",
                "success": "#00ff00",
            }.get(notification.level, "#cccccc")

            payload = {
                "attachments": [
                    {
                        "color": color,
                        "title": notification.title,
                        "text": notification.message,
                        "footer": "flowyml",
                        "ts": int(notification.timestamp.timestamp()),
                    },
                ],
            }

            # requests waits indefinitely unless a timeout is given, which
            # would stall the caller on an unresponsive endpoint.
            response = requests.post(self.webhook_url, json=payload, timeout=self.timeout)
            return response.status_code == 200
        except Exception:
            # Best-effort channel: any failure is reported as "not sent".
            return False
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class EmailNotifier(NotificationChannel):
    """Send notifications via email."""

    def __init__(
        self,
        smtp_host: str | None = None,
        smtp_port: int = 587,
        username: str | None = None,
        password: str | None = None,
        from_addr: str | None = None,
        to_addrs: list[str] | None = None,
        timeout: float = 10.0,
    ):
        """Configure the email channel.

        Args:
            smtp_host: SMTP server host; falls back to ``SMTP_HOST``.
            smtp_port: SMTP server port.
            username: Login name; falls back to ``SMTP_USERNAME``.
            password: Login password; falls back to ``SMTP_PASSWORD``.
            from_addr: Sender address; falls back to ``SMTP_FROM``.
            to_addrs: Recipient addresses.
            timeout: Seconds to wait on SMTP socket operations.
        """
        self.smtp_host = smtp_host or os.getenv("SMTP_HOST")
        self.smtp_port = smtp_port
        self.username = username or os.getenv("SMTP_USERNAME")
        self.password = password or os.getenv("SMTP_PASSWORD")
        self.from_addr = from_addr or os.getenv("SMTP_FROM")
        self.to_addrs = to_addrs or []
        self.timeout = timeout

    def send(self, notification: Notification) -> bool:
        """Send the notification as a plain-text email over STARTTLS.

        Returns:
            True when the message was handed to the SMTP server. False when
            the configuration is incomplete, there are no recipients, or
            sending fails.
        """
        if not all([self.smtp_host, self.username, self.password, self.from_addr]):
            return False

        # Without recipients there is nothing to deliver; skip the connection
        # and login round-trip entirely.
        if not self.to_addrs:
            return False

        try:
            import smtplib
            from email.mime.text import MIMEText
            from email.mime.multipart import MIMEMultipart

            msg = MIMEMultipart()
            msg["From"] = self.from_addr
            msg["To"] = ", ".join(self.to_addrs)
            msg["Subject"] = notification.title

            body = f"{notification.message}\n\nTime: {notification.timestamp}"
            msg.attach(MIMEText(body, "plain"))

            # The timeout keeps an unreachable server from blocking the caller.
            with smtplib.SMTP(self.smtp_host, self.smtp_port, timeout=self.timeout) as server:
                server.starttls()
                server.login(self.username, self.password)
                server.send_message(msg)

            return True
        except Exception:
            # Best-effort channel: any failure is reported as "not sent".
            return False
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class NotificationManager:
    """Manage notifications across channels.

    Examples:
        >>> notifier = NotificationManager()
        >>> notifier.add_channel(ConsoleNotifier())
        >>> notifier.add_channel(SlackNotifier())
        >>> # Send notification
        >>> notifier.notify(title="Pipeline Failed", message="Training pipeline failed at step 3", level="error")
    """

    def __init__(self):
        self.channels: list[NotificationChannel] = []
        # When False, notify() returns without contacting any channel.
        self.enabled = True

    def add_channel(self, channel: NotificationChannel) -> None:
        """Add a notification channel."""
        self.channels.append(channel)

    def remove_channel(self, channel: NotificationChannel) -> None:
        """Remove a notification channel; unknown channels are ignored."""
        if channel in self.channels:
            self.channels.remove(channel)

    def notify(
        self,
        title: str,
        message: str,
        level: str = "info",
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Send a notification to all channels.

        Delivery is best-effort: an exception raised by one channel is logged
        and the remaining channels are still tried.

        Args:
            title: Notification title.
            message: Notification text.
            level: One of 'info', 'warning', 'error', 'success'.
            metadata: Optional extra context stored on the notification.
        """
        if not self.enabled:
            return

        notification = Notification(
            title=title,
            message=message,
            level=level,
            timestamp=datetime.now(),
            metadata=metadata or {},
        )

        # Imported locally so this block does not depend on a module-level import.
        import logging

        logger = logging.getLogger(__name__)

        for channel in self.channels:
            try:
                channel.send(notification)
            except Exception:
                # A broken channel must not stop the others, but the failure
                # is logged instead of being silently discarded.
                logger.warning(
                    "Notification channel %s failed to send %r",
                    type(channel).__name__,
                    title,
                    exc_info=True,
                )

    def on_pipeline_start(self, pipeline_name: str, run_id: str) -> None:
        """Notify when pipeline starts."""
        self.notify(
            title="Pipeline Started",
            message=f"Pipeline '{pipeline_name}' started (Run: {run_id})",
            level="info",
            metadata={"pipeline": pipeline_name, "run_id": run_id},
        )

    def on_pipeline_success(self, pipeline_name: str, run_id: str, duration: float) -> None:
        """Notify when pipeline succeeds."""
        self.notify(
            title="Pipeline Completed",
            message=f"Pipeline '{pipeline_name}' completed successfully in {duration:.2f}s",
            level="success",
            metadata={"pipeline": pipeline_name, "run_id": run_id, "duration": duration},
        )

    def on_pipeline_failure(self, pipeline_name: str, run_id: str, error: str) -> None:
        """Notify when pipeline fails."""
        self.notify(
            title="Pipeline Failed",
            message=f"Pipeline '{pipeline_name}' failed: {error}",
            level="error",
            metadata={"pipeline": pipeline_name, "run_id": run_id, "error": error},
        )

    def on_drift_detected(self, feature: str, psi: float) -> None:
        """Notify when data drift is detected."""
        self.notify(
            title="Data Drift Detected",
            message=f"Drift detected in feature '{feature}' (PSI: {psi:.4f})",
            level="warning",
            metadata={"feature": feature, "psi": psi},
        )
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# Global notification manager
|
|
214
|
+
_global_notifier = NotificationManager()
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def get_notifier() -> NotificationManager:
    """Get the global notification manager.

    Returns:
        The module-level ``NotificationManager`` instance; every call returns
        the same object.
    """
    return _global_notifier
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def configure_notifications(
    console: bool = True,
    slack_webhook: str | None = None,
    email_config: dict[str, Any] | None = None,
) -> None:
    """Replace the global notifier's channels with the requested ones.

    Args:
        console: Add a console channel when true
        slack_webhook: Slack webhook URL; a Slack channel is added when set
        email_config: Keyword arguments for ``EmailNotifier``; an email
            channel is added when set
    """
    manager = get_notifier()

    # Start from an empty channel list so earlier configuration is dropped.
    manager.channels = []

    # Each entry pairs the option that enables a channel with a factory that
    # builds it; factories run only for enabled channels, in this order.
    channel_factories = (
        (console, ConsoleNotifier),
        (slack_webhook, lambda: SlackNotifier(slack_webhook)),
        (email_config, lambda: EmailNotifier(**email_config)),
    )
    for enabled, build_channel in channel_factories:
        if enabled:
            manager.add_channel(build_channel())
|