flowyml-1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. flowyml/__init__.py +207 -0
  2. flowyml/assets/__init__.py +22 -0
  3. flowyml/assets/artifact.py +40 -0
  4. flowyml/assets/base.py +209 -0
  5. flowyml/assets/dataset.py +100 -0
  6. flowyml/assets/featureset.py +301 -0
  7. flowyml/assets/metrics.py +104 -0
  8. flowyml/assets/model.py +82 -0
  9. flowyml/assets/registry.py +157 -0
  10. flowyml/assets/report.py +315 -0
  11. flowyml/cli/__init__.py +5 -0
  12. flowyml/cli/experiment.py +232 -0
  13. flowyml/cli/init.py +256 -0
  14. flowyml/cli/main.py +327 -0
  15. flowyml/cli/run.py +75 -0
  16. flowyml/cli/stack_cli.py +532 -0
  17. flowyml/cli/ui.py +33 -0
  18. flowyml/core/__init__.py +68 -0
  19. flowyml/core/advanced_cache.py +274 -0
  20. flowyml/core/approval.py +64 -0
  21. flowyml/core/cache.py +203 -0
  22. flowyml/core/checkpoint.py +148 -0
  23. flowyml/core/conditional.py +373 -0
  24. flowyml/core/context.py +155 -0
  25. flowyml/core/error_handling.py +419 -0
  26. flowyml/core/executor.py +354 -0
  27. flowyml/core/graph.py +185 -0
  28. flowyml/core/parallel.py +452 -0
  29. flowyml/core/pipeline.py +764 -0
  30. flowyml/core/project.py +253 -0
  31. flowyml/core/resources.py +424 -0
  32. flowyml/core/scheduler.py +630 -0
  33. flowyml/core/scheduler_config.py +32 -0
  34. flowyml/core/step.py +201 -0
  35. flowyml/core/step_grouping.py +292 -0
  36. flowyml/core/templates.py +226 -0
  37. flowyml/core/versioning.py +217 -0
  38. flowyml/integrations/__init__.py +1 -0
  39. flowyml/integrations/keras.py +134 -0
  40. flowyml/monitoring/__init__.py +1 -0
  41. flowyml/monitoring/alerts.py +57 -0
  42. flowyml/monitoring/data.py +102 -0
  43. flowyml/monitoring/llm.py +160 -0
  44. flowyml/monitoring/monitor.py +57 -0
  45. flowyml/monitoring/notifications.py +246 -0
  46. flowyml/registry/__init__.py +5 -0
  47. flowyml/registry/model_registry.py +491 -0
  48. flowyml/registry/pipeline_registry.py +55 -0
  49. flowyml/stacks/__init__.py +27 -0
  50. flowyml/stacks/base.py +77 -0
  51. flowyml/stacks/bridge.py +288 -0
  52. flowyml/stacks/components.py +155 -0
  53. flowyml/stacks/gcp.py +499 -0
  54. flowyml/stacks/local.py +112 -0
  55. flowyml/stacks/migration.py +97 -0
  56. flowyml/stacks/plugin_config.py +78 -0
  57. flowyml/stacks/plugins.py +401 -0
  58. flowyml/stacks/registry.py +226 -0
  59. flowyml/storage/__init__.py +26 -0
  60. flowyml/storage/artifacts.py +246 -0
  61. flowyml/storage/materializers/__init__.py +20 -0
  62. flowyml/storage/materializers/base.py +133 -0
  63. flowyml/storage/materializers/keras.py +185 -0
  64. flowyml/storage/materializers/numpy.py +94 -0
  65. flowyml/storage/materializers/pandas.py +142 -0
  66. flowyml/storage/materializers/pytorch.py +135 -0
  67. flowyml/storage/materializers/sklearn.py +110 -0
  68. flowyml/storage/materializers/tensorflow.py +152 -0
  69. flowyml/storage/metadata.py +931 -0
  70. flowyml/tracking/__init__.py +1 -0
  71. flowyml/tracking/experiment.py +211 -0
  72. flowyml/tracking/leaderboard.py +191 -0
  73. flowyml/tracking/runs.py +145 -0
  74. flowyml/ui/__init__.py +15 -0
  75. flowyml/ui/backend/Dockerfile +31 -0
  76. flowyml/ui/backend/__init__.py +0 -0
  77. flowyml/ui/backend/auth.py +163 -0
  78. flowyml/ui/backend/main.py +187 -0
  79. flowyml/ui/backend/routers/__init__.py +0 -0
  80. flowyml/ui/backend/routers/assets.py +45 -0
  81. flowyml/ui/backend/routers/execution.py +179 -0
  82. flowyml/ui/backend/routers/experiments.py +49 -0
  83. flowyml/ui/backend/routers/leaderboard.py +118 -0
  84. flowyml/ui/backend/routers/notifications.py +72 -0
  85. flowyml/ui/backend/routers/pipelines.py +110 -0
  86. flowyml/ui/backend/routers/plugins.py +192 -0
  87. flowyml/ui/backend/routers/projects.py +85 -0
  88. flowyml/ui/backend/routers/runs.py +66 -0
  89. flowyml/ui/backend/routers/schedules.py +222 -0
  90. flowyml/ui/backend/routers/traces.py +84 -0
  91. flowyml/ui/frontend/Dockerfile +20 -0
  92. flowyml/ui/frontend/README.md +315 -0
  93. flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
  94. flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
  95. flowyml/ui/frontend/dist/index.html +16 -0
  96. flowyml/ui/frontend/index.html +15 -0
  97. flowyml/ui/frontend/nginx.conf +26 -0
  98. flowyml/ui/frontend/package-lock.json +3545 -0
  99. flowyml/ui/frontend/package.json +33 -0
  100. flowyml/ui/frontend/postcss.config.js +6 -0
  101. flowyml/ui/frontend/src/App.jsx +21 -0
  102. flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
  103. flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
  104. flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
  105. flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
  106. flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
  107. flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
  108. flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
  109. flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
  110. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
  111. flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
  112. flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
  113. flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
  114. flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
  115. flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
  116. flowyml/ui/frontend/src/components/Layout.jsx +108 -0
  117. flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
  118. flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
  119. flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
  120. flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
  121. flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
  122. flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
  123. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
  124. flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
  125. flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
  126. flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
  127. flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
  128. flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
  129. flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
  130. flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
  131. flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
  132. flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
  133. flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
  134. flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
  135. flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
  136. flowyml/ui/frontend/src/index.css +11 -0
  137. flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
  138. flowyml/ui/frontend/src/main.jsx +10 -0
  139. flowyml/ui/frontend/src/router/index.jsx +39 -0
  140. flowyml/ui/frontend/src/services/pluginService.js +90 -0
  141. flowyml/ui/frontend/src/utils/api.js +47 -0
  142. flowyml/ui/frontend/src/utils/cn.js +6 -0
  143. flowyml/ui/frontend/tailwind.config.js +31 -0
  144. flowyml/ui/frontend/vite.config.js +21 -0
  145. flowyml/ui/utils.py +77 -0
  146. flowyml/utils/__init__.py +67 -0
  147. flowyml/utils/config.py +308 -0
  148. flowyml/utils/debug.py +240 -0
  149. flowyml/utils/environment.py +346 -0
  150. flowyml/utils/git.py +319 -0
  151. flowyml/utils/logging.py +61 -0
  152. flowyml/utils/performance.py +314 -0
  153. flowyml/utils/stack_config.py +296 -0
  154. flowyml/utils/validation.py +270 -0
  155. flowyml-1.1.0.dist-info/METADATA +372 -0
  156. flowyml-1.1.0.dist-info/RECORD +159 -0
  157. flowyml-1.1.0.dist-info/WHEEL +4 -0
  158. flowyml-1.1.0.dist-info/entry_points.txt +3 -0
  159. flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
flowyml/core/scheduler.py
@@ -0,0 +1,630 @@
+ """Pipeline scheduling and automation."""
+
+ import contextlib
+ import json
+ import logging
+ import sqlite3
+ import threading
+ import time
+ from collections.abc import Callable
+ from dataclasses import dataclass
+ from datetime import datetime, timedelta
+ from pathlib import Path
+ from typing import Any, Optional
+
+ from flowyml.core.scheduler_config import SchedulerConfig
+
+ logger = logging.getLogger(__name__)
+
+ # Optional dependencies
+ try:
+     from croniter import croniter
+ except ImportError:
+     croniter = None
+
+ try:
+     import pytz
+ except ImportError:
+     pytz = None
+
+
+ @dataclass
+ class Schedule:
+     """Represents a pipeline schedule."""
+
+     pipeline_name: str
+     schedule_type: str  # 'cron', 'interval', 'daily', 'hourly'
+     schedule_value: str  # cron expression or interval in seconds
+     pipeline_func: Callable
+     context: dict[str, Any] | None = None
+     enabled: bool = True
+     last_run: datetime | None = None
+     next_run: datetime | None = None
+     timezone: str = "UTC"
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary."""
+         return {
+             "pipeline_name": self.pipeline_name,
+             "schedule_type": self.schedule_type,
+             "schedule_value": self.schedule_value,
+             "context": self.context,
+             "enabled": self.enabled,
+             "last_run": self.last_run.isoformat() if self.last_run else None,
+             "next_run": self.next_run.isoformat() if self.next_run else None,
+             "timezone": self.timezone,
+         }
+
+     @classmethod
+     def from_dict(cls, data: dict[str, Any], pipeline_func: Callable) -> "Schedule":
+         """Create from dictionary."""
+         return cls(
+             pipeline_name=data["pipeline_name"],
+             schedule_type=data["schedule_type"],
+             schedule_value=data["schedule_value"],
+             pipeline_func=pipeline_func,
+             context=data.get("context"),
+             enabled=data.get("enabled", True),
+             last_run=datetime.fromisoformat(data["last_run"]) if data.get("last_run") else None,
+             next_run=datetime.fromisoformat(data["next_run"]) if data.get("next_run") else None,
+             timezone=data.get("timezone", "UTC"),
+         )
+
+
+ @dataclass
+ class ScheduleExecution:
+     """Record of schedule execution."""
+
+     schedule_name: str
+     started_at: datetime
+     completed_at: Optional[datetime] = None
+     success: bool = False
+     error: Optional[str] = None
+     duration_seconds: Optional[float] = None
+
+
+ class SchedulerMetrics:
+     """Track scheduler metrics."""
+
+     def __init__(self):
+         self.total_runs = 0
+         self.successful_runs = 0
+         self.failed_runs = 0
+         self.avg_duration_seconds = 0.0
+         self.last_heartbeat = datetime.now()
+
+     def update(self, execution: ScheduleExecution):
+         """Update metrics with new execution."""
+         self.total_runs += 1
+         if execution.success:
+             self.successful_runs += 1
+         else:
+             self.failed_runs += 1
+
+         if execution.duration_seconds is not None:
+             # Moving average
+             alpha = 0.1
+             self.avg_duration_seconds = alpha * execution.duration_seconds + (1 - alpha) * self.avg_duration_seconds
+
+     def to_dict(self) -> dict:
+         """Convert to dictionary."""
+         return {
+             "total_runs": self.total_runs,
+             "successful_runs": self.successful_runs,
+             "failed_runs": self.failed_runs,
+             "success_rate": self.successful_runs / max(self.total_runs, 1),
+             "avg_duration_seconds": self.avg_duration_seconds,
+             "last_heartbeat": self.last_heartbeat.isoformat(),
+         }
+
+
+ class SchedulerPersistence:
+     """Persist schedules to SQLite database."""
+
+     def __init__(self, db_path: Optional[str] = None):
+         self.db_path = db_path or str(Path.cwd() / ".flowyml_scheduler.db")
+         self._init_db()
+
+     def _init_db(self):
+         """Initialize database schema."""
+         with sqlite3.connect(self.db_path) as conn:
+             conn.execute(
+                 """
+                 CREATE TABLE IF NOT EXISTS schedules (
+                     name TEXT PRIMARY KEY,
+                     data TEXT NOT NULL,
+                     updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                 )
+                 """,
+             )
+             conn.execute(
+                 """
+                 CREATE TABLE IF NOT EXISTS executions (
+                     id INTEGER PRIMARY KEY AUTOINCREMENT,
+                     schedule_name TEXT NOT NULL,
+                     started_at TIMESTAMP NOT NULL,
+                     completed_at TIMESTAMP,
+                     success BOOLEAN,
+                     error TEXT,
+                     duration_seconds REAL,
+                     FOREIGN KEY(schedule_name) REFERENCES schedules(name)
+                 )
+                 """,
+             )
+
+     def save_schedule(self, schedule: Schedule) -> None:
+         """Save schedule to database."""
+         data = schedule.to_dict()
+         with sqlite3.connect(self.db_path) as conn:
+             conn.execute(
+                 "INSERT OR REPLACE INTO schedules (name, data) VALUES (?, ?)",
+                 (schedule.pipeline_name, json.dumps(data)),
+             )
+
+     def load_schedules(self, pipeline_funcs: dict[str, Callable]) -> dict[str, Schedule]:
+         """Load all schedules from database."""
+         schedules = {}
+         with sqlite3.connect(self.db_path) as conn:
+             cursor = conn.execute("SELECT name, data FROM schedules")
+             for name, data_json in cursor:
+                 try:
+                     data = json.loads(data_json)
+                     if name in pipeline_funcs:
+                         schedules[name] = Schedule.from_dict(data, pipeline_funcs[name])
+                 except Exception as e:
+                     logger.error(f"Failed to load schedule {name}: {e}")
+         return schedules
+
+     def delete_schedule(self, name: str) -> None:
+         """Delete schedule from database."""
+         with sqlite3.connect(self.db_path) as conn:
+             conn.execute("DELETE FROM schedules WHERE name = ?", (name,))
+             conn.execute("DELETE FROM executions WHERE schedule_name = ?", (name,))
+
+     def save_execution(self, execution: ScheduleExecution) -> None:
+         """Save execution record."""
+         with sqlite3.connect(self.db_path) as conn:
+             conn.execute(
+                 """
+                 INSERT INTO executions
+                 (schedule_name, started_at, completed_at, success, error, duration_seconds)
+                 VALUES (?, ?, ?, ?, ?, ?)
+                 """,
+                 (
+                     execution.schedule_name,
+                     execution.started_at,
+                     execution.completed_at,
+                     execution.success,
+                     execution.error,
+                     execution.duration_seconds,
+                 ),
+             )
+
+     def get_history(self, schedule_name: str, limit: int = 50) -> list[dict[str, Any]]:
+         """Get execution history for a schedule."""
+         history = []
+         with sqlite3.connect(self.db_path) as conn:
+             cursor = conn.execute(
+                 """
+                 SELECT started_at, completed_at, success, error, duration_seconds
+                 FROM executions
+                 WHERE schedule_name = ?
+                 ORDER BY started_at DESC
+                 LIMIT ?
+                 """,
+                 (schedule_name, limit),
+             )
+             for row in cursor:
+                 history.append(
+                     {
+                         "started_at": row[0],
+                         "completed_at": row[1],
+                         "success": bool(row[2]),
+                         "error": row[3],
+                         "duration_seconds": row[4],
+                     },
+                 )
+         return history
+
+
+ class DistributedLock:
+     """Distributed lock for coordinating multiple scheduler instances."""
+
+     def __init__(self, backend: str = "file", redis_url: Optional[str] = None):
+         self.backend = backend
+         self.redis_url = redis_url
+         self._redis = None
+         if backend == "redis" and redis_url:
+             try:
+                 import redis
+
+                 self._redis = redis.from_url(redis_url)
+             except ImportError:
+                 logger.warning("Redis not installed, falling back to file lock")
+                 self.backend = "file"
+
+     def acquire(self, schedule_name: str, timeout: int = 60) -> bool:
+         """Acquire lock for schedule execution."""
+         if self.backend == "redis" and self._redis:
+             lock_key = f"flowyml:scheduler:lock:{schedule_name}"
+             return bool(self._redis.set(lock_key, "locked", ex=timeout, nx=True))
+         else:
+             # Simple file-based lock (not truly distributed across machines, but works for processes)
+             lock_file = f".lock_{schedule_name}"
+             lock_path = Path(lock_file)
+             if lock_path.exists():
+                 # Check if stale
+                 if time.time() - lock_path.stat().st_mtime > timeout:
+                     lock_path.unlink()
+                 else:
+                     return False
+             try:
+                 with open(lock_file, "w") as f:
+                     f.write(str(time.time()))
+                 return True
+             except OSError:
+                 return False
+
+     def release(self, schedule_name: str) -> None:
+         """Release lock after execution."""
+         if self.backend == "redis" and self._redis:
+             lock_key = f"flowyml:scheduler:lock:{schedule_name}"
+             self._redis.delete(lock_key)
+         else:
+             lock_file = f".lock_{schedule_name}"
+             lock_path = Path(lock_file)
+             if lock_path.exists():
+                 with contextlib.suppress(OSError):
+                     lock_path.unlink()
+
+
+ class PipelineScheduler:
+     """Schedule pipelines to run automatically.
+
+     Enhanced scheduler with persistence, distributed locking, and cron support.
+     """
+
+     def __init__(
+         self,
+         config: Optional[SchedulerConfig] = None,
+         on_success: Optional[Callable] = None,
+         on_failure: Optional[Callable] = None,
+     ):
+         self.config = config or SchedulerConfig.from_env()
+         self.schedules: dict[str, Schedule] = {}
+         self.running = False
+         self._thread = None
+         self.on_success = on_success
+         self.on_failure = on_failure
+         self.metrics = SchedulerMetrics()
+
+         # Persistence
+         self._persistence = None
+         if self.config.persist_schedules:
+             self._persistence = SchedulerPersistence(self.config.db_path)
+
+         # Locking
+         self._lock = DistributedLock(self.config.lock_backend, self.config.redis_url)
+
+         # Registry of pipeline functions for reloading
+         self._pipeline_funcs: dict[str, Callable] = {}
+
+     def _register_func(self, name: str, func: Callable):
+         """Register pipeline function for persistence reloading."""
+         self._pipeline_funcs[name] = func
+
+     def schedule_daily(
+         self,
+         name: str,
+         pipeline_func: Callable,
+         hour: int = 0,
+         minute: int = 0,
+         timezone: str = "UTC",
+         context: dict[str, Any] | None = None,
+     ) -> Schedule:
+         """Schedule pipeline to run daily at a specific time."""
+         self._register_func(name, pipeline_func)
+
+         if pytz:
+             tz = pytz.timezone(timezone)
+             now = datetime.now(tz)
+             next_run = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
+             if next_run <= now:
+                 next_run += timedelta(days=1)
+         else:
+             # Fallback to local time if pytz missing
+             now = datetime.now()
+             next_run = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
+             if next_run <= now:
+                 next_run += timedelta(days=1)
+
+         schedule = Schedule(
+             pipeline_name=name,
+             schedule_type="daily",
+             schedule_value=f"{hour:02d}:{minute:02d}",
+             pipeline_func=pipeline_func,
+             context=context,
+             next_run=next_run,
+             timezone=timezone,
+         )
+         self.schedules[name] = schedule
+         if self._persistence:
+             self._persistence.save_schedule(schedule)
+         return schedule
+
+     def schedule_interval(
+         self,
+         name: str,
+         pipeline_func: Callable,
+         hours: int = 0,
+         minutes: int = 0,
+         seconds: int = 0,
+         timezone: str = "UTC",
+         context: dict[str, Any] | None = None,
+     ) -> Schedule:
+         """Schedule pipeline to run at regular intervals."""
+         self._register_func(name, pipeline_func)
+
+         interval_seconds = hours * 3600 + minutes * 60 + seconds
+
+         if pytz:
+             tz = pytz.timezone(timezone)
+             now = datetime.now(tz)
+         else:
+             now = datetime.now()
+
+         next_run = now + timedelta(seconds=interval_seconds)
+
+         schedule = Schedule(
+             pipeline_name=name,
+             schedule_type="interval",
+             schedule_value=str(interval_seconds),
+             pipeline_func=pipeline_func,
+             context=context,
+             next_run=next_run,
+             timezone=timezone,
+         )
+         self.schedules[name] = schedule
+         if self._persistence:
+             self._persistence.save_schedule(schedule)
+         return schedule
+
+     def schedule_hourly(
+         self,
+         name: str,
+         pipeline_func: Callable,
+         minute: int = 0,
+         timezone: str = "UTC",
+         context: dict[str, Any] | None = None,
+     ) -> Schedule:
+         """Schedule pipeline to run hourly."""
+         self._register_func(name, pipeline_func)
+
+         if pytz:
+             tz = pytz.timezone(timezone)
+             now = datetime.now(tz)
+             next_run = now.replace(minute=minute, second=0, microsecond=0)
+             if next_run <= now:
+                 next_run += timedelta(hours=1)
+         else:
+             now = datetime.now()
+             next_run = now.replace(minute=minute, second=0, microsecond=0)
+             if next_run <= now:
+                 next_run += timedelta(hours=1)
+
+         schedule = Schedule(
+             pipeline_name=name,
+             schedule_type="hourly",
+             schedule_value=str(minute),
+             pipeline_func=pipeline_func,
+             context=context,
+             next_run=next_run,
+             timezone=timezone,
+         )
+         self.schedules[name] = schedule
+         if self._persistence:
+             self._persistence.save_schedule(schedule)
+         return schedule
+
+     def schedule_cron(
+         self,
+         name: str,
+         pipeline_func: Callable,
+         cron_expression: str,
+         timezone: str = "UTC",
+         context: dict[str, Any] | None = None,
+     ) -> Schedule:
+         """Schedule with cron expression."""
+         if not croniter:
+             raise ImportError("croniter is required for cron scheduling. Install with: pip install croniter")
+
+         self._register_func(name, pipeline_func)
+
+         if pytz:
+             tz = pytz.timezone(timezone)
+             now = datetime.now(tz)
+         else:
+             now = datetime.now()
+
+         cron = croniter(cron_expression, now)
+         next_run = cron.get_next(datetime)
+
+         schedule = Schedule(
+             pipeline_name=name,
+             schedule_type="cron",
+             schedule_value=cron_expression,
+             pipeline_func=pipeline_func,
+             context=context,
+             next_run=next_run,
+             timezone=timezone,
+         )
+         self.schedules[name] = schedule
+         if self._persistence:
+             self._persistence.save_schedule(schedule)
+         return schedule
+
+     def unschedule(self, name: str) -> None:
+         """Remove a scheduled pipeline."""
+         if name in self.schedules:
+             del self.schedules[name]
+             if self._persistence:
+                 self._persistence.delete_schedule(name)
+
+     def clear(self) -> None:
+         """Remove all schedules."""
+         self.schedules.clear()
+         if self._persistence:
+             # Re-initialize DB to clear it
+             db_path = Path(self._persistence.db_path)
+             if db_path.exists():
+                 db_path.unlink()
+             self._persistence._init_db()
+
+     def enable(self, name: str) -> None:
+         """Enable a schedule."""
+         if name in self.schedules:
+             self.schedules[name].enabled = True
+             if self._persistence:
+                 self._persistence.save_schedule(self.schedules[name])
+
+     def disable(self, name: str) -> None:
+         """Disable a schedule."""
+         if name in self.schedules:
+             self.schedules[name].enabled = False
+             if self._persistence:
+                 self._persistence.save_schedule(self.schedules[name])
+
+     def _run_pipeline(self, schedule: Schedule) -> None:
+         """Run a scheduled pipeline."""
+         # Acquire distributed lock if enabled
+         if self.config.distributed:
+             if not self._lock.acquire(schedule.pipeline_name):
+                 logger.info(f"Skipping {schedule.pipeline_name}: locked by another instance")
+                 return
+
+         execution = ScheduleExecution(
+             schedule_name=schedule.pipeline_name,
+             started_at=datetime.now(),
+         )
+
+         try:
+             logger.info(f"Starting scheduled run: {schedule.pipeline_name}")
+             schedule.pipeline_func()
+             execution.success = True
+             schedule.last_run = datetime.now(pytz.timezone(schedule.timezone)) if pytz else datetime.now()
+
+             if self.on_success:
+                 self.on_success(schedule, execution)
+         except Exception as e:
+             logger.error(f"Schedule {schedule.pipeline_name} failed: {e}")
+             execution.error = str(e)
+             if self.on_failure:
+                 self.on_failure(schedule, execution, e)
+         finally:
+             execution.completed_at = datetime.now()
+             execution.duration_seconds = (execution.completed_at - execution.started_at).total_seconds()
+             self.metrics.update(execution)
+
+             if self.config.distributed:
+                 self._lock.release(schedule.pipeline_name)
+
+             if self._persistence:
+                 self._persistence.save_schedule(schedule)
+                 self._persistence.save_execution(execution)
+
+         # Calculate next run
+         self._calculate_next_run(schedule)
+         if self._persistence:
+             self._persistence.save_schedule(schedule)
+
+     def get_history(self, schedule_name: str, limit: int = 50) -> list[dict[str, Any]]:
+         """Get execution history for a schedule."""
+         if self._persistence:
+             return self._persistence.get_history(schedule_name, limit)
+         return []
+
+     def _calculate_next_run(self, schedule: Schedule) -> None:
+         """Calculate next run time."""
+         if pytz:
+             tz = pytz.timezone(schedule.timezone)
+             now = datetime.now(tz)
+         else:
+             now = datetime.now()
+
+         if schedule.schedule_type == "daily":
+             schedule.next_run += timedelta(days=1)
+         elif schedule.schedule_type == "hourly":
+             schedule.next_run += timedelta(hours=1)
+         elif schedule.schedule_type == "interval":
+             interval = int(schedule.schedule_value)
+             schedule.next_run = now + timedelta(seconds=interval)
+         elif schedule.schedule_type == "cron" and croniter:
+             cron = croniter(schedule.schedule_value, now)
+             schedule.next_run = cron.get_next(datetime)
+
+     def _scheduler_loop(self) -> None:
+         """Main scheduler loop."""
+         # Load persisted schedules on startup
+         if self._persistence:
+             loaded = self._persistence.load_schedules(self._pipeline_funcs)
+             self.schedules.update(loaded)
+
+         while self.running:
+             self.metrics.last_heartbeat = datetime.now()
+
+             if pytz:
+                 # Check schedules in their respective timezones
+                 for _name, schedule in self.schedules.items():
+                     if not schedule.enabled or not schedule.next_run:
+                         continue
+
+                     tz = pytz.timezone(schedule.timezone)
+                     now = datetime.now(tz)
+
+                     if schedule.next_run <= now:
+                         threading.Thread(target=self._run_pipeline, args=(schedule,)).start()
+             else:
+                 now = datetime.now()
+                 for _name, schedule in self.schedules.items():
+                     if schedule.enabled and schedule.next_run and schedule.next_run <= now:
+                         threading.Thread(target=self._run_pipeline, args=(schedule,)).start()
+
+             time.sleep(self.config.check_interval_seconds)
+
+     def start(self, blocking: bool = False) -> None:
+         """Start the scheduler."""
+         if self.running:
+             return
+
+         self.running = True
+         if blocking:
+             self._scheduler_loop()
+         else:
+             self._thread = threading.Thread(target=self._scheduler_loop, daemon=True)
+             self._thread.start()
+
+     def stop(self) -> None:
+         """Stop the scheduler."""
+         self.running = False
+         if self._thread:
+             self._thread.join()
+
+     def list_schedules(self) -> list[Schedule]:
+         """List all schedules."""
+         return list(self.schedules.values())
+
+     def health_check(self) -> dict[str, Any]:
+         """Get scheduler health status."""
+         return {
+             "status": "running" if self.running else "stopped",
+             "num_schedules": len(self.schedules),
+             "enabled_schedules": sum(1 for s in self.schedules.values() if s.enabled),
+             "metrics": self.metrics.to_dict(),
+             "next_runs": [
+                 {
+                     "name": s.pipeline_name,
+                     "next_run": s.next_run.isoformat() if s.next_run else None,
+                 }
+                 for s in sorted(self.schedules.values(), key=lambda x: x.next_run or datetime.max)[:5]
+             ],
+         }
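
For orientation, here is a minimal usage sketch of the scheduler API added above. It is not taken from the package itself: the nightly_training callable and the chosen times are placeholders, and the cron call assumes the optional croniter dependency is installed.

    from flowyml.core.scheduler import PipelineScheduler
    from flowyml.core.scheduler_config import SchedulerConfig

    def nightly_training():
        # placeholder for a real flowyml pipeline invocation
        print("training...")

    scheduler = PipelineScheduler(config=SchedulerConfig(persist_schedules=False))
    scheduler.schedule_daily("nightly_training", nightly_training, hour=2, minute=30)
    scheduler.schedule_cron("weekly_report", nightly_training, "0 6 * * 1")  # requires croniter
    scheduler.start()                # non-blocking: runs the loop in a daemon thread
    print(scheduler.health_check())  # status, metrics, and the next upcoming runs
    scheduler.stop()                 # joins the loop thread

Note that stop() joins the loop thread, so it can take up to check_interval_seconds (10 s by default) to return.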
flowyml/core/scheduler_config.py
@@ -0,0 +1,32 @@
+ """Scheduler configuration."""
+
+ import os
+ from typing import Optional
+ from pydantic import BaseModel
+
+
+ class SchedulerConfig(BaseModel):
+     """Scheduler configuration."""
+
+     persist_schedules: bool = True
+     db_path: Optional[str] = None
+     distributed: bool = False
+     lock_backend: str = "file"  # "file", "redis"
+     redis_url: Optional[str] = None
+     check_interval_seconds: int = 10
+     max_concurrent_runs: int = 5
+     timezone: str = "UTC"
+
+     @classmethod
+     def from_env(cls) -> "SchedulerConfig":
+         """Load from environment variables."""
+         return cls(
+             persist_schedules=os.getenv("FLOWYML_SCHEDULER_PERSIST", "true").lower() == "true",
+             db_path=os.getenv("FLOWYML_SCHEDULER_DB_PATH"),
+             distributed=os.getenv("FLOWYML_SCHEDULER_DISTRIBUTED", "false").lower() == "true",
+             lock_backend=os.getenv("FLOWYML_SCHEDULER_LOCK_BACKEND", "file"),
+             redis_url=os.getenv("FLOWYML_SCHEDULER_REDIS_URL"),
+             check_interval_seconds=int(os.getenv("FLOWYML_SCHEDULER_CHECK_INTERVAL", "10")),
+             max_concurrent_runs=int(os.getenv("FLOWYML_SCHEDULER_MAX_CONCURRENT", "5")),
+             timezone=os.getenv("FLOWYML_SCHEDULER_TIMEZONE", "UTC"),
+         )
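
Taken together, these two files mean the scheduler can be configured entirely through environment variables. A short illustrative sketch (the variable names are the ones read by from_env() above; the values are made up):

    import os

    os.environ["FLOWYML_SCHEDULER_PERSIST"] = "false"
    os.environ["FLOWYML_SCHEDULER_CHECK_INTERVAL"] = "30"
    os.environ["FLOWYML_SCHEDULER_TIMEZONE"] = "Europe/Berlin"

    from flowyml.core.scheduler_config import SchedulerConfig

    config = SchedulerConfig.from_env()
    assert config.persist_schedules is False
    assert config.check_interval_seconds == 30
    assert config.timezone == "Europe/Berlin"

Because PipelineScheduler() falls back to SchedulerConfig.from_env() when no config is passed, these variables also control a scheduler constructed with no arguments.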