FlowerPower 0.11.6.20__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. flowerpower/__init__.py +2 -6
  2. flowerpower/cfg/__init__.py +7 -14
  3. flowerpower/cfg/base.py +29 -25
  4. flowerpower/cfg/pipeline/__init__.py +8 -6
  5. flowerpower/cfg/pipeline/_schedule.py +32 -0
  6. flowerpower/cfg/pipeline/adapter.py +0 -5
  7. flowerpower/cfg/pipeline/builder.py +377 -0
  8. flowerpower/cfg/pipeline/run.py +36 -0
  9. flowerpower/cfg/project/__init__.py +11 -24
  10. flowerpower/cfg/project/adapter.py +0 -12
  11. flowerpower/cli/__init__.py +2 -21
  12. flowerpower/cli/cfg.py +0 -3
  13. flowerpower/cli/mqtt.py +0 -6
  14. flowerpower/cli/pipeline.py +22 -415
  15. flowerpower/cli/utils.py +0 -1
  16. flowerpower/flowerpower.py +345 -146
  17. flowerpower/pipeline/__init__.py +2 -0
  18. flowerpower/pipeline/base.py +21 -12
  19. flowerpower/pipeline/io.py +58 -54
  20. flowerpower/pipeline/manager.py +165 -726
  21. flowerpower/pipeline/pipeline.py +643 -0
  22. flowerpower/pipeline/registry.py +285 -18
  23. flowerpower/pipeline/visualizer.py +5 -6
  24. flowerpower/plugins/io/__init__.py +8 -0
  25. flowerpower/plugins/mqtt/__init__.py +7 -11
  26. flowerpower/settings/__init__.py +0 -2
  27. flowerpower/settings/{backend.py → _backend.py} +0 -21
  28. flowerpower/settings/logging.py +1 -1
  29. flowerpower/utils/logging.py +24 -12
  30. flowerpower/utils/misc.py +17 -256
  31. flowerpower/utils/monkey.py +1 -83
  32. flowerpower-0.21.0.dist-info/METADATA +463 -0
  33. flowerpower-0.21.0.dist-info/RECORD +44 -0
  34. flowerpower/cfg/pipeline/schedule.py +0 -74
  35. flowerpower/cfg/project/job_queue.py +0 -238
  36. flowerpower/cli/job_queue.py +0 -1061
  37. flowerpower/fs/__init__.py +0 -29
  38. flowerpower/fs/base.py +0 -662
  39. flowerpower/fs/ext.py +0 -2143
  40. flowerpower/fs/storage_options.py +0 -1420
  41. flowerpower/job_queue/__init__.py +0 -294
  42. flowerpower/job_queue/apscheduler/__init__.py +0 -11
  43. flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
  44. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
  45. flowerpower/job_queue/apscheduler/manager.py +0 -1051
  46. flowerpower/job_queue/apscheduler/setup.py +0 -554
  47. flowerpower/job_queue/apscheduler/trigger.py +0 -169
  48. flowerpower/job_queue/apscheduler/utils.py +0 -311
  49. flowerpower/job_queue/base.py +0 -413
  50. flowerpower/job_queue/rq/__init__.py +0 -10
  51. flowerpower/job_queue/rq/_trigger.py +0 -37
  52. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
  53. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -231
  54. flowerpower/job_queue/rq/manager.py +0 -1582
  55. flowerpower/job_queue/rq/setup.py +0 -154
  56. flowerpower/job_queue/rq/utils.py +0 -69
  57. flowerpower/mqtt.py +0 -12
  58. flowerpower/pipeline/job_queue.py +0 -583
  59. flowerpower/pipeline/runner.py +0 -603
  60. flowerpower/plugins/io/base.py +0 -2520
  61. flowerpower/plugins/io/helpers/datetime.py +0 -298
  62. flowerpower/plugins/io/helpers/polars.py +0 -875
  63. flowerpower/plugins/io/helpers/pyarrow.py +0 -570
  64. flowerpower/plugins/io/helpers/sql.py +0 -202
  65. flowerpower/plugins/io/loader/__init__.py +0 -28
  66. flowerpower/plugins/io/loader/csv.py +0 -37
  67. flowerpower/plugins/io/loader/deltatable.py +0 -190
  68. flowerpower/plugins/io/loader/duckdb.py +0 -19
  69. flowerpower/plugins/io/loader/json.py +0 -37
  70. flowerpower/plugins/io/loader/mqtt.py +0 -159
  71. flowerpower/plugins/io/loader/mssql.py +0 -26
  72. flowerpower/plugins/io/loader/mysql.py +0 -26
  73. flowerpower/plugins/io/loader/oracle.py +0 -26
  74. flowerpower/plugins/io/loader/parquet.py +0 -35
  75. flowerpower/plugins/io/loader/postgres.py +0 -26
  76. flowerpower/plugins/io/loader/pydala.py +0 -19
  77. flowerpower/plugins/io/loader/sqlite.py +0 -23
  78. flowerpower/plugins/io/metadata.py +0 -244
  79. flowerpower/plugins/io/saver/__init__.py +0 -28
  80. flowerpower/plugins/io/saver/csv.py +0 -36
  81. flowerpower/plugins/io/saver/deltatable.py +0 -186
  82. flowerpower/plugins/io/saver/duckdb.py +0 -19
  83. flowerpower/plugins/io/saver/json.py +0 -36
  84. flowerpower/plugins/io/saver/mqtt.py +0 -28
  85. flowerpower/plugins/io/saver/mssql.py +0 -26
  86. flowerpower/plugins/io/saver/mysql.py +0 -26
  87. flowerpower/plugins/io/saver/oracle.py +0 -26
  88. flowerpower/plugins/io/saver/parquet.py +0 -36
  89. flowerpower/plugins/io/saver/postgres.py +0 -26
  90. flowerpower/plugins/io/saver/pydala.py +0 -20
  91. flowerpower/plugins/io/saver/sqlite.py +0 -24
  92. flowerpower/plugins/mqtt/cfg.py +0 -17
  93. flowerpower/plugins/mqtt/manager.py +0 -962
  94. flowerpower/settings/job_queue.py +0 -87
  95. flowerpower/utils/scheduler.py +0 -311
  96. flowerpower-0.11.6.20.dist-info/METADATA +0 -537
  97. flowerpower-0.11.6.20.dist-info/RECORD +0 -102
  98. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/WHEEL +0 -0
  99. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/entry_points.txt +0 -0
  100. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/licenses/LICENSE +0 -0
  101. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/top_level.txt +0 -0
@@ -1,603 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """Pipeline Runner."""
3
-
4
- from __future__ import annotations
5
-
6
- import datetime as dt
7
- import importlib.util
8
- import random
9
- import time
10
- from typing import Any, Callable
11
-
12
- import humanize
13
- from hamilton import driver
14
- from hamilton.execution import executors
15
- from hamilton.registry import disable_autoload
16
- from hamilton.telemetry import disable_telemetry
17
- from hamilton_sdk.api.clients import UnauthorizedException
18
- from requests.exceptions import ConnectionError, HTTPError
19
-
20
- from .. import settings
21
-
22
- if importlib.util.find_spec("opentelemetry"):
23
- from hamilton.plugins import h_opentelemetry
24
-
25
- from ..utils.open_telemetry import init_tracer
26
- else:
27
- h_opentelemetry = None
28
- init_tracer = None
29
-
30
- if importlib.util.find_spec("mlflow"):
31
- from hamilton.plugins import h_mlflow
32
- else:
33
- h_mlflow = None
34
-
35
- from hamilton.plugins import h_rich
36
- from hamilton.plugins.h_threadpool import FutureAdapter
37
- from hamilton_sdk.adapters import HamiltonTracker
38
- from hamilton_sdk.tracking import constants
39
- from loguru import logger
40
-
41
- if importlib.util.find_spec("distributed"):
42
- from dask import distributed
43
- from hamilton.plugins import h_dask
44
- else:
45
- distributed = None
46
-
47
-
48
- if importlib.util.find_spec("ray"):
49
- import ray
50
- from hamilton.plugins import h_ray
51
- else:
52
- h_ray = None
53
-
54
- from ..cfg import PipelineConfig, ProjectConfig
55
- from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
56
- from ..cfg.pipeline.run import ExecutorConfig, WithAdapterConfig
57
- from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
58
- from ..utils.logging import setup_logging
59
- from .base import load_module
60
-
61
- setup_logging(level=settings.LOG_LEVEL)
62
-
63
- # from .executor import get_executor
64
-
65
-
66
class PipelineRunner:
    """Execute a single run of a configured pipeline.

    Loads the pipeline's Hamilton module, assembles a driver (executor,
    adapters, optional cache) from the project/pipeline configuration plus
    per-call overrides, and executes it with exponential-backoff retries.
    """

    # Safe lookup table for exception names given as strings in config.
    # BUG FIX: replaces the previous eval() call, which executed arbitrary
    # configuration text. Unknown names fall back to Exception.
    _KNOWN_EXCEPTIONS = {
        "Exception": Exception,
        "HTTPError": HTTPError,
        "UnauthorizedException": UnauthorizedException,
        "ConnectionError": ConnectionError,
    }

    def __init__(
        self,
        project_cfg: ProjectConfig,
        pipeline_cfg: PipelineConfig,
    ):
        self.project_cfg = project_cfg
        self.pipeline_cfg = pipeline_cfg
        self.name = pipeline_cfg.name

        # Honor global Hamilton settings for telemetry / extension autoload.
        if not settings.HAMILTON_TELEMETRY_ENABLED:
            disable_telemetry()
        if not settings.HAMILTON_AUTOLOAD_EXTENSIONS:
            disable_autoload()

    def __enter__(self):
        """Enable use as a context manager."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """No special cleanup required."""
        pass

    def _get_executor(
        self, executor_cfg: str | dict | ExecutorConfig | None = None
    ) -> tuple[executors.BaseExecutor, Callable | None]:
        """Build the remote task executor from configuration.

        Args:
            executor_cfg: Override for the pipeline's executor settings.
                May be an executor type name, a dict, or an ExecutorConfig;
                it is merged onto the pipeline config's executor settings.

        Returns:
            tuple[executors.BaseExecutor, Callable | None]: The executor and
            an optional shutdown callable to invoke after the run.
        """
        logger.debug("Setting up executor...")
        if executor_cfg:
            if isinstance(executor_cfg, str):
                executor_cfg = ExecutorConfig(type=executor_cfg)
            elif isinstance(executor_cfg, dict):
                executor_cfg = ExecutorConfig.from_dict(executor_cfg)
            elif not isinstance(executor_cfg, ExecutorConfig):
                raise TypeError(
                    "Executor must be a string, dictionary, or ExecutorConfig instance."
                )
            executor_cfg = self.pipeline_cfg.run.executor.merge(executor_cfg)
        else:
            executor_cfg = self.pipeline_cfg.run.executor

        if executor_cfg.type is None:
            logger.debug(
                "No executor type specified. Using SynchronousLocalTaskExecutor as default."
            )
            return executors.SynchronousLocalTaskExecutor(), None

        if executor_cfg.type == "threadpool":
            logger.debug(
                f"Using MultiThreadingExecutor with max_workers={executor_cfg.max_workers}"
            )
            return executors.MultiThreadingExecutor(
                max_tasks=executor_cfg.max_workers
            ), None
        elif executor_cfg.type == "processpool":
            logger.debug(
                f"Using MultiProcessingExecutor with max_workers={executor_cfg.max_workers}"
            )
            return executors.MultiProcessingExecutor(
                max_tasks=executor_cfg.max_workers
            ), None
        elif executor_cfg.type == "ray":
            if h_ray:
                logger.debug(
                    f"Using RayTaskExecutor with num_cpus={executor_cfg.num_cpus}"
                )
                return (
                    h_ray.RayTaskExecutor(
                        num_cpus=executor_cfg.num_cpus,
                        ray_init_config=self.project_cfg.adapter.ray.ray_init_config,
                    ),
                    ray.shutdown
                    if self.project_cfg.adapter.ray.shutdown_ray_on_completion
                    else None,
                )
            logger.warning("Ray is not installed. Using local executor.")
            return executors.SynchronousLocalTaskExecutor(), None
        elif executor_cfg.type == "dask":
            if distributed:
                cluster = distributed.LocalCluster()
                client = distributed.Client(cluster)
                return h_dask.DaskExecutor(client=client), cluster.close
            logger.warning("Dask is not installed. Using local executor.")
            return executors.SynchronousLocalTaskExecutor(), None
        else:
            logger.warning(
                f"Unknown executor type: {executor_cfg.type}. Using local executor."
            )
            return executors.SynchronousLocalTaskExecutor(), None

    def _get_adapters(
        self,
        with_adapter_cfg: dict | WithAdapterConfig | None = None,
        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
        adapter: dict[str, Any] | None = None,
    ) -> list:
        """Assemble the Hamilton adapters for this run.

        Args:
            with_adapter_cfg: Toggles for which adapters are enabled.
                Overrides the with_adapter settings in the pipeline config.
            pipeline_adapter_cfg: Pipeline-level adapter settings override.
            project_adapter_cfg: Project-level adapter settings override.
            adapter: Additional pre-built Hamilton adapters keyed by name.

        Returns:
            list: Adapter instances to pass to the driver builder.
        """
        logger.debug("Setting up adapters...")
        if with_adapter_cfg:
            if isinstance(with_adapter_cfg, dict):
                with_adapter_cfg = WithAdapterConfig.from_dict(with_adapter_cfg)
            elif not isinstance(with_adapter_cfg, WithAdapterConfig):
                raise TypeError(
                    "with_adapter must be a dictionary or WithAdapterConfig instance."
                )
            with_adapter_cfg = self.pipeline_cfg.run.with_adapter.merge(
                with_adapter_cfg
            )
        else:
            with_adapter_cfg = self.pipeline_cfg.run.with_adapter

        if pipeline_adapter_cfg:
            if isinstance(pipeline_adapter_cfg, dict):
                pipeline_adapter_cfg = PipelineAdapterConfig.from_dict(
                    pipeline_adapter_cfg
                )
            elif not isinstance(pipeline_adapter_cfg, PipelineAdapterConfig):
                raise TypeError(
                    "pipeline_adapter_cfg must be a dictionary or PipelineAdapterConfig instance."
                )
            pipeline_adapter_cfg = self.pipeline_cfg.adapter.merge(pipeline_adapter_cfg)
        else:
            pipeline_adapter_cfg = self.pipeline_cfg.adapter

        if project_adapter_cfg:
            if isinstance(project_adapter_cfg, dict):
                project_adapter_cfg = ProjectAdapterConfig.from_dict(
                    project_adapter_cfg
                )
            elif not isinstance(project_adapter_cfg, ProjectAdapterConfig):
                raise TypeError(
                    "project_adapter_cfg must be a dictionary or ProjectAdapterConfig instance."
                )
            project_adapter_cfg = self.project_cfg.adapter.merge(project_adapter_cfg)
        else:
            project_adapter_cfg = self.project_cfg.adapter

        adapters = []
        if with_adapter_cfg.hamilton_tracker:
            # Project settings first, pipeline settings win on conflict.
            tracker_kwargs = project_adapter_cfg.hamilton_tracker.to_dict()
            tracker_kwargs.update(pipeline_adapter_cfg.hamilton_tracker.to_dict())
            tracker_kwargs["hamilton_api_url"] = tracker_kwargs.pop("api_url", None)
            tracker_kwargs["hamilton_ui_url"] = tracker_kwargs.pop("ui_url", None)

            # Capture limits fall back to global settings when unset.
            constants.MAX_DICT_LENGTH_CAPTURE = (
                tracker_kwargs.pop("max_dict_length_capture", None)
                or settings.HAMILTON_MAX_DICT_LENGTH_CAPTURE
            )
            constants.MAX_LIST_LENGTH_CAPTURE = (
                tracker_kwargs.pop("max_list_length_capture", None)
                or settings.HAMILTON_MAX_LIST_LENGTH_CAPTURE
            )
            constants.CAPTURE_DATA_STATISTICS = (
                tracker_kwargs.pop("capture_data_statistics", None)
                or settings.HAMILTON_CAPTURE_DATA_STATISTICS
            )

            adapters.append(HamiltonTracker(**tracker_kwargs))

        if with_adapter_cfg.mlflow:
            if h_mlflow is None:
                logger.warning("MLFlow is not installed. Skipping MLFlow adapter.")
            else:
                mlflow_kwargs = project_adapter_cfg.mlflow.to_dict()
                mlflow_kwargs.update(pipeline_adapter_cfg.mlflow.to_dict())
                adapters.append(h_mlflow.MLFlowTracker(**mlflow_kwargs))

        if with_adapter_cfg.opentelemetry:
            if h_opentelemetry is None:
                logger.warning(
                    "OpenTelemetry is not installed. Skipping OpenTelemetry adapter."
                )
            else:
                otel_kwargs = project_adapter_cfg.opentelemetry.to_dict()
                otel_kwargs.update(pipeline_adapter_cfg.opentelemetry.to_dict())
                trace = init_tracer(**otel_kwargs, name=self.project_cfg.name)
                tracer = trace.get_tracer(self.name)
                adapters.append(
                    h_opentelemetry.OpenTelemetryTracer(
                        tracer_name=f"{self.project_cfg.name}.{self.name}",
                        tracer=tracer,
                    )
                )

        if with_adapter_cfg.progressbar:
            adapters.append(
                h_rich.RichProgressBar(run_desc=f"{self.project_cfg.name}.{self.name}")
            )

        if with_adapter_cfg.future:
            adapters.append(FutureAdapter())

        if with_adapter_cfg.ray:
            if h_ray is None:
                logger.warning("Ray is not installed. Skipping Ray adapter.")
            else:
                ray_kwargs = project_adapter_cfg.ray.to_dict()
                ray_kwargs.update(pipeline_adapter_cfg.ray.to_dict())
                adapters.append(h_ray.RayGraphAdapter(**ray_kwargs))

        all_adapters = [
            f"{adp}: ✅" if enabled else f"{adp}: ❌"
            for adp, enabled in with_adapter_cfg.to_dict().items()
        ]

        if adapter:
            adapters += list(adapter.values())
            all_adapters += [f"{adp}: ✅" for adp in adapter.keys()]

        logger.debug(f"Adapters enabled: {' | '.join(all_adapters)}")
        return adapters

    def _get_driver(
        self,
        config: dict | None = None,
        cache: bool | dict = False,
        executor_cfg: str | dict | ExecutorConfig | None = None,
        with_adapter_cfg: dict | WithAdapterConfig | None = None,
        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
        project_adapter_cfg: dict | PipelineAdapterConfig | None = None,
        adapter: dict[str, Any] | None = None,
        reload: bool = False,
    ) -> tuple[driver.Driver, Callable | None]:
        """Build the Hamilton driver and an optional shutdown callable.

        Args:
            config: Driver config; falls back to the pipeline run config.
            cache: Use the cache. True enables defaults; a dict passes
                fine-tuned cache settings to the driver builder.
            executor_cfg: Executor override (see _get_executor).
            with_adapter_cfg: Adapter toggles override (see _get_adapters).
            pipeline_adapter_cfg: Pipeline adapter settings override.
            project_adapter_cfg: Project adapter settings override.
            adapter: Additional pre-built Hamilton adapters.
            reload: Whether to reload the pipeline module.

        Returns:
            tuple[driver.Driver, Callable | None]: Driver and shutdown hook.
        """
        logger.debug("Setting up driver...")
        module = load_module(name=self.name, reload=reload)
        executor, shutdown = self._get_executor(executor_cfg)
        adapters = self._get_adapters(
            with_adapter_cfg,
            pipeline_adapter_cfg,
            project_adapter_cfg,
            adapter=adapter,
        )

        config = config or self.pipeline_cfg.run.config

        dr = (
            driver.Builder()
            .enable_dynamic_execution(allow_experimental_mode=True)
            .with_modules(module)
            .with_config(config)
            .with_local_executor(executors.SynchronousLocalTaskExecutor())
        )

        if cache:
            if isinstance(cache, dict):
                dr = dr.with_cache(**cache)
            else:
                dr = dr.with_cache()

        if executor:
            dr = dr.with_remote_executor(executor)

        if adapters:
            dr = dr.with_adapters(*adapters)

        return dr.build(), shutdown

    def _resolve_retry_exceptions(self, retry_exceptions) -> list:
        """Normalize retry_exceptions into a list of exception classes.

        String entries are resolved via the _KNOWN_EXCEPTIONS lookup table.
        BUG FIX: the previous implementation used eval() on configuration
        strings, which executes arbitrary text.
        """
        if not isinstance(retry_exceptions, (tuple, list)):
            retry_exceptions = [retry_exceptions]
        return [
            self._KNOWN_EXCEPTIONS.get(exc, Exception) if isinstance(exc, str) else exc
            for exc in retry_exceptions
        ]

    def _disable_tracker_for_retry(
        self, with_adapter_cfg: dict | WithAdapterConfig | None
    ) -> dict:
        """Return an adapter-override dict with the Hamilton tracker disabled.

        BUG FIX: the previous code subscripted ``with_adapter_cfg`` directly,
        which raised TypeError when it was None or a WithAdapterConfig
        instance — exactly while an execution error was being handled.
        """
        if isinstance(with_adapter_cfg, WithAdapterConfig):
            cfg = with_adapter_cfg.to_dict()
        else:
            cfg = dict(with_adapter_cfg or {})
        # Fall back to the pipeline's configured toggle to decide whether
        # the tracker was effectively enabled for this run.
        tracker_on = cfg.get(
            "hamilton_tracker", self.pipeline_cfg.run.with_adapter.hamilton_tracker
        )
        if tracker_on:
            logger.info(
                "Hamilton Tracker is enabled. Disabling tracker for the next run."
            )
        cfg["hamilton_tracker"] = False
        return cfg

    def run(
        self,
        inputs: dict | None = None,
        final_vars: list[str] | None = None,
        config: dict | None = None,
        cache: dict | None = None,
        executor_cfg: str | dict | ExecutorConfig | None = None,
        with_adapter_cfg: dict | WithAdapterConfig | None = None,
        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
        adapter: dict[str, Any] | None = None,
        reload: bool = False,
        log_level: str | None = None,
        max_retries: int | None = None,
        retry_delay: float | None = None,
        jitter_factor: float | None = None,
        retry_exceptions: tuple = (
            Exception,
            HTTPError,
            UnauthorizedException,
        ),
    ) -> dict[str, Any]:
        """Run the pipeline with the given parameters.

        Args:
            inputs: Inputs for the pipeline; override/extend config inputs.
            final_vars: Final variables to request from the driver.
            config: Config for the Hamilton driver.
            cache: Cache configuration.
            executor_cfg: Executor override (see _get_executor).
            with_adapter_cfg: Adapter toggles override (see _get_adapters).
            pipeline_adapter_cfg: Pipeline adapter settings override.
            project_adapter_cfg: Project adapter settings override.
            adapter: Additional pre-built Hamilton adapters.
            reload: Whether to reload the pipeline module.
            log_level: Log level for this run.
            max_retries: Maximum number of attempts; at least one attempt is
                always made (previously 0 silently skipped execution).
            retry_delay: Base delay between retries in seconds.
            jitter_factor: Fraction of the base delay used as random jitter.
            retry_exceptions: Exception classes (or their names as strings)
                that trigger a retry.

        Returns:
            dict[str, Any]: The result of executing the pipeline.

        Raises:
            Exception: The last caught exception when all attempts fail.
        """
        self.start_time = dt.datetime.now()

        if log_level or self.pipeline_cfg.run.log_level:
            setup_logging(level=log_level or self.pipeline_cfg.run.log_level)

        logger.info(f"Starting pipeline {self.project_cfg.name}.{self.name}")

        final_vars = final_vars or self.pipeline_cfg.run.final_vars
        # Explicit inputs override and/or extend the configured inputs.
        inputs = {
            **(self.pipeline_cfg.run.inputs or {}),
            **(inputs or {}),
        }

        max_retries = max_retries or self.pipeline_cfg.run.max_retries
        # BUG FIX: default the backoff parameters so a retry never crashes
        # on None arithmetic when neither caller nor config set them.
        retry_delay = retry_delay or self.pipeline_cfg.run.retry_delay or 1.0
        jitter_factor = jitter_factor or self.pipeline_cfg.run.jitter_factor or 0.1
        retry_exceptions = retry_exceptions or self.pipeline_cfg.run.retry_exceptions
        retry_exceptions = self._resolve_retry_exceptions(retry_exceptions)

        # BUG FIX: with max_retries=0 (run_pipeline's default) the old loop
        # never executed and the method silently returned None. Always make
        # at least one attempt.
        total_attempts = max(1, max_retries or 0)

        attempts = 1
        last_exception = None

        while attempts <= total_attempts:
            logger.debug(f"Attempting to execute pipeline {attempts}/{total_attempts}")
            try:
                dr, shutdown = self._get_driver(
                    config=config,
                    cache=cache,
                    executor_cfg=executor_cfg,
                    with_adapter_cfg=with_adapter_cfg,
                    pipeline_adapter_cfg=pipeline_adapter_cfg,
                    project_adapter_cfg=project_adapter_cfg,
                    adapter=adapter,
                    reload=reload,
                )

                res = dr.execute(final_vars=final_vars, inputs=inputs)
                self.end_time = dt.datetime.now()
                self.execution_time = self.end_time - self.start_time
                logger.success(
                    f"Finished: Pipeline {self.project_cfg.name}.{self.name} executed in {humanize.naturaldelta(self.execution_time)}"
                )

                if shutdown is not None:
                    logger.info("Shutting down executor...")
                    shutdown()
                    logger.info("Executor shut down.")

                return res
            except tuple(retry_exceptions) as e:
                # Tracker/API failures are likely to repeat: disable the
                # tracker for subsequent attempts.
                if isinstance(e, (HTTPError, UnauthorizedException, ConnectionError)):
                    with_adapter_cfg = self._disable_tracker_for_retry(
                        with_adapter_cfg
                    )

                attempts += 1
                last_exception = e

                if attempts <= total_attempts:
                    logger.warning(
                        f"Pipeline execution failed (attempt {attempts}/{total_attempts}): {e}"
                    )

                    # Exponential backoff with symmetric jitter.
                    base_delay = retry_delay * (2 ** (attempts - 1))
                    jitter = base_delay * jitter_factor * (2 * random.random() - 1)
                    actual_delay = max(0, base_delay + jitter)  # never negative

                    logger.debug(
                        f"Retrying in {actual_delay:.2f} seconds (base: {base_delay:.2f}s, jitter: {jitter:.2f}s)"
                    )
                    time.sleep(actual_delay)
                else:
                    logger.error(
                        f"Pipeline execution failed after {total_attempts} attempts"
                    )
                    raise last_exception
527
-
528
-
529
def run_pipeline(
    project_cfg: ProjectConfig,
    pipeline_cfg: PipelineConfig,
    inputs: dict | None = None,
    final_vars: list[str] | None = None,
    config: dict | None = None,
    cache: dict | None = None,
    executor_cfg: str | dict | ExecutorConfig | None = None,
    with_adapter_cfg: dict | WithAdapterConfig | None = None,
    pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
    project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
    adapter: dict[str, Any] | None = None,
    reload: bool = False,
    log_level: str | None = None,
    max_retries: int = 0,
    retry_delay: float = 1.0,
    jitter_factor: float = 0.1,
    retry_exceptions: tuple = (
        Exception,
        HTTPError,
        UnauthorizedException,
    ),  # Adjust to specific exceptions
) -> dict[str, Any]:
    """Convenience wrapper: build a PipelineRunner and execute the pipeline.

    Args:
        project_cfg (ProjectConfig): The project configuration.
        pipeline_cfg (PipelineConfig): The pipeline configuration.
        inputs (dict | None, optional): Inputs for the pipeline. Defaults to None.
        final_vars (list | None, optional): Final variables for the pipeline. Defaults to None.
        config (dict | None, optional): Config for the hamilton driver. Defaults to None.
        cache (dict | None, optional): Cache configuration. Defaults to None.
        executor_cfg (str | dict | ExecutorConfig | None, optional): Executor override;
            takes precedence over the pipeline config. Defaults to None.
        with_adapter_cfg (dict | WithAdapterConfig | None, optional): Adapter toggles;
            takes precedence over the pipeline config. Defaults to None.
        pipeline_adapter_cfg (dict | PipelineAdapterConfig | None, optional): Pipeline
            adapter settings override. Defaults to None.
        project_adapter_cfg (dict | ProjectAdapterConfig | None, optional): Project
            adapter settings override. Defaults to None.
        adapter (dict[str, Any] | None, optional): Additional Hamilton adapters. Defaults to None.
        reload (bool, optional): Whether to reload the module. Defaults to False.
        log_level (str | None, optional): Log level for this run. Defaults to None.
        max_retries (int, optional): Maximum retry attempts. Defaults to 0.
        retry_delay (float, optional): Base delay between retries in seconds. Defaults to 1.0.
        jitter_factor (float, optional): Jitter factor applied to the delay. Defaults to 0.1.
        retry_exceptions (tuple, optional): Exception classes that trigger a retry.

    Returns:
        dict[str, Any]: The result of executing the pipeline.

    Raises:
        Exception: If the pipeline execution fails after the maximum number of retries.
    """
    # Collect the pass-through arguments once so the delegation below stays flat.
    run_kwargs = dict(
        inputs=inputs,
        final_vars=final_vars,
        config=config,
        cache=cache,
        executor_cfg=executor_cfg,
        with_adapter_cfg=with_adapter_cfg,
        pipeline_adapter_cfg=pipeline_adapter_cfg,
        project_adapter_cfg=project_adapter_cfg,
        adapter=adapter,
        reload=reload,
        log_level=log_level,
        max_retries=max_retries,
        retry_delay=retry_delay,
        jitter_factor=jitter_factor,
        retry_exceptions=retry_exceptions,
    )
    with PipelineRunner(project_cfg, pipeline_cfg) as runner:
        return runner.run(**run_kwargs)