ddeutil-workflow 0.0.12__py3-none-any.whl → 0.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/job.py +73 -42
- ddeutil/workflow/on.py +5 -2
- ddeutil/workflow/scheduler.py +30 -29
- ddeutil/workflow/stage.py +43 -10
- {ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.13.dist-info}/METADATA +78 -32
- {ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.13.dist-info}/RECORD +11 -11
- {ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.13.dist-info}/WHEEL +1 -1
- {ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.13.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.13.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.13.dist-info}/top_level.txt +0 -0
ddeutil/workflow/__about__.py
CHANGED
@@ -1 +1 @@
-__version__: str = "0.0.12"
+__version__: str = "0.0.13"
ddeutil/workflow/job.py
CHANGED
@@ -14,11 +14,13 @@ from concurrent.futures import (
     as_completed,
     wait,
 )
+from functools import lru_cache
 from pickle import PickleError
 from textwrap import dedent
 from threading import Event
 from typing import Optional

+from ddeutil.core import freeze_args
 from pydantic import BaseModel, Field
 from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
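The pairing of `freeze_args` with `lru_cache` matters here because `lru_cache` only accepts hashable arguments, while a job matrix arrives as a dict of lists. The sketch below shows the general shape of such a freezing decorator; it is an illustration of the pattern under that assumption, not the actual `ddeutil.core.freeze_args` implementation.

```python
from functools import lru_cache, wraps

def freeze(value):
    """Recursively convert dicts and lists into hashable tuples."""
    if isinstance(value, dict):
        return tuple((k, freeze(v)) for k, v in sorted(value.items()))
    if isinstance(value, (list, set, tuple)):
        return tuple(freeze(v) for v in value)
    return value

def freeze_args_sketch(func):
    """Freeze mutable arguments so an inner @lru_cache can hash them."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        return func(
            *(freeze(a) for a in args),
            **{k: freeze(v) for k, v in kwargs.items()},
        )
    return wrapper

@freeze_args_sketch
@lru_cache
def expensive(matrix):
    # `matrix` arrives as nested tuples here, so caching works.
    return len(matrix)

print(expensive({"table": ["a", "b"]}))  # computed once, then cached
```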
@@ -53,12 +55,70 @@ logger = get_logger("ddeutil.workflow")
 __all__: TupleStr = (
     "Strategy",
     "Job",
+    "make",
 )


+@freeze_args
+@lru_cache
+def make(matrix, include, exclude) -> list[DictStr]:
+    """Return List of product of matrix values that already filter with
+    exclude and add include.
+
+    :param matrix: A matrix values that want to cross product to possible
+        parallelism values.
+    :param include: A list of additional matrix that want to adds-in.
+    :param exclude: A list of exclude matrix that want to filter-out.
+    :rtype: list[DictStr]
+    """
+    # NOTE: If it does not set matrix, it will return list of an empty dict.
+    if not (mt := matrix):
+        return [{}]
+
+    final: list[DictStr] = []
+    for r in cross_product(matrix=mt):
+        if any(
+            all(r[k] == v for k, v in exclude.items()) for exclude in exclude
+        ):
+            continue
+        final.append(r)
+
+    # NOTE: If it is empty matrix and include, it will return list of an
+    #   empty dict.
+    if not final and not include:
+        return [{}]
+
+    # NOTE: Add include to generated matrix with exclude list.
+    add: list[DictStr] = []
+    for inc in include:
+        # VALIDATE:
+        #   Validate any key in include list should be a subset of some one
+        #   in matrix.
+        if all(not (set(inc.keys()) <= set(m.keys())) for m in final):
+            raise ValueError("Include should have the keys equal to matrix")
+
+        # VALIDATE:
+        #   Validate value of include does not duplicate with generated
+        #   matrix.
+        if any(
+            all(inc.get(k) == v for k, v in m.items()) for m in [*final, *add]
+        ):
+            continue
+        add.append(inc)
+    final.extend(add)
+    return final
+
+
 class Strategy(BaseModel):
     """Strategy Model that will combine a matrix together for running the
-    special job.
+    special job with combination of matrix data.
+
+    This model is not part of the job model only, because you can use it with
+    any model object. The purpose of this model is to generate a matrix result
+    coming from combination logic with any matrix values for running it with
+    parallelism.
+
+        [1, 2, 3] x [a, b] --> [1a], [1b], [2a], [2b], [3a], [3b]

     Data Validate:
         >>> strategy = {
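A quick, hedged illustration of what the new `make` logic is expected to produce for the cross product this docstring describes. The matrix values below are made up for the example, and it assumes the `Strategy` fields keep the shapes shown in this diff.

```python
from ddeutil.workflow.job import Strategy

strategy = Strategy(
    matrix={"table": ["customer", "sales"], "system": ["csv"]},
    exclude=[{"table": "sales", "system": "csv"}],
    include=[{"table": "customer", "system": "excel"}],
)
# Cross product of `matrix`, minus the excluded combination, plus the
# included one:
#   [{'table': 'customer', 'system': 'csv'},
#    {'table': 'customer', 'system': 'excel'}]
print(strategy.make())
```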
@@ -105,13 +165,19 @@ class Strategy(BaseModel):
     def __prepare_keys(cls, values: DictData) -> DictData:
         """Rename key that use dash to underscore because Python does not
         support this character exist in any variable name.
+
+        :param values: A parsing values to this models
+        :rtype: DictData
         """
         dash2underscore("max-parallel", values)
         dash2underscore("fail-fast", values)
         return values

     def is_set(self) -> bool:
-        """Return True if this strategy was set from yaml template."""
+        """Return True if this strategy was set from yaml template.
+
+        :rtype: bool
+        """
         return len(self.matrix) > 0

     def make(self) -> list[DictStr]:
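The `__prepare_keys` validator above is what lets YAML templates use dashed keys. A small, hedged check of that behavior (the values are illustrative):

```python
from ddeutil.workflow.job import Strategy

strategy = Strategy.model_validate(
    {"max-parallel": 2, "matrix": {"table": ["customer", "sales"]}}
)
print(strategy.max_parallel)  # 2 -- "max-parallel" was renamed on the way in
print(strategy.is_set())      # True, because the matrix is non-empty
```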
@@ -120,44 +186,7 @@ class Strategy(BaseModel):
 
         :rtype: list[DictStr]
         """
-
-        if not (mt := self.matrix):
-            return [{}]
-
-        final: list[DictStr] = []
-        for r in cross_product(matrix=mt):
-            if any(
-                all(r[k] == v for k, v in exclude.items())
-                for exclude in self.exclude
-            ):
-                continue
-            final.append(r)
-
-        # NOTE: If it is empty matrix and include, it will return list of an
-        #   empty dict.
-        if not final and not self.include:
-            return [{}]
-
-        # NOTE: Add include to generated matrix with exclude list.
-        add: list[DictStr] = []
-        for include in self.include:
-            # VALIDATE:
-            #   Validate any key in include list should be a subset of some one
-            #   in matrix.
-            if all(not (set(include.keys()) <= set(m.keys())) for m in final):
-                raise ValueError("Include should have the keys equal to matrix")
-
-            # VALIDATE:
-            #   Validate value of include does not duplicate with generated
-            #   matrix.
-            if any(
-                all(include.get(k) == v for k, v in m.items())
-                for m in [*final, *add]
-            ):
-                continue
-            add.append(include)
-        final.extend(add)
-        return final
+        return make(self.matrix, self.include, self.exclude)


 class Job(BaseModel):
@@ -238,6 +267,7 @@ class Job(BaseModel):
 
     @model_validator(mode="after")
     def __prepare_running_id(self):
+        """Prepare the job running ID."""
         if self.run_id is None:
             self.run_id = gen_id(self.id or "", unique=True)
 
@@ -487,7 +517,7 @@ class Job(BaseModel):
         stop all not done futures if it receive the first exception from all
         running futures.

-        :param event:
+        :param event: An event
         :param futures: A list of futures.
         :rtype: Result
         """
@@ -529,7 +559,8 @@ class Job(BaseModel):
     def __catch_all_completed(self, futures: list[Future]) -> Result:
         """Job parallel pool futures catching with all-completed mode.

-        :param futures: A list of futures
+        :param futures: A list of futures that want to catch all completed
+            result.
         :rtype: Result
         """
         context: DictData = {}
ddeutil/workflow/on.py
CHANGED
@@ -20,6 +20,7 @@ from .utils import Loader
 
 __all__: TupleStr = (
     "On",
+    "YearOn",
     "interval2crontab",
 )
 
@@ -187,8 +188,10 @@ class On(BaseModel):
         return self.cronjob.schedule(date=start, tz=self.tz).next


-class
-    """Implement On
+class YearOn(On):
+    """Implement On Year Schedule Model for limiting the year matrix that is
+    used by some data schedule tools like AWS Glue.
+    """

     model_config = ConfigDict(arbitrary_types_allowed=True)
 
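For context, this is the `On` API that the scheduler relies on: `generate(...)` wraps `self.cronjob.schedule(date=start, tz=self.tz)`, and `.next` walks the schedule forward. The loader-style construction below is an assumption about the model's fields (only `cronjob` and `tz` are visible in this diff), so treat it as a hedged sketch.

```python
from datetime import datetime
from ddeutil.workflow.on import On

# Assumed field name taken from `self.cronjob` in the hunk above.
on = On.model_validate({"cronjob": "*/5 * * * *"})
runner = on.generate(datetime(2024, 1, 1))
print(runner.next)  # first scheduled datetime on or after the start date
```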
ddeutil/workflow/scheduler.py
CHANGED
@@ -6,6 +6,7 @@
 from __future__ import annotations

 import copy
+import inspect
 import json
 import logging
 import os
@@ -37,7 +38,7 @@ try:
 except ImportError:
     CancelJob = None

-from .__types import DictData
+from .__types import DictData, TupleStr
 from .cron import CronRunner
 from .exceptions import JobException, WorkflowException
 from .job import Job
@@ -57,16 +58,20 @@ from .utils import (
 
 load_dotenv()
 logger = get_logger("ddeutil.workflow")
+
+# NOTE: Adjust logging level on the schedule package.
 logging.getLogger("schedule").setLevel(logging.INFO)


-__all__ = (
+__all__: TupleStr = (
     "Workflow",
     "WorkflowSchedule",
     "WorkflowTask",
     "Schedule",
-    "workflow_runner",
     "workflow_task",
+    "workflow_long_running_task",
+    "workflow_control",
+    "workflow_runner",
 )

 
@@ -423,7 +428,6 @@ class Workflow(BaseModel):
 
         worker: int = int(os.getenv("WORKFLOW_CORE_MAX_NUM_POKING") or "4")
         with ThreadPoolExecutor(max_workers=worker) as executor:
-            # TODO: If I want to run infinite loop.
             futures: list[Future] = []
             for on in self.on:
                 futures.append(
@@ -675,7 +679,7 @@ class Workflow(BaseModel):
 

 class WorkflowSchedule(BaseModel):
-    """Workflow schedule Pydantic
+    """Workflow schedule Pydantic model."""

     name: str = Field(description="A workflow name.")
     on: list[On] = Field(
|
             queue[wfs.name]: list[datetime] = []
             running[wfs.name]: list[datetime] = []

-
+            # NOTE: Create default on if it does not passing on the Schedule.
+            _ons: list[On] = wf.on.copy() if len(wfs.on) == 0 else wfs.on
+
+            for on in _ons:
                 on_gen = on.generate(start_date)
                 next_running_date = on_gen.next
                 while next_running_date in queue[wfs.name]:
                     next_running_date = on_gen.next

+                # NOTE: Push the next running date to queue list.
                 heappush(queue[wfs.name], next_running_date)

             workflow_tasks.append(
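The per-workflow `queue` is an ordinary `heapq` heap of datetimes, so `heappush` keeps the earliest next-running date at index 0. A standalone illustration of that property:

```python
import heapq
from datetime import datetime

queue: list[datetime] = []
for dt in (datetime(2024, 1, 2), datetime(2024, 1, 1), datetime(2024, 1, 3)):
    heapq.heappush(queue, dt)

print(queue[0])  # datetime(2024, 1, 1, 0, 0) -- the next date to release
```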
@@ -808,34 +816,27 @@ def catch_exceptions(cancel_on_failure=False):
     """Catch exception error from scheduler job."""

     def catch_exceptions_decorator(func):
-        @wraps(func)
-        def wrapper(*args, **kwargs):
-            try:
-                return func(*args, **kwargs)
-            except Exception as err:
-                logger.exception(err)
-                if cancel_on_failure:
-                    return CancelJob

-
+        try:
+            if inspect.ismethod(func):

-
+                @wraps(func)
+                def wrapper(self, *args, **kwargs):
+                    return func(self, *args, **kwargs)

+                return wrapper

-
-
+            @wraps(func)
+            def wrapper(*args, **kwargs):
+                return func(*args, **kwargs)

-
-        @wraps(func)
-        def wrapper(self, *args, **kwargs):
-            try:
-                return func(self, *args, **kwargs)
-            except Exception as err:
-                logger.exception(err)
-                if cancel_on_failure:
-                    return CancelJob
+            return wrapper

-
+        except Exception as err:
+            logger.exception(err)
+            if cancel_on_failure:
+                return CancelJob
+            raise err

     return catch_exceptions_decorator
 
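For context, this decorator is built for callables handed to the `schedule` package, where a job that returns `schedule.CancelJob` gets unscheduled. A hedged usage sketch, assuming `catch_exceptions` stays importable from the module; the job function here is hypothetical:

```python
import schedule
from ddeutil.workflow.scheduler import catch_exceptions

@catch_exceptions(cancel_on_failure=True)
def poke_workflows():
    print("poking workflows ...")

# `schedule` drops any job whose run returns schedule.CancelJob, which is
# the signal that cancel_on_failure=True falls back to on failure.
schedule.every(1).minutes.do(poke_workflows)
schedule.run_pending()
```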
@@ -852,7 +853,7 @@ class WorkflowTask:
     queue: list[datetime] = field(compare=False, hash=False)
     running: list[datetime] = field(compare=False, hash=False)

-    @
+    @catch_exceptions(cancel_on_failure=True)
     def release(self, log: Log | None = None) -> None:
         """Workflow release, it will use with the same logic of
         `workflow.release` method.
ddeutil/workflow/stage.py
CHANGED
@@ -62,12 +62,15 @@ logger = get_logger("ddeutil.workflow")
 

 __all__: TupleStr = (
-    "
+    "BaseStage",
     "EmptyStage",
     "BashStage",
     "PyStage",
     "HookStage",
     "TriggerStage",
+    "Stage",
+    "HookSearch",
+    "extract_hook",
     "handler_result",
 )

@@ -76,6 +79,14 @@ def handler_result(message: str | None = None) -> Callable[P, Result]:
     """Decorator function for handler result from the stage execution. This
     function should to use with execution method only.

+    This stage exception handler still uses the ok-error concept, but it
+    allows you to force catching an output result with an error message by a
+    specific environment variable, `WORKFLOW_CORE_STAGE_RAISE_ERROR`.
+
+        Execution --> Ok      --> Result with 0
+                  --> Error   --> Raise StageException
+                              --> Result with 1 (if env var was set)
+
     :param message: A message that want to add at prefix of exception statement.
     """
     message: str = message or ""
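A hedged sketch of the ok-error behavior described above, assuming `PyStage` keeps a `run` field for its Python statement and that the environment value below selects the capture branch rather than the raise branch:

```python
import os
from ddeutil.workflow.stage import PyStage

# Assumed setting that makes a failing stage return a Result instead of
# raising StageException (see the Execution diagram above).
os.environ["WORKFLOW_CORE_STAGE_RAISE_ERROR"] = "false"

stage = PyStage(name="demo", run="raise ValueError('boom')")
rs = stage.execute(params={})
print(rs.status)                        # 1 on the error branch
print(rs.context.get("error_message"))  # prefixed exception message
```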
@@ -106,6 +117,9 @@ def handler_result(message: str | None = None) -> Callable[P, Result]:
                     f"{self.__class__.__name__}: {message}\n\t"
                     f"{err.__class__.__name__}: {err}"
                 ) from None
+
+            # NOTE: Catching exception error object to result with
+            #   error_message key.
             rs: Result = Result(
                 status=1,
                 context={
|
         if self.id:
             _id: str = param2template(self.id, params=to)
         else:
+            # NOTE: If the stage ID is not set, it will use its name instead.
             _id: str = gen_id(param2template(self.name, params=to))

         # NOTE: Set the output to that stage generated ID.
|
         return to

     def is_skipped(self, params: DictData | None = None) -> bool:
-        """Return true if condition of this stage do not correct.
+        """Return true if condition of this stage do not correct. This process
+        uses the built-in eval function to execute the if-condition.

         :param params: A parameters that want to pass to condition template.
         :rtype: bool
|
 
     def execute(self, params: DictData) -> Result:
         """Execution method for the Empty stage that do only logging out to
-        stdout.
+        stdout. This method does not use the `handler_result` decorator
+        because it does not expect any error from the logging function.
+
+        The result context should be empty, and this stage does not process
+        anything besides calling the logging function.

         :param params: A context data that want to add output result. But this
             stage does not pass any output.
+        :rtype: Result
         """
         logger.info(
             f"({self.run_id}) [STAGE]: Empty-Execute: {self.name!r}: "
@@ -302,6 +323,10 @@ class BashStage(BaseStage):
         """Return context of prepared bash statement that want to execute. This
         step will write the `.sh` file before giving this file name to context.
         After that, it will auto delete this file automatic.
+
+        :param bash: A bash statement that want to execute.
+        :param env: An environment variable that use on this bash statement.
+        :rtype: Iterator[TupleStr]
         """
         f_name: str = f"{uuid.uuid4()}.sh"
         f_shebang: str = "bash" if sys.platform.startswith("win") else "sh"
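The docstring above describes a write-yield-delete lifecycle for the generated `.sh` file. This is a hedged sketch of that shape, not the package's exact implementation:

```python
import os
import uuid
from collections.abc import Iterator
from contextlib import contextmanager

@contextmanager
def prepare_sh(bash: str, env: dict[str, str]) -> Iterator[str]:
    """Write a temp shell file, yield its name, always delete it after."""
    f_name: str = f"{uuid.uuid4()}.sh"
    with open(f_name, mode="w") as f:
        f.write("#!/bin/sh\n")
        f.writelines(f"{key}='{value}'\n" for key, value in env.items())
        f.write(bash)
    try:
        yield f_name
    finally:
        os.unlink(f_name)
```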
@@ -348,6 +373,7 @@ class BashStage(BaseStage):
             text=True,
         )
         if rs.returncode > 0:
+            # NOTE: Prepare stderr message that returning from subprocess.
             err: str = (
                 rs.stderr.encode("utf-8").decode("utf-16")
                 if "\\x00" in rs.stderr
|
 

 class PyStage(BaseStage):
-    """Python executor stage that running the Python statement
-    globals
+    """Python executor stage that running the Python statement with receiving
+    globals and additional variables.
+
+    This stage allow you to use any Python object that exists on the globals
+    such as import your installed package.

     Data Validate:
         >>> stage = {
|
     )

     def set_outputs(self, output: DictData, to: DictData) -> DictData:
-        """
+        """Override set an outputs method for the Python execution process that
+        extract output from all the locals values.

         :param output: A output data that want to extract to an output key.
         :param to: A context data that want to add output result.
|
         )


-@dataclass
+@dataclass(frozen=True)
 class HookSearch:
     """Hook Search dataclass that use for receive regular expression grouping
     dict from searching hook string value.
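Making `HookSearch` a frozen dataclass means its instances become immutable and hashable, so they are safe to share or use as keys. A minimal standalone illustration of that design choice (the fields here are not the real `HookSearch` fields):

```python
from dataclasses import dataclass

@dataclass(frozen=True)
class Search:
    path: str
    func: str

s = Search(path="tasks", func="get-api")
# s.func = "other"  # would raise dataclasses.FrozenInstanceError
print(hash(s) == hash(Search(path="tasks", func="get-api")))  # True
```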
@@ -551,7 +581,9 @@ class HookStage(BaseStage):
 

 class TriggerStage(BaseStage):
-    """Trigger Workflow execution stage that execute another workflow
+    """Trigger Workflow execution stage that execute another workflow. This is
+    the core stage that allow you to create the reusable workflow object or
+    dynamic parameters workflow for common usecase.

     Data Validate:
         >>> stage = {
|
 
     @handler_result("Raise from TriggerStage")
     def execute(self, params: DictData) -> Result:
-        """Trigger workflow execution.
+        """Trigger another workflow execution. It waits for the triggered
+        workflow run to complete before catching its result.

         :param params: A parameter data that want to use in this execution.
         :rtype: Result
@@ -591,7 +624,7 @@ class TriggerStage(BaseStage):
         return wf.execute(params=param2template(self.params, params))


-# NOTE:
+# NOTE: An order of parsing stage model.
 Stage = Union[
     PyStage,
     BashStage,
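Why the NOTE about parsing order matters: `Stage` is a pydantic `Union`, and member order influences which model a raw mapping is validated into. A minimal standalone illustration of the same idea (these are not the real stage models):

```python
from typing import Union
from pydantic import BaseModel

class PyLike(BaseModel):
    name: str
    run: str

class EmptyLike(BaseModel):
    name: str

class Holder(BaseModel):
    stage: Union[PyLike, EmptyLike]

holder = Holder(stage={"name": "x", "run": "print(1)"})
print(type(holder.stage).__name__)  # PyLike -- matched before the looser model
```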
{ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.13.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddeutil-workflow
-Version: 0.0.12
+Version: 0.0.13
 Summary: Lightweight workflow orchestration with less dependencies
 Author-email: ddeutils <korawich.anu@gmail.com>
 License: MIT
@@ -18,19 +18,21 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.9.13
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: ddeutil-io
+Requires-Dist: ddeutil-io >=0.1.12
 Requires-Dist: python-dotenv ==1.0.1
 Requires-Dist: typer <1.0.0,==0.12.5
 Requires-Dist: schedule <2.0.0,==1.2.2
 Provides-Extra: api
-Requires-Dist: fastapi <1.0.0
+Requires-Dist: fastapi <1.0.0,>=0.114.1 ; extra == 'api'

 # Workflow

 [](https://github.com/ddeutils/ddeutil-workflow/actions/workflows/tests.yml)
+[](https://pypi.org/project/ddeutil-workflow/)
 [](https://pypi.org/project/ddeutil-workflow/)
 [](https://github.com/ddeutils/ddeutil-workflow)
 [](https://github.com/ddeutils/ddeutil-workflow/blob/main/LICENSE)
@@ -41,8 +43,9 @@ for easy to make a simple metadata driven for data workflow orchestration.
 It can to use for data operator by a `.yaml` template.

 > [!WARNING]
-> This package provide only orchestration workload. That mean you should not
-> workflow stage to process any large data which use lot of compute
+> This package provide only orchestration workload task. That mean you should not
+> use the workflow stage to process any large volume data which use lot of compute
+> resource. :cold_sweat:

 In my opinion, I think it should not create duplicate workflow codes if I can
 write with dynamic input parameters on the one template workflow that just change
@@ -50,23 +53,25 @@ the input parameters per use-case instead.
 This way I can handle a lot of logical workflows in our orgs with only metadata
 configuration. It called **Metadata Driven Data Workflow**.

-
-workflow running. Because it not show us what is a use-case that running data
-workflow.
+**:pushpin: <u>Rules of This Workflow engine</u>**:

-
-
-
-
-
-**Rules of This Workflow engine**:
+1. Minimum frequency unit of scheduling is **1 minute** :warning:
+2. Can not re-run only failed stage and its pending downstream :rotating_light:
+3. All parallel tasks inside workflow engine use Multi-Threading
+   (Because Python 3.13 unlock GIL :unlock:)

-
-
-
-
+> [!NOTE]
+> _Disclaimer_: I inspire the dynamic statement from the [**GitHub Action**](https://github.com/features/actions)
+> `.yml` files and all of config file from several data orchestration framework
+> tools from my experience on Data Engineer. :grimacing:
+>
+> Other workflow that I interest on them and pick some interested feature to this
+> package:
+>
+> - [Google **Workflows**](https://cloud.google.com/workflows)
+> - [AWS **Step Functions**](https://aws.amazon.com/step-functions/)

-## Installation
+## :round_pushpin: Installation

 This project need `ddeutil-io` extension namespace packages. If you want to install
 this package with application add-ons, you should add `app` in installation;
@@ -79,7 +84,7 @@ this package with application add-ons, you should add `app` in installation;
 
 > I added this feature to the main milestone.
 >
-> **Docker Images** supported:
+> :egg: **Docker Images** supported:
 >
 > | Docker Image                | Python Version | Support |
 > |-----------------------------|----------------|---------|
@@ -88,7 +93,7 @@ this package with application add-ons, you should add `app` in installation;
 > | ddeutil-workflow:python3.11 | `3.11`         | :x:     |
 > | ddeutil-workflow:python3.12 | `3.12`         | :x:     |

-## Usage
+## :beers: Usage

 This is examples that use workflow file for running common Data Engineering
 use-case.
@@ -100,8 +105,10 @@ use-case.
 > maintenance your data workflows.

 ```yaml
-run_py_local:
-
+run-py-local:
+
+  # Validate model that use to parsing exists for template file
+  type: ddeutil.workflow.Workflow
   on:
     # If workflow deploy to schedule, it will running every 5 minutes
     # with Asia/Bangkok timezone.
@@ -110,7 +117,7 @@ run_py_local:
   params:
     # Incoming execution parameters will validate with this type. It allow
     # to set default value or templating.
-
+    source-extract: str
     run-date: datetime
   jobs:
     getting-api-data:
@@ -119,17 +126,56 @@ run_py_local:
         id: retrieve-api
         uses: tasks/get-api-with-oauth-to-s3@requests
         with:
-
-
-
-
-
-
+          # Arguments of source data that want to retrieve.
+          method: post
+          url: https://finances/open-data/currency-pairs/
+          body:
+            resource: ${{ params.source-extract }}
+
+          # You can able to use filtering like Jinja template but this
+          # package does not use it.
+          filter: ${{ params.run-date | fmt(fmt='%Y%m%d') }}
+          auth:
+            type: bearer
+            keys: ${API_ACCESS_REFRESH_TOKEN}
+
+          # Arguments of target data that want to landing.
+          writing_mode: flatten
+          aws_s3_path: my-data/open-data/${{ params.source-extract }}
+
+          # This Authentication code should implement with your custom hook
+          # function. The template allow you to use environment variable.
           aws_access_client_id: ${AWS_ACCESS_CLIENT_ID}
           aws_access_client_secret: ${AWS_ACCESS_CLIENT_SECRET}
 ```

-
+The above workflow template is the main executor pipeline that you want to run.
+If you want to schedule this workflow, you want its parameters to change
+dynamically based on execution time, such as `run-date` changing with the
+workflow running date.
+
+So, this package provides the `Schedule` template for this action.
+
+```yaml
+schedule-run-local-wf:
+
+  # Validate model that use to parsing exists for template file
+  type: ddeutil.workflow.scheduler.Schedule
+  workflows:
+
+    # Map existing workflow that want to deploy with scheduler application.
+    # It allow you to passing release parameter that dynamic change depend the
+    # current context of this scheduler application releasing that time.
+    - name: run-py-local
+      params:
+        source-extract: "USD-THB"
+        asat-dt: "${{ release.logical_date }}"
+```
+
+## :cookie: Configuration
+
+The main configuration that use to dynamic changing with your purpose of this
+application. If any configuration values do not set yet, it will use default value
+and do not raise any error to you.

 | Environment                          | Component | Default                          | Description                                                                  |
 |--------------------------------------|-----------|----------------------------------|------------------------------------------------------------------------------|
@@ -155,7 +201,7 @@ run_py_local:
 | `WORKFLOW_API_ENABLE_ROUTE_WORKFLOW` | API       | true                             | A flag that enable workflow route to manage execute manually and workflow logging |
 | `WORKFLOW_API_ENABLE_ROUTE_SCHEDULE` | API       | true                             | A flag that enable run scheduler                                             |

-## Deployment
+## :rocket: Deployment

 This package able to run as a application service for receive manual trigger
 from the master node via RestAPI or use to be Scheduler background service
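All of the table's settings follow the same read-with-default pattern seen in the scheduler diff above, so unset variables fall back silently:

```python
import os

# Mirrors the scheduler's use of WORKFLOW_CORE_MAX_NUM_POKING in this diff.
worker: int = int(os.getenv("WORKFLOW_CORE_MAX_NUM_POKING") or "4")
print(worker)  # 4 unless the environment overrides it
```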
{ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.13.dist-info}/RECORD
CHANGED
@@ -1,21 +1,21 @@
-ddeutil/workflow/__about__.py,sha256=
+ddeutil/workflow/__about__.py,sha256=StSv8QbtF16HmqqJ8TfZlgbD1BgLyYHcubwplM-eSto,28
 ddeutil/workflow/__init__.py,sha256=aEQiEWwTPGhfwpzzdb99xXaHchi5ABWUHl2iLIyT18E,664
 ddeutil/workflow/__types.py,sha256=SYMoxbENQX8uPsiCZkjtpHAqqHOh8rUrarAFicAJd0E,1773
 ddeutil/workflow/api.py,sha256=xVP8eGu1nnR8HM0ULTwxs9TV9tsxCOjZ68cAffw2f3o,4802
 ddeutil/workflow/cli.py,sha256=Ikcq526WeIl-737-v55T0PwAZ2pNiZFxlN0Y-DjhDbQ,3374
 ddeutil/workflow/cron.py,sha256=uhp3E5pl_tX_H88bsDujcwdhZmOE53csyV-ouPpPdK8,25321
 ddeutil/workflow/exceptions.py,sha256=Uf1-Tn8rAzj0aiVHSqo4fBqO80W0za7UFZgKv24E-tg,706
-ddeutil/workflow/job.py,sha256=
+ddeutil/workflow/job.py,sha256=iwiDUGgnId6QFkzqLZuiWFYUNfY-qYJebaGwhFnMKH8,20633
 ddeutil/workflow/log.py,sha256=bZyyqf3oNBB8oRf8RI0YvII7wHHoj4wC-nmW_pQjQ1c,6036
-ddeutil/workflow/on.py,sha256=
+ddeutil/workflow/on.py,sha256=vsZG19mNoztDSB_ObD_4ZWPKgHYpBDJMWw97ZiTavNE,7237
 ddeutil/workflow/repeat.py,sha256=e3dekPTlMlxCCizfBYsZ8dD8Juy4rtfqDZJU3Iky2oA,5011
 ddeutil/workflow/route.py,sha256=ABEk-WlVo9XGFc7zCPbckX33URCNH7woQFU1keX_8PQ,6970
-ddeutil/workflow/scheduler.py,sha256=
-ddeutil/workflow/stage.py,sha256=
+ddeutil/workflow/scheduler.py,sha256=fe9NGobU8zN95C0FY2PB7eYI9tzyvyh-_K7vcUFFBO8,41674
+ddeutil/workflow/stage.py,sha256=rGFdLLYj6eo8aqSRr4lkBBdah4KIzCzKefJeg0hk0O8,22289
 ddeutil/workflow/utils.py,sha256=TbqgPkDDYBpqCZ7HV2TU3AH1_Mv-zfrJdwVL-l2SPUo,28559
-ddeutil_workflow-0.0.
-ddeutil_workflow-0.0.
-ddeutil_workflow-0.0.
-ddeutil_workflow-0.0.
-ddeutil_workflow-0.0.
-ddeutil_workflow-0.0.
+ddeutil_workflow-0.0.13.dist-info/LICENSE,sha256=nGFZ1QEhhhWeMHf9n99_fdt4vQaXS29xWKxt-OcLywk,1085
+ddeutil_workflow-0.0.13.dist-info/METADATA,sha256=HuSRkM94JcefbkiCR6_3khXeUiAsb0FMirS3d7qWGHk,11556
+ddeutil_workflow-0.0.13.dist-info/WHEEL,sha256=5Mi1sN9lKoFv_gxcPtisEVrJZihrm_beibeg5R6xb4I,91
+ddeutil_workflow-0.0.13.dist-info/entry_points.txt,sha256=0BVOgO3LdUdXVZ-CiHHDKxzEk2c8J30jEwHeKn2YCWI,62
+ddeutil_workflow-0.0.13.dist-info/top_level.txt,sha256=m9M6XeSWDwt_yMsmH6gcOjHZVK5O0-vgtNBuncHjzW4,8
+ddeutil_workflow-0.0.13.dist-info/RECORD,,
{ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.13.dist-info}/LICENSE
File without changes
{ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.13.dist-info}/entry_points.txt
File without changes
{ddeutil_workflow-0.0.12.dist-info → ddeutil_workflow-0.0.13.dist-info}/top_level.txt
File without changes