ddeutil-workflow 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +9 -0
- ddeutil/workflow/__types.py +43 -1
- ddeutil/workflow/exceptions.py +13 -1
- ddeutil/workflow/loader.py +16 -110
- ddeutil/workflow/on.py +195 -0
- ddeutil/workflow/pipeline.py +351 -371
- ddeutil/workflow/{vendors/__schedule.py → scheduler.py} +222 -176
- ddeutil/workflow/stage.py +402 -0
- ddeutil/workflow/utils.py +219 -28
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/METADATA +118 -90
- ddeutil_workflow-0.0.6.dist-info/RECORD +15 -0
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/WHEEL +1 -1
- ddeutil/workflow/__regex.py +0 -44
- ddeutil/workflow/conn.py +0 -240
- ddeutil/workflow/schedule.py +0 -82
- ddeutil/workflow/tasks/__init__.py +0 -6
- ddeutil/workflow/tasks/_pandas.py +0 -54
- ddeutil/workflow/tasks/_polars.py +0 -92
- ddeutil/workflow/vendors/__dataset.py +0 -127
- ddeutil/workflow/vendors/__dict.py +0 -333
- ddeutil/workflow/vendors/__init__.py +0 -0
- ddeutil/workflow/vendors/aws.py +0 -185
- ddeutil/workflow/vendors/az.py +0 -0
- ddeutil/workflow/vendors/minio.py +0 -11
- ddeutil/workflow/vendors/pd.py +0 -13
- ddeutil/workflow/vendors/pg.py +0 -11
- ddeutil/workflow/vendors/pl.py +0 -172
- ddeutil/workflow/vendors/sftp.py +0 -209
- ddeutil_workflow-0.0.4.dist-info/RECORD +0 -29
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.4.dist-info → ddeutil_workflow-0.0.6.dist-info}/top_level.txt +0 -0
ddeutil/workflow/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__: str = "0.0.4"
|
1
|
+
__version__: str = "0.0.6"
|
ddeutil/workflow/__init__.py
CHANGED
@@ -0,0 +1,9 @@
|
|
1
|
+
# ------------------------------------------------------------------------------
|
2
|
+
# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
|
3
|
+
# Licensed under the MIT License. See LICENSE in the project root for
|
4
|
+
# license information.
|
5
|
+
# ------------------------------------------------------------------------------
|
6
|
+
from .exceptions import StageException
|
7
|
+
from .on import On
|
8
|
+
from .pipeline import Pipeline
|
9
|
+
from .stage import Stage
|
ddeutil/workflow/__types.py
CHANGED
@@ -5,8 +5,50 @@
|
|
5
5
|
# ------------------------------------------------------------------------------
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
|
-
|
8
|
+
import re
|
9
|
+
from re import (
|
10
|
+
IGNORECASE,
|
11
|
+
MULTILINE,
|
12
|
+
UNICODE,
|
13
|
+
VERBOSE,
|
14
|
+
Pattern,
|
15
|
+
)
|
16
|
+
from typing import Any, Union
|
9
17
|
|
10
18
|
TupleStr = tuple[str, ...]
|
11
19
|
DictData = dict[str, Any]
|
12
20
|
DictStr = dict[str, str]
|
21
|
+
Matrix = dict[str, Union[list[str], list[int]]]
|
22
|
+
MatrixInclude = list[dict[str, Union[str, int]]]
|
23
|
+
MatrixExclude = list[dict[str, Union[str, int]]]
|
24
|
+
|
25
|
+
|
26
|
+
class Re:
|
27
|
+
"""Regular expression config."""
|
28
|
+
|
29
|
+
# NOTE: Search caller
|
30
|
+
__re_caller: str = r"""
|
31
|
+
\$
|
32
|
+
{{
|
33
|
+
\s*(?P<caller>
|
34
|
+
[a-zA-Z0-9_.\s'\"\[\]\(\)\-\{}]+?
|
35
|
+
)\s*
|
36
|
+
}}
|
37
|
+
"""
|
38
|
+
RE_CALLER: Pattern = re.compile(
|
39
|
+
__re_caller, MULTILINE | IGNORECASE | UNICODE | VERBOSE
|
40
|
+
)
|
41
|
+
|
42
|
+
# NOTE: Search task
|
43
|
+
__re_task_fmt: str = r"""
|
44
|
+
^
|
45
|
+
(?P<path>[^/@]+)
|
46
|
+
/
|
47
|
+
(?P<func>[^@]+)
|
48
|
+
@
|
49
|
+
(?P<tag>.+)
|
50
|
+
$
|
51
|
+
"""
|
52
|
+
RE_TASK_FMT: Pattern = re.compile(
|
53
|
+
__re_task_fmt, MULTILINE | IGNORECASE | UNICODE | VERBOSE
|
54
|
+
)
|
ddeutil/workflow/exceptions.py
CHANGED
@@ -9,4 +9,16 @@ Define Errors Object for Node package
|
|
9
9
|
from __future__ import annotations
|
10
10
|
|
11
11
|
|
12
|
-
class
|
12
|
+
class WorkflowException(Exception): ...
|
13
|
+
|
14
|
+
|
15
|
+
class UtilException(WorkflowException): ...
|
16
|
+
|
17
|
+
|
18
|
+
class StageException(WorkflowException): ...
|
19
|
+
|
20
|
+
|
21
|
+
class JobException(WorkflowException): ...
|
22
|
+
|
23
|
+
|
24
|
+
class PipelineException(WorkflowException): ...
|
ddeutil/workflow/loader.py
CHANGED
@@ -6,46 +6,17 @@
|
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
8
|
from functools import cached_property
|
9
|
-
from typing import
|
9
|
+
from typing import TypeVar
|
10
10
|
|
11
|
-
from ddeutil.core import (
|
12
|
-
getdot,
|
13
|
-
hasdot,
14
|
-
import_string,
|
15
|
-
)
|
16
|
-
from ddeutil.io import (
|
17
|
-
PathData,
|
18
|
-
PathSearch,
|
19
|
-
YamlEnvFl,
|
20
|
-
)
|
21
|
-
from pydantic import BaseModel, Field
|
22
|
-
from pydantic.functional_validators import model_validator
|
11
|
+
from ddeutil.core import import_string
|
12
|
+
from ddeutil.io import PathSearch, YamlFlResolve
|
13
|
+
from pydantic import BaseModel
|
23
14
|
|
24
|
-
from .__regex import RegexConf
|
25
15
|
from .__types import DictData
|
16
|
+
from .utils import ConfParams, config
|
26
17
|
|
27
|
-
T = TypeVar("T")
|
28
|
-
BaseModelType = type[BaseModel]
|
29
18
|
AnyModel = TypeVar("AnyModel", bound=BaseModel)
|
30
|
-
|
31
|
-
|
32
|
-
class Engine(BaseModel):
|
33
|
-
"""Engine Model"""
|
34
|
-
|
35
|
-
paths: PathData = Field(default_factory=PathData)
|
36
|
-
registry: list[str] = Field(default_factory=lambda: ["ddeutil.workflow"])
|
37
|
-
|
38
|
-
@model_validator(mode="before")
|
39
|
-
def __prepare_registry(cls, values: DictData) -> DictData:
|
40
|
-
if (_regis := values.get("registry")) and isinstance(_regis, str):
|
41
|
-
values["registry"] = [_regis]
|
42
|
-
return values
|
43
|
-
|
44
|
-
|
45
|
-
class Params(BaseModel):
|
46
|
-
"""Params Model"""
|
47
|
-
|
48
|
-
engine: Engine = Field(default_factory=Engine)
|
19
|
+
AnyModelType = type[AnyModel]
|
49
20
|
|
50
21
|
|
51
22
|
class SimLoad:
|
@@ -63,27 +34,25 @@ class SimLoad:
|
|
63
34
|
def __init__(
|
64
35
|
self,
|
65
36
|
name: str,
|
66
|
-
params: Params,
|
37
|
+
params: ConfParams,
|
67
38
|
externals: DictData,
|
68
39
|
) -> None:
|
69
40
|
self.data: DictData = {}
|
70
41
|
for file in PathSearch(params.engine.paths.conf).files:
|
71
42
|
if any(file.suffix.endswith(s) for s in ("yml", "yaml")) and (
|
72
|
-
data := YamlEnvFl(file).read().get(name, {})
|
43
|
+
data := YamlFlResolve(file).read().get(name, {})
|
73
44
|
):
|
74
45
|
self.data = data
|
75
46
|
if not self.data:
|
76
47
|
raise ValueError(f"Config {name!r} does not found on conf path")
|
77
|
-
self.__conf_params: Params = params
|
48
|
+
self.conf_params: ConfParams = params
|
78
49
|
self.externals: DictData = externals
|
79
50
|
|
80
|
-
@property
|
81
|
-
def conf_params(self) -> Params:
|
82
|
-
return self.__conf_params
|
83
|
-
|
84
51
|
@cached_property
|
85
|
-
def type(self) -> BaseModelType:
|
86
|
-
"""Return object type which implement
|
52
|
+
def type(self) -> AnyModelType:
|
53
|
+
"""Return object of string type which implement on any registry. The
|
54
|
+
object type
|
55
|
+
"""
|
87
56
|
if not (_typ := self.data.get("type")):
|
88
57
|
raise ValueError(
|
89
58
|
f"the 'type' value: {_typ} does not exists in config data."
|
@@ -99,76 +68,13 @@ class SimLoad:
|
|
99
68
|
continue
|
100
69
|
return import_string(f"{_typ}")
|
101
70
|
|
102
|
-
def load(self) -> AnyModel:
|
103
|
-
return self.type.model_validate(self.data)
|
104
|
-
|
105
71
|
|
106
72
|
class Loader(SimLoad):
|
107
|
-
"""
|
73
|
+
"""Loader Object that get the config `yaml` file from current path.
|
108
74
|
|
109
75
|
:param name: A name of config data that will read by Yaml Loader object.
|
110
76
|
:param externals: An external parameters
|
111
77
|
"""
|
112
78
|
|
113
|
-
|
114
|
-
|
115
|
-
def __init__(
|
116
|
-
self,
|
117
|
-
name: str,
|
118
|
-
externals: DictData,
|
119
|
-
*,
|
120
|
-
path: str | None = None,
|
121
|
-
) -> None:
|
122
|
-
self.data: DictData = {}
|
123
|
-
|
124
|
-
# NOTE: import params object from specific config file
|
125
|
-
params: Params = self.config(path)
|
126
|
-
|
127
|
-
super().__init__(name, params, externals)
|
128
|
-
|
129
|
-
@classmethod
|
130
|
-
def config(cls, path: str | None = None) -> Params:
|
131
|
-
"""Load Config data from ``workflows-conf.yaml`` file."""
|
132
|
-
return Params.model_validate(
|
133
|
-
YamlEnvFl(path or f"./{cls.conf_name}.yaml").read()
|
134
|
-
)
|
135
|
-
|
136
|
-
|
137
|
-
def map_params(value: Any, params: dict[str, Any]) -> Any:
|
138
|
-
"""Map caller value that found from ``RE_CALLER`` regular expression.
|
139
|
-
|
140
|
-
:param value: A value that want to mapped with an params
|
141
|
-
:param params: A parameter value that getting with matched regular
|
142
|
-
expression.
|
143
|
-
|
144
|
-
:rtype: Any
|
145
|
-
:returns: An any getter value from the params input.
|
146
|
-
"""
|
147
|
-
if isinstance(value, dict):
|
148
|
-
return {k: map_params(value[k], params) for k in value}
|
149
|
-
elif isinstance(value, (list, tuple, set)):
|
150
|
-
return type(value)([map_params(i, params) for i in value])
|
151
|
-
elif not isinstance(value, str):
|
152
|
-
return value
|
153
|
-
|
154
|
-
if not (found := RegexConf.RE_CALLER.search(value)):
|
155
|
-
return value
|
156
|
-
|
157
|
-
# NOTE: get caller value that setting inside; ``${{ <caller-value> }}``
|
158
|
-
caller: str = found.group("caller")
|
159
|
-
if not hasdot(caller, params):
|
160
|
-
raise ValueError(f"params does not set caller: {caller!r}")
|
161
|
-
getter: Any = getdot(caller, params)
|
162
|
-
|
163
|
-
# NOTE: check type of vars
|
164
|
-
if isinstance(getter, (str, int)):
|
165
|
-
return value.replace(found.group(0), str(getter))
|
166
|
-
|
167
|
-
# NOTE:
|
168
|
-
# If type of getter caller does not formatting, it will return origin
|
169
|
-
# value.
|
170
|
-
if value.replace(found.group(0), "") != "":
|
171
|
-
raise ValueError(
|
172
|
-
"Callable variable should not pass other outside ${{ ... }}"
|
173
|
-
)
|
174
|
-
return getter
|
79
|
+
def __init__(self, name: str, externals: DictData) -> None:
|
80
|
+
super().__init__(name, config(), externals)
|
ddeutil/workflow/on.py
ADDED
@@ -0,0 +1,195 @@
|
|
1
|
+
# ------------------------------------------------------------------------------
|
2
|
+
# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
|
3
|
+
# Licensed under the MIT License. See LICENSE in the project root for
|
4
|
+
# license information.
|
5
|
+
# ------------------------------------------------------------------------------
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
from datetime import datetime
|
9
|
+
from typing import Annotated, Literal
|
10
|
+
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
11
|
+
|
12
|
+
from pydantic import BaseModel, ConfigDict, Field
|
13
|
+
from pydantic.functional_validators import field_validator, model_validator
|
14
|
+
from typing_extensions import Self
|
15
|
+
|
16
|
+
try:
|
17
|
+
from .__types import DictData, DictStr
|
18
|
+
from .loader import Loader
|
19
|
+
from .scheduler import WEEKDAYS, CronJob, CronJobYear, CronRunner
|
20
|
+
except ImportError:
|
21
|
+
from ddeutil.workflow.__types import DictData, DictStr
|
22
|
+
from ddeutil.workflow.loader import Loader
|
23
|
+
from ddeutil.workflow.scheduler import (
|
24
|
+
WEEKDAYS,
|
25
|
+
CronJob,
|
26
|
+
CronJobYear,
|
27
|
+
CronRunner,
|
28
|
+
)
|
29
|
+
|
30
|
+
|
31
|
+
def interval2crontab(
|
32
|
+
interval: Literal["daily", "weekly", "monthly"],
|
33
|
+
day: str | None = None,
|
34
|
+
time: str = "00:00",
|
35
|
+
) -> str:
|
36
|
+
"""Return the crontab string that was generated from specific values.
|
37
|
+
|
38
|
+
:param interval: A interval value that is one of 'daily', 'weekly', or
|
39
|
+
'monthly'.
|
40
|
+
:param day: A day value that will be day of week. The default value is
|
41
|
+
monday if it be weekly interval.
|
42
|
+
:param time: A time value that passing with format '%H:%M'.
|
43
|
+
|
44
|
+
Examples:
|
45
|
+
>>> interval2crontab(interval='daily', time='01:30')
|
46
|
+
'1 30 * * *'
|
47
|
+
>>> interval2crontab(interval='weekly', day='friday', time='18:30')
|
48
|
+
'18 30 * * 5'
|
49
|
+
>>> interval2crontab(interval='monthly', time='00:00')
|
50
|
+
'0 0 1 * *'
|
51
|
+
>>> interval2crontab(interval='monthly', day='tuesday', time='12:00')
|
52
|
+
'12 0 1 * 2'
|
53
|
+
"""
|
54
|
+
d: str = "*"
|
55
|
+
if interval == "weekly":
|
56
|
+
d = WEEKDAYS[(day or "monday")[:3].title()]
|
57
|
+
elif interval == "monthly" and day:
|
58
|
+
d = WEEKDAYS[day[:3].title()]
|
59
|
+
|
60
|
+
h, m = tuple(
|
61
|
+
i.lstrip("0") if i != "00" else "0" for i in time.split(":", maxsplit=1)
|
62
|
+
)
|
63
|
+
return f"{h} {m} {'1' if interval == 'monthly' else '*'} * {d}"
|
64
|
+
|
65
|
+
|
66
|
+
class On(BaseModel):
|
67
|
+
"""On Model (Schedule)
|
68
|
+
|
69
|
+
See Also:
|
70
|
+
* ``generate()`` is the main usecase of this schedule object.
|
71
|
+
"""
|
72
|
+
|
73
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
74
|
+
|
75
|
+
# NOTE: This is fields of the base schedule.
|
76
|
+
cronjob: Annotated[
|
77
|
+
CronJob,
|
78
|
+
Field(description="Cron job of this schedule"),
|
79
|
+
]
|
80
|
+
tz: Annotated[
|
81
|
+
str,
|
82
|
+
Field(
|
83
|
+
description="A timezone string value",
|
84
|
+
alias="timezone",
|
85
|
+
),
|
86
|
+
] = "Etc/UTC"
|
87
|
+
extras: Annotated[
|
88
|
+
DictData,
|
89
|
+
Field(
|
90
|
+
default_factory=dict,
|
91
|
+
description="An extras mapping parameters",
|
92
|
+
),
|
93
|
+
]
|
94
|
+
|
95
|
+
@classmethod
|
96
|
+
def from_value(cls, value: DictStr, externals: DictData) -> Self:
|
97
|
+
"""Constructor from values that will generate crontab by function.
|
98
|
+
|
99
|
+
:param value: A mapping value that will generate crontab before create
|
100
|
+
schedule model.
|
101
|
+
:param externals: A extras external parameter that will keep in extras.
|
102
|
+
"""
|
103
|
+
passing: DictStr = {}
|
104
|
+
if "timezone" in value:
|
105
|
+
passing["tz"] = value.pop("timezone")
|
106
|
+
passing["cronjob"] = interval2crontab(
|
107
|
+
**{v: value[v] for v in value if v in ("interval", "day", "time")}
|
108
|
+
)
|
109
|
+
return cls(extras=externals, **passing)
|
110
|
+
|
111
|
+
@classmethod
|
112
|
+
def from_loader(
|
113
|
+
cls,
|
114
|
+
name: str,
|
115
|
+
externals: DictData,
|
116
|
+
) -> Self:
|
117
|
+
"""Constructor from the name of config that will use loader object for
|
118
|
+
getting the data.
|
119
|
+
|
120
|
+
:param name: A name of config that will getting from loader.
|
121
|
+
:param externals: A extras external parameter that will keep in extras.
|
122
|
+
"""
|
123
|
+
loader: Loader = Loader(name, externals=externals)
|
124
|
+
# NOTE: Validate the config type match with current connection model
|
125
|
+
if loader.type != cls:
|
126
|
+
raise ValueError(f"Type {loader.type} does not match with {cls}")
|
127
|
+
|
128
|
+
loader_data: DictData = loader.data
|
129
|
+
if "interval" in loader_data:
|
130
|
+
return cls.model_validate(
|
131
|
+
obj=dict(
|
132
|
+
cronjob=interval2crontab(
|
133
|
+
**{
|
134
|
+
v: loader_data[v]
|
135
|
+
for v in loader_data
|
136
|
+
if v in ("interval", "day", "time")
|
137
|
+
}
|
138
|
+
),
|
139
|
+
extras=externals,
|
140
|
+
**loader_data,
|
141
|
+
)
|
142
|
+
)
|
143
|
+
if "cronjob" not in loader_data:
|
144
|
+
raise ValueError("Config does not set ``cronjob`` key")
|
145
|
+
return cls.model_validate(
|
146
|
+
obj=dict(
|
147
|
+
cronjob=loader_data.pop("cronjob"),
|
148
|
+
extras=externals,
|
149
|
+
**loader_data,
|
150
|
+
)
|
151
|
+
)
|
152
|
+
|
153
|
+
@model_validator(mode="before")
|
154
|
+
def __prepare_values(cls, values):
|
155
|
+
if tz := values.pop("tz", None):
|
156
|
+
values["timezone"] = tz
|
157
|
+
return values
|
158
|
+
|
159
|
+
@field_validator("tz")
|
160
|
+
def __validate_tz(cls, value: str):
|
161
|
+
"""Validate timezone value that able to initialize with ZoneInfo after
|
162
|
+
it passing to this model in before mode."""
|
163
|
+
try:
|
164
|
+
_ = ZoneInfo(value)
|
165
|
+
return value
|
166
|
+
except ZoneInfoNotFoundError as err:
|
167
|
+
raise ValueError(f"Invalid timezone: {value}") from err
|
168
|
+
|
169
|
+
@field_validator("cronjob", mode="before")
|
170
|
+
def __prepare_cronjob(cls, value: str | CronJob) -> CronJob:
|
171
|
+
"""Prepare crontab value that able to receive with string type."""
|
172
|
+
return CronJob(value) if isinstance(value, str) else value
|
173
|
+
|
174
|
+
def generate(self, start: str | datetime) -> CronRunner:
|
175
|
+
"""Return Cron runner object."""
|
176
|
+
if not isinstance(start, datetime):
|
177
|
+
start: datetime = datetime.fromisoformat(start)
|
178
|
+
return self.cronjob.schedule(date=start, tz=self.tz)
|
179
|
+
|
180
|
+
|
181
|
+
class AwsOn(On):
|
182
|
+
"""Implement On AWS Schedule for AWS Service like AWS Glue."""
|
183
|
+
|
184
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
185
|
+
|
186
|
+
# NOTE: This is fields of the base schedule.
|
187
|
+
cronjob: Annotated[
|
188
|
+
CronJobYear,
|
189
|
+
Field(description="Cron job of this schedule"),
|
190
|
+
]
|
191
|
+
|
192
|
+
@field_validator("cronjob", mode="before")
|
193
|
+
def __prepare_cronjob(cls, value: str | CronJobYear) -> CronJobYear:
|
194
|
+
"""Prepare crontab value that able to receive with string type."""
|
195
|
+
return CronJobYear(value) if isinstance(value, str) else value
|