ddeutil-workflow 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/conn.py +31 -29
- ddeutil/workflow/dataset.py +1 -5
- ddeutil/workflow/exceptions.py +0 -50
- ddeutil/workflow/loader.py +26 -190
- ddeutil/workflow/pipeline.py +201 -83
- ddeutil/workflow/schedule.py +3 -8
- ddeutil/workflow/tasks/__init__.py +6 -10
- ddeutil/workflow/tasks/_pandas.py +54 -0
- ddeutil/workflow/tasks/_polars.py +45 -2
- ddeutil/workflow/utils.py +65 -0
- {ddeutil_workflow-0.0.1.dist-info → ddeutil_workflow-0.0.2.dist-info}/METADATA +26 -15
- ddeutil_workflow-0.0.2.dist-info/RECORD +25 -0
- ddeutil/workflow/hooks/__init__.py +0 -9
- ddeutil/workflow/hooks/_postgres.py +0 -2
- ddeutil/workflow/utils/__init__.py +0 -0
- ddeutil/workflow/utils/receive.py +0 -33
- ddeutil/workflow/utils/selection.py +0 -2
- ddeutil_workflow-0.0.1.dist-info/RECORD +0 -28
- {ddeutil_workflow-0.0.1.dist-info → ddeutil_workflow-0.0.2.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.1.dist-info → ddeutil_workflow-0.0.2.dist-info}/WHEEL +0 -0
- {ddeutil_workflow-0.0.1.dist-info → ddeutil_workflow-0.0.2.dist-info}/top_level.txt +0 -0
ddeutil/workflow/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__: str = "0.0.
|
1
|
+
__version__: str = "0.0.2"
|
ddeutil/workflow/conn.py
CHANGED
@@ -10,7 +10,7 @@ from collections.abc import Iterator
|
|
10
10
|
from pathlib import Path
|
11
11
|
from typing import Annotated, Any, Literal, Optional, TypeVar
|
12
12
|
|
13
|
-
from ddeutil.
|
13
|
+
from ddeutil.io.models.conn import Conn as ConnModel
|
14
14
|
from pydantic import BaseModel, ConfigDict, Field
|
15
15
|
from pydantic.functional_validators import field_validator
|
16
16
|
from pydantic.types import SecretStr
|
@@ -43,27 +43,15 @@ class BaseConn(BaseModel):
|
|
43
43
|
]
|
44
44
|
|
45
45
|
@classmethod
|
46
|
-
def
|
47
|
-
|
48
|
-
name: str,
|
49
|
-
externals: DictData,
|
50
|
-
) -> Self:
|
51
|
-
"""Construct Connection with Loader object with specific config name.
|
52
|
-
|
53
|
-
:param name:
|
54
|
-
:param externals:
|
55
|
-
"""
|
56
|
-
loader: Loader = Loader(name, externals=externals)
|
57
|
-
# NOTE: Validate the config type match with current connection model
|
58
|
-
if loader.type != cls:
|
59
|
-
raise ValueError(f"Type {loader.type} does not match with {cls}")
|
46
|
+
def from_dict(cls, values: DictData):
|
47
|
+
"""Construct Connection with dict of data"""
|
60
48
|
filter_data: DictData = {
|
61
|
-
k:
|
62
|
-
for k in
|
49
|
+
k: values.pop(k)
|
50
|
+
for k in values.copy()
|
63
51
|
if k not in cls.model_fields and k not in EXCLUDED_EXTRAS
|
64
52
|
}
|
65
|
-
if "url" in
|
66
|
-
url: ConnModel = ConnModel.from_url(
|
53
|
+
if "url" in values:
|
54
|
+
url: ConnModel = ConnModel.from_url(values.pop("url"))
|
67
55
|
return cls(
|
68
56
|
dialect=url.dialect,
|
69
57
|
host=url.host,
|
@@ -73,24 +61,38 @@ class BaseConn(BaseModel):
|
|
73
61
|
# NOTE:
|
74
62
|
# I will replace None endpoint with memory value for SQLite
|
75
63
|
# connection string.
|
76
|
-
endpoint=
|
64
|
+
endpoint=(url.endpoint or "memory"),
|
77
65
|
# NOTE: This order will show that externals this the top level.
|
78
|
-
extras=(url.options | filter_data
|
66
|
+
extras=(url.options | filter_data),
|
79
67
|
)
|
80
68
|
return cls.model_validate(
|
81
69
|
obj={
|
82
|
-
"extras": (
|
83
|
-
|
84
|
-
),
|
85
|
-
**loader.data,
|
70
|
+
"extras": (values.pop("extras", {}) | filter_data),
|
71
|
+
**values,
|
86
72
|
}
|
87
73
|
)
|
88
74
|
|
89
75
|
@classmethod
|
90
|
-
def
|
91
|
-
|
92
|
-
|
93
|
-
|
76
|
+
def from_loader(
|
77
|
+
cls,
|
78
|
+
name: str,
|
79
|
+
externals: DictData,
|
80
|
+
) -> Self:
|
81
|
+
"""Construct Connection with Loader object with specific config name.
|
82
|
+
|
83
|
+
:param name:
|
84
|
+
:param externals:
|
85
|
+
"""
|
86
|
+
loader: Loader = Loader(name, externals=externals)
|
87
|
+
# NOTE: Validate the config type match with current connection model
|
88
|
+
if loader.type != cls:
|
89
|
+
raise ValueError(f"Type {loader.type} does not match with {cls}")
|
90
|
+
return cls.from_dict(
|
91
|
+
{
|
92
|
+
"extras": (loader.data.pop("extras", {}) | externals),
|
93
|
+
**loader.data,
|
94
|
+
}
|
95
|
+
)
|
94
96
|
|
95
97
|
@field_validator("endpoint")
|
96
98
|
def __prepare_slash(cls, value: str) -> str:
|
ddeutil/workflow/dataset.py
CHANGED
@@ -25,11 +25,7 @@ from .conn import SubclassConn
|
|
25
25
|
from .loader import Loader
|
26
26
|
|
27
27
|
EXCLUDED_EXTRAS: TupleStr = ("type",)
|
28
|
-
OBJ_FMTS: FormatterGroupType = make_group(
|
29
|
-
{
|
30
|
-
"datetime": Datetime,
|
31
|
-
}
|
32
|
-
)
|
28
|
+
OBJ_FMTS: FormatterGroupType = make_group({"datetime": Datetime})
|
33
29
|
|
34
30
|
|
35
31
|
class BaseDataset(BaseModel):
|
ddeutil/workflow/exceptions.py
CHANGED
@@ -8,8 +8,6 @@ Define Errors Object for Node package
|
|
8
8
|
"""
|
9
9
|
from __future__ import annotations
|
10
10
|
|
11
|
-
from typing import Union
|
12
|
-
|
13
11
|
|
14
12
|
class BaseError(Exception):
|
15
13
|
"""Base Error Object that use for catch any errors statement of
|
@@ -25,54 +23,6 @@ class ConfigNotFound(WorkflowBaseError):
|
|
25
23
|
"""Error raise for a method not found the config file or data."""
|
26
24
|
|
27
25
|
|
28
|
-
class ConfigArgumentError(WorkflowBaseError):
|
29
|
-
"""Error raise for a wrong configuration argument."""
|
30
|
-
|
31
|
-
def __init__(self, argument: Union[str, tuple], message: str):
|
32
|
-
"""Main Initialization that merge the argument and message input values
|
33
|
-
with specific content message together like
|
34
|
-
|
35
|
-
`__class__` with `argument`, `message`
|
36
|
-
|
37
|
-
:param argument: Union[str, tuple]
|
38
|
-
:param message: str
|
39
|
-
"""
|
40
|
-
if isinstance(argument, tuple):
|
41
|
-
_last_arg: str = str(argument[-1])
|
42
|
-
_argument: str = (
|
43
|
-
(
|
44
|
-
", ".join([f"{_!r}" for _ in argument[:-1]])
|
45
|
-
+ f", and {_last_arg!r}"
|
46
|
-
)
|
47
|
-
if len(argument) > 1
|
48
|
-
else f"{_last_arg!r}"
|
49
|
-
)
|
50
|
-
else:
|
51
|
-
_argument: str = f"{argument!r}"
|
52
|
-
_message: str = f"with {_argument}, {message}"
|
53
|
-
super().__init__(_message)
|
54
|
-
|
55
|
-
|
56
|
-
class ConnArgumentError(ConfigArgumentError):
|
57
|
-
"""Error raise for wrong connection argument when loading or parsing"""
|
58
|
-
|
59
|
-
|
60
|
-
class DsArgumentError(ConfigArgumentError):
|
61
|
-
"""Error raise for wrong catalog argument when loading or parsing"""
|
62
|
-
|
63
|
-
|
64
|
-
class NodeArgumentError(ConfigArgumentError):
|
65
|
-
"""Error raise for wrong node argument when loading or parsing"""
|
66
|
-
|
67
|
-
|
68
|
-
class ScdlArgumentError(ConfigArgumentError):
|
69
|
-
"""Error raise for wrong schedule argument when loading or parsing"""
|
70
|
-
|
71
|
-
|
72
|
-
class PipeArgumentError(ConfigArgumentError):
|
73
|
-
"""Error raise for wrong pipeline argument when loading or parsing"""
|
74
|
-
|
75
|
-
|
76
26
|
class PyException(Exception): ...
|
77
27
|
|
78
28
|
|
ddeutil/workflow/loader.py
CHANGED
@@ -5,189 +5,30 @@
|
|
5
5
|
# ------------------------------------------------------------------------------
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
|
-
import copy
|
9
|
-
import logging
|
10
|
-
import urllib.parse
|
11
8
|
from functools import cached_property
|
12
|
-
from typing import Any,
|
9
|
+
from typing import Any, TypeVar
|
13
10
|
|
14
11
|
from ddeutil.core import (
|
15
|
-
clear_cache,
|
16
12
|
getdot,
|
17
13
|
hasdot,
|
18
14
|
import_string,
|
19
|
-
setdot,
|
20
15
|
)
|
21
16
|
from ddeutil.io import (
|
22
17
|
ConfigNotFound,
|
23
18
|
Params,
|
24
19
|
PathSearch,
|
25
|
-
Register,
|
26
20
|
YamlEnvFl,
|
27
|
-
map_func,
|
28
21
|
)
|
29
|
-
from ddeutil.io.__conf import UPDATE_KEY, VERSION_KEY
|
30
|
-
from fmtutil import Datetime
|
31
22
|
from pydantic import BaseModel
|
32
|
-
from typing_extensions import Self
|
33
23
|
|
34
24
|
from .__regex import RegexConf
|
35
|
-
from .__types import DictData
|
36
|
-
from .exceptions import ConfigArgumentError
|
25
|
+
from .__types import DictData
|
37
26
|
|
27
|
+
T = TypeVar("T")
|
28
|
+
BaseModelType = type[BaseModel]
|
38
29
|
AnyModel = TypeVar("AnyModel", bound=BaseModel)
|
39
30
|
|
40
31
|
|
41
|
-
class YamlEnvQuote(YamlEnvFl):
|
42
|
-
|
43
|
-
@staticmethod
|
44
|
-
def prepare(x: str) -> str:
|
45
|
-
return urllib.parse.quote_plus(str(x))
|
46
|
-
|
47
|
-
|
48
|
-
class BaseLoad:
|
49
|
-
"""Base configuration data loading object for load config data from
|
50
|
-
`cls.load_stage` stage. The base loading object contain necessary
|
51
|
-
properties and method for type object.
|
52
|
-
|
53
|
-
:param data: dict : A configuration data content with fix keys, `name`,
|
54
|
-
`fullname`, and `data`.
|
55
|
-
:param params: Optional[dict] : A parameters mapping for some
|
56
|
-
subclass of loading use.
|
57
|
-
"""
|
58
|
-
|
59
|
-
# NOTE: Set loading config for inherit
|
60
|
-
load_prefixes: TupleStr = ("conn",)
|
61
|
-
load_datetime_name: str = "audit_date"
|
62
|
-
load_datetime_fmt: str = "%Y-%m-%d %H:%M:%S"
|
63
|
-
|
64
|
-
# NOTE: Set preparing config for inherit
|
65
|
-
data_excluded: TupleStr = (UPDATE_KEY, VERSION_KEY)
|
66
|
-
option_key: TupleStr = ("parameters",)
|
67
|
-
datetime_key: TupleStr = ("endpoint",)
|
68
|
-
|
69
|
-
@classmethod
|
70
|
-
def from_register(
|
71
|
-
cls,
|
72
|
-
name: str,
|
73
|
-
params: Params,
|
74
|
-
externals: DictData | None = None,
|
75
|
-
) -> Self:
|
76
|
-
"""Loading config data from register object.
|
77
|
-
|
78
|
-
:param name: A name of config data catalog that can register.
|
79
|
-
:type name: str
|
80
|
-
:param params: A params object.
|
81
|
-
:type params: Params
|
82
|
-
:param externals: A external parameters
|
83
|
-
:type externals: DictData | None(=None)
|
84
|
-
"""
|
85
|
-
try:
|
86
|
-
rs: Register = Register(
|
87
|
-
name=name,
|
88
|
-
stage=params.stage_final,
|
89
|
-
params=params,
|
90
|
-
loader=YamlEnvQuote,
|
91
|
-
)
|
92
|
-
except ConfigNotFound:
|
93
|
-
rs: Register = Register(
|
94
|
-
name=name,
|
95
|
-
params=params,
|
96
|
-
loader=YamlEnvQuote,
|
97
|
-
).deploy(stop=params.stage_final)
|
98
|
-
return cls(
|
99
|
-
name=rs.name,
|
100
|
-
data=rs.data().copy(),
|
101
|
-
params=params,
|
102
|
-
externals=externals,
|
103
|
-
)
|
104
|
-
|
105
|
-
def __init__(
|
106
|
-
self,
|
107
|
-
name: str,
|
108
|
-
data: DictData,
|
109
|
-
params: Params,
|
110
|
-
externals: DictData | None = None,
|
111
|
-
) -> None:
|
112
|
-
"""Main initialize base config object which get a name of configuration
|
113
|
-
and load data by the register object.
|
114
|
-
"""
|
115
|
-
self.name: str = name
|
116
|
-
self.__data: DictData = data
|
117
|
-
self.params: Params = params
|
118
|
-
self.externals: DictData = externals or {}
|
119
|
-
|
120
|
-
# NOTE: Validate step of base loading object.
|
121
|
-
if not any(
|
122
|
-
self.name.startswith(prefix) for prefix in self.load_prefixes
|
123
|
-
):
|
124
|
-
raise ConfigArgumentError(
|
125
|
-
"prefix",
|
126
|
-
(
|
127
|
-
f"{self.name!r} does not starts with the "
|
128
|
-
f"{self.__class__.__name__} prefixes: "
|
129
|
-
f"{self.load_prefixes!r}."
|
130
|
-
),
|
131
|
-
)
|
132
|
-
|
133
|
-
@property
|
134
|
-
def updt(self):
|
135
|
-
return self.data.get(UPDATE_KEY)
|
136
|
-
|
137
|
-
@cached_property
|
138
|
-
def _map_data(self) -> DictData:
|
139
|
-
"""Return configuration data without key in the excluded key set."""
|
140
|
-
data: DictData = self.__data.copy()
|
141
|
-
rs: DictData = {k: data[k] for k in data if k not in self.data_excluded}
|
142
|
-
|
143
|
-
# Mapping datetime format to string value.
|
144
|
-
for _ in self.datetime_key:
|
145
|
-
if hasdot(_, rs):
|
146
|
-
# Fill format datetime object to any type value.
|
147
|
-
rs: DictData = setdot(
|
148
|
-
_,
|
149
|
-
rs,
|
150
|
-
map_func(
|
151
|
-
getdot(_, rs),
|
152
|
-
Datetime.parse(
|
153
|
-
value=self.externals[self.load_datetime_name],
|
154
|
-
fmt=self.load_datetime_fmt,
|
155
|
-
).format,
|
156
|
-
),
|
157
|
-
)
|
158
|
-
return rs
|
159
|
-
|
160
|
-
@property
|
161
|
-
def data(self) -> DictData:
|
162
|
-
"""Return deep copy of the input data.
|
163
|
-
|
164
|
-
:rtype: DictData
|
165
|
-
"""
|
166
|
-
return copy.deepcopy(self._map_data)
|
167
|
-
|
168
|
-
@clear_cache(attrs=("type", "_map_data"))
|
169
|
-
def refresh(self) -> Self:
|
170
|
-
"""Refresh configuration data. This process will use `deploy` method
|
171
|
-
of the register object.
|
172
|
-
|
173
|
-
:rtype: Self
|
174
|
-
"""
|
175
|
-
return self.from_register(
|
176
|
-
name=self.name,
|
177
|
-
params=self.params,
|
178
|
-
externals=self.externals,
|
179
|
-
)
|
180
|
-
|
181
|
-
@cached_property
|
182
|
-
def type(self) -> Any:
|
183
|
-
"""Return object type which implement in `config_object` key."""
|
184
|
-
if not (_typ := self.data.get("type")):
|
185
|
-
raise ValueError(
|
186
|
-
f"the 'type' value: {_typ} does not exists in config data."
|
187
|
-
)
|
188
|
-
return import_string(f"ddeutil.pipe.{_typ}")
|
189
|
-
|
190
|
-
|
191
32
|
class SimLoad:
|
192
33
|
"""Simple Load Object that will search config data by name.
|
193
34
|
|
@@ -224,7 +65,7 @@ class SimLoad:
|
|
224
65
|
return self.__conf_params
|
225
66
|
|
226
67
|
@cached_property
|
227
|
-
def type(self) ->
|
68
|
+
def type(self) -> BaseModelType:
|
228
69
|
"""Return object type which implement in `config_object` key."""
|
229
70
|
if not (_typ := self.data.get("type")):
|
230
71
|
raise ValueError(
|
@@ -236,31 +77,16 @@ class SimLoad:
|
|
236
77
|
except ModuleNotFoundError:
|
237
78
|
return import_string(f"{_typ}")
|
238
79
|
|
239
|
-
def
|
240
|
-
|
241
|
-
if not (p := self.data.get("params", {})):
|
242
|
-
return p
|
243
|
-
|
244
|
-
try:
|
245
|
-
return {i: import_string(f"{self.import_prefix}.{p[i]}") for i in p}
|
246
|
-
except ModuleNotFoundError as err:
|
247
|
-
logging.error(err)
|
248
|
-
raise err
|
249
|
-
|
250
|
-
def validate_params(self, param: dict[str, Any]) -> dict[str, Any]:
|
251
|
-
"""Return parameter that want to catch before workflow running."""
|
252
|
-
try:
|
253
|
-
return {i: caller(param[i]) for i, caller in self.params().items()}
|
254
|
-
except KeyError as err:
|
255
|
-
logging.error(f"Parameter: {err} does not exists from passing")
|
256
|
-
raise err
|
257
|
-
except ValueError as err:
|
258
|
-
logging.error("Value that passing to params does not valid")
|
259
|
-
raise err
|
80
|
+
def load(self) -> AnyModel:
|
81
|
+
return self.type.model_validate(self.data)
|
260
82
|
|
261
83
|
|
262
84
|
class Loader(SimLoad):
|
263
|
-
"""Main Loader Object.
|
85
|
+
"""Main Loader Object.
|
86
|
+
|
87
|
+
:param name: A name of config data that will read by Yaml Loader object.
|
88
|
+
:param externals: An external parameters
|
89
|
+
"""
|
264
90
|
|
265
91
|
def __init__(
|
266
92
|
self,
|
@@ -278,23 +104,33 @@ class Loader(SimLoad):
|
|
278
104
|
|
279
105
|
@classmethod
|
280
106
|
def config(cls, path: str | None = None) -> Params:
|
107
|
+
"""Load Config data from ``workflows-conf.yaml`` file."""
|
281
108
|
return Params.model_validate(
|
282
109
|
YamlEnvFl(path or "./workflows-conf.yaml").read()
|
283
110
|
)
|
284
111
|
|
285
112
|
|
286
|
-
def
|
113
|
+
def map_params(value: Any, params: dict[str, Any]) -> Any:
|
287
114
|
"""Map caller value that found from ``RE_CALLER`` regex.
|
288
115
|
|
289
|
-
:
|
116
|
+
:rtype: Any
|
117
|
+
:returns: An any getter value from the params input.
|
290
118
|
"""
|
119
|
+
if isinstance(value, dict):
|
120
|
+
return {k: map_params(value[k], params) for k in value}
|
121
|
+
elif isinstance(value, (list, tuple, set)):
|
122
|
+
return type(value)([map_params(i, params) for i in value])
|
123
|
+
elif not isinstance(value, str):
|
124
|
+
return value
|
125
|
+
|
291
126
|
if not (found := RegexConf.RE_CALLER.search(value)):
|
292
127
|
return value
|
128
|
+
|
293
129
|
# NOTE: get caller value that setting inside; ``${{ <caller-value> }}``
|
294
|
-
caller = found.group("caller")
|
130
|
+
caller: str = found.group("caller")
|
295
131
|
if not hasdot(caller, params):
|
296
132
|
raise ValueError(f"params does not set caller: {caller!r}")
|
297
|
-
getter = getdot(caller, params)
|
133
|
+
getter: Any = getdot(caller, params)
|
298
134
|
|
299
135
|
# NOTE: check type of vars
|
300
136
|
if isinstance(getter, (str, int)):
|