ddeutil-workflow 0.0.2__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/PKG-INFO +19 -15
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/README.md +18 -14
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/pyproject.toml +4 -2
- ddeutil_workflow-0.0.3/src/ddeutil/workflow/__about__.py +1 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/__types.py +1 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/conn.py +13 -10
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/exceptions.py +0 -20
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/loader.py +39 -11
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/pipeline.py +183 -147
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/schedule.py +7 -7
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/tasks/_pandas.py +1 -1
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/tasks/_polars.py +10 -2
- ddeutil_workflow-0.0.3/src/ddeutil/workflow/utils.py +180 -0
- ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/__dataset.py +127 -0
- ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/az.py +0 -0
- ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/pd.py +13 -0
- ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/pg.py +11 -0
- ddeutil_workflow-0.0.2/src/ddeutil/workflow/dataset.py → ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/pl.py +3 -133
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil_workflow.egg-info/PKG-INFO +19 -15
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil_workflow.egg-info/SOURCES.txt +9 -5
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_conn.py +8 -9
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_dataset.py +7 -5
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_pipeline.py +1 -1
- ddeutil_workflow-0.0.3/tests/test_pipeline_matrix.py +29 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_pipeline_run.py +5 -3
- ddeutil_workflow-0.0.3/tests/test_pipeline_task.py +80 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_schedule.py +2 -2
- ddeutil_workflow-0.0.2/src/ddeutil/workflow/__about__.py +0 -1
- ddeutil_workflow-0.0.2/src/ddeutil/workflow/utils.py +0 -65
- ddeutil_workflow-0.0.2/tests/test_loader_simple.py +0 -84
- ddeutil_workflow-0.0.2/tests/test_pipeline_task.py +0 -21
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/LICENSE +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/setup.cfg +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/__init__.py +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/__regex.py +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/tasks/__init__.py +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/vendors/__dict.py +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/vendors/__init__.py +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/vendors/__schedule.py +0 -0
- ddeutil_workflow-0.0.2/src/ddeutil/workflow/vendors/aws_warpped.py → ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/aws.py +0 -0
- ddeutil_workflow-0.0.2/src/ddeutil/workflow/vendors/minio_warpped.py → ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/minio.py +0 -0
- ddeutil_workflow-0.0.2/src/ddeutil/workflow/vendors/sftp_wrapped.py → ddeutil_workflow-0.0.3/src/ddeutil/workflow/vendors/sftp.py +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil_workflow.egg-info/dependency_links.txt +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil_workflow.egg-info/requires.txt +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil_workflow.egg-info/top_level.txt +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_base_data.py +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_base_local_and_global.py +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_base_regex.py +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_loader.py +0 -0
- {ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/tests/test_pipeline_params.py +0 -0
{ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddeutil-workflow
-Version: 0.0.2
+Version: 0.0.3
 Summary: Data Developer & Engineer Workflow Utility Objects
 Author-email: ddeutils <korawich.anu@gmail.com>
 License: MIT
@@ -83,7 +83,13 @@ Out of you data that want to use in pipeline of workflow. Some of this component
 is similar component of the **Airflow** because I like it concepts.

 The main feature of this project is the `Pipeline` object that can call any
-
+registries function. The pipeline can handle everything that you want to do, it
+will passing parameters and catching the output for re-use it to next step.
+
+> [!IMPORTANT]
+> In the future of this project, I will drop the connection and dataset to
+> dynamic registries instead of main features because it have a lot of maintain
+> vendor codes and deps. (I do not have time to handle this features)

 ### Connection

@@ -104,7 +110,9 @@ assert conn.ping()

 ### Dataset

-The dataset is define any objects on the connection.
+The dataset is define any objects on the connection. This feature was implemented
+on `/vendors` because it has a lot of tools that can interact with any data systems
+in the data tool stacks.

 ```yaml
 ds_postgres_customer_tbl:
@@ -116,7 +124,7 @@ ds_postgres_customer_tbl:
 ```

 ```python
-from ddeutil.workflow.
+from ddeutil.workflow.vendors.pg import PostgresTbl

 dataset = PostgresTbl.from_loader(name='ds_postgres_customer_tbl', externals={})
 assert dataset.exists()
@@ -126,14 +134,14 @@ assert dataset.exists()

 ```yaml
 schd_for_node:
-  type: schedule.
+  type: schedule.Schedule
   cron: "*/5 * * * *"
 ```

 ```python
-from ddeutil.workflow.schedule import
+from ddeutil.workflow.schedule import Schedule

-scdl =
+scdl = Schedule.from_loader(name='schd_for_node', externals={})
 assert '*/5 * * * *' == str(scdl.cronjob)

 cron_iterate = scdl.generate('2022-01-01 00:00:00')
@@ -234,14 +242,10 @@ pipe_el_pg_to_lake:
 pipe_hook_mssql_proc:
   type: ddeutil.workflow.pipe.Pipeline
   params:
-    run_date:
-
-
-
-    source_name:
-      type: str
-    target_name:
-      type: str
+    run_date: datetime
+    sp_name: str
+    source_name: str
+    target_name: str
   jobs:
     transform:
       stages:
{ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/README.md

@@ -48,7 +48,13 @@ Out of you data that want to use in pipeline of workflow. Some of this component
 is similar component of the **Airflow** because I like it concepts.

 The main feature of this project is the `Pipeline` object that can call any
-
+registries function. The pipeline can handle everything that you want to do, it
+will passing parameters and catching the output for re-use it to next step.
+
+> [!IMPORTANT]
+> In the future of this project, I will drop the connection and dataset to
+> dynamic registries instead of main features because it have a lot of maintain
+> vendor codes and deps. (I do not have time to handle this features)

 ### Connection

@@ -69,7 +75,9 @@ assert conn.ping()

 ### Dataset

-The dataset is define any objects on the connection.
+The dataset is define any objects on the connection. This feature was implemented
+on `/vendors` because it has a lot of tools that can interact with any data systems
+in the data tool stacks.

 ```yaml
 ds_postgres_customer_tbl:
@@ -81,7 +89,7 @@ ds_postgres_customer_tbl:
 ```

 ```python
-from ddeutil.workflow.
+from ddeutil.workflow.vendors.pg import PostgresTbl

 dataset = PostgresTbl.from_loader(name='ds_postgres_customer_tbl', externals={})
 assert dataset.exists()
@@ -91,14 +99,14 @@ assert dataset.exists()

 ```yaml
 schd_for_node:
-  type: schedule.
+  type: schedule.Schedule
   cron: "*/5 * * * *"
 ```

 ```python
-from ddeutil.workflow.schedule import
+from ddeutil.workflow.schedule import Schedule

-scdl =
+scdl = Schedule.from_loader(name='schd_for_node', externals={})
 assert '*/5 * * * *' == str(scdl.cronjob)

 cron_iterate = scdl.generate('2022-01-01 00:00:00')
@@ -199,14 +207,10 @@ pipe_el_pg_to_lake:
 pipe_hook_mssql_proc:
   type: ddeutil.workflow.pipe.Pipeline
   params:
-    run_date:
-
-
-
-    source_name:
-      type: str
-    target_name:
-      type: str
+    run_date: datetime
+    sp_name: str
+    source_name: str
+    target_name: str
   jobs:
     transform:
       stages:
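The `params` block in the pipeline example above changes from nested `type:` mappings to a one-line shorthand such as `run_date: datetime`. A minimal sketch of how such a shorthand could be resolved to Python types before a run; the names `PARAM_TYPES` and `resolve_params` are illustrative, not the package's API:

```python
from datetime import datetime

# Hypothetical lookup table; the real package may resolve type names differently.
PARAM_TYPES: dict[str, type] = {"str": str, "int": int, "datetime": datetime}

def resolve_params(params: dict[str, str]) -> dict[str, type]:
    """Map each parameter name to the Python type named by its shorthand value."""
    return {name: PARAM_TYPES[value] for name, value in params.items()}

assert resolve_params({"run_date": "datetime", "sp_name": "str"}) == {
    "run_date": datetime,
    "sp_name": str,
}
```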
{ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/pyproject.toml

@@ -79,6 +79,10 @@ addopts = [
     "--strict-markers",
 ]
 filterwarnings = ["error"]
+log_cli = true
+log_cli_level = "INFO"
+log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)"
+log_cli_date_format = "%Y-%m-%d %H:%M:%S"

 [tool.black]
 line-length = 80
@@ -97,7 +101,6 @@ exclude = """
 | build
 | dist
 | venv
-| __legacy
 )/
 )
 """
@@ -113,7 +116,6 @@ exclude = [
     "build",
     "dist",
     "venv",
-    "__legacy",
 ]

 [tool.ruff.lint]
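The four `log_cli*` keys added to `[tool.pytest.ini_options]` are standard pytest settings that stream log records live to the terminal in the configured format while tests run. For example, a test like this sketch would print its log line during the run:

```python
import logging

def test_logging_is_streamed():
    # With log_cli = true, this record appears live in the pytest output,
    # formatted by log_cli_format and log_cli_date_format.
    logging.getLogger(__name__).info("hello from the test run")
```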
ddeutil_workflow-0.0.3/src/ddeutil/workflow/__about__.py

@@ -0,0 +1 @@
+__version__: str = "0.0.3"
{ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/conn.py

@@ -43,8 +43,14 @@ class BaseConn(BaseModel):
     ]

     @classmethod
-    def from_dict(cls, values: DictData):
-        """Construct Connection
+    def from_dict(cls, values: DictData) -> Self:
+        """Construct Connection Model from dict data. This construct is
+        different with ``.model_validate()`` because it will prepare the values
+        before using it if the data dose not have 'url'.
+
+        :param values: A dict data that use to construct this model.
+        """
+        # NOTE: filter out the fields of this model.
         filter_data: DictData = {
             k: values.pop(k)
             for k in values.copy()
@@ -73,15 +79,11 @@ class BaseConn(BaseModel):
         )

     @classmethod
-    def from_loader(
-        cls,
-        name: str,
-        externals: DictData,
-    ) -> Self:
+    def from_loader(cls, name: str, externals: DictData) -> Self:
         """Construct Connection with Loader object with specific config name.

-        :param name:
-        :param externals:
+        :param name: A config name.
+        :param externals: A external data that want to adding to extras.
         """
         loader: Loader = Loader(name, externals=externals)
         # NOTE: Validate the config type match with current connection model
@@ -96,6 +98,7 @@ class BaseConn(BaseModel):

     @field_validator("endpoint")
     def __prepare_slash(cls, value: str) -> str:
+        """Prepare slash character that map double form URL model loading."""
         if value.startswith("//"):
             return value[1:]
         return value
@@ -148,7 +151,7 @@ class SFTP(Conn):
     dialect: Literal["sftp"] = "sftp"

     def __client(self):
-        from .vendors.
+        from .vendors.sftp import WrapSFTP

         return WrapSFTP(
             host=self.host,
{ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/exceptions.py

@@ -9,24 +9,4 @@ Define Errors Object for Node package
 from __future__ import annotations


-class BaseError(Exception):
-    """Base Error Object that use for catch any errors statement of
-    all step in this src
-    """
-
-
-class WorkflowBaseError(BaseError):
-    """Core Base Error object"""
-
-
-class ConfigNotFound(WorkflowBaseError):
-    """Error raise for a method not found the config file or data."""
-
-
-class PyException(Exception): ...
-
-
-class ShellException(Exception): ...
-
-
 class TaskException(Exception): ...
{ddeutil_workflow-0.0.2 → ddeutil_workflow-0.0.3}/src/ddeutil/workflow/loader.py

@@ -6,7 +6,7 @@
 from __future__ import annotations

 from functools import cached_property
-from typing import Any, TypeVar
+from typing import Any, ClassVar, TypeVar

 from ddeutil.core import (
     getdot,
@@ -14,12 +14,12 @@ from ddeutil.core import (
     import_string,
 )
 from ddeutil.io import (
-
-    Params,
+    PathData,
     PathSearch,
     YamlEnvFl,
 )
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
+from pydantic.functional_validators import model_validator

 from .__regex import RegexConf
 from .__types import DictData
@@ -29,6 +29,25 @@ BaseModelType = type[BaseModel]
 AnyModel = TypeVar("AnyModel", bound=BaseModel)


+class Engine(BaseModel):
+    """Engine Model"""
+
+    paths: PathData = Field(default_factory=PathData)
+    registry: list[str] = Field(default_factory=lambda: ["ddeutil.workflow"])
+
+    @model_validator(mode="before")
+    def __prepare_registry(cls, values: DictData) -> DictData:
+        if (_regis := values.get("registry")) and isinstance(_regis, str):
+            values["registry"] = [_regis]
+        return values
+
+
+class Params(BaseModel):
+    """Params Model"""
+
+    engine: Engine = Field(default_factory=Engine)
+
+
 class SimLoad:
     """Simple Load Object that will search config data by name.

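Given the `Engine` model added above, the `__prepare_registry` validator lets `workflows-conf.yaml` supply `registry` as either a single string or a list. A quick check of that coercion, assuming the models exactly as defined in this hunk:

```python
from ddeutil.workflow.loader import Params

# A bare string is coerced into a one-item list by the before-validator.
params = Params.model_validate({"engine": {"registry": "my.plugins"}})
assert params.engine.registry == ["my.plugins"]

# With no engine block at all, the defaults apply.
assert Params().engine.registry == ["ddeutil.workflow"]
```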
@@ -36,13 +55,11 @@ class SimLoad:
     :param params: A Params model object.
     :param externals: An external parameters

-
+    Noted:
         The config data should have ``type`` key for engine can know what is
         config should to do next.
     """

-    import_prefix: str = "ddeutil.workflow"
-
     def __init__(
         self,
         name: str,
@@ -56,7 +73,7 @@ class SimLoad:
     ):
         self.data = data
         if not self.data:
-            raise
+            raise ValueError(f"Config {name!r} does not found on conf path")
         self.__conf_params: Params = params
         self.externals: DictData = externals

@@ -75,6 +92,11 @@ class SimLoad:
             # NOTE: Auto adding module prefix if it does not set
             return import_string(f"ddeutil.workflow.{_typ}")
         except ModuleNotFoundError:
+            for registry in self.conf_params.engine.registry:
+                try:
+                    return import_string(f"{registry}.{_typ}")
+                except ModuleNotFoundError:
+                    continue
             return import_string(f"{_typ}")

     def load(self) -> AnyModel:
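The new `except ModuleNotFoundError` branch above gives `type` resolution a three-step order: the built-in `ddeutil.workflow` prefix first, then every registry configured in `workflows-conf.yaml`, then the raw dotted path. The same lookup restated as a standalone function; a sketch that mirrors the hunk, not the package's public API:

```python
from ddeutil.core import import_string

def resolve_type(type_name: str, registries: list[str]):
    """Resolve a config ``type`` string the way the hunk above does."""
    try:
        # 1) Try the built-in module prefix first.
        return import_string(f"ddeutil.workflow.{type_name}")
    except ModuleNotFoundError:
        # 2) Fall back to each configured registry, in order.
        for registry in registries:
            try:
                return import_string(f"{registry}.{type_name}")
            except ModuleNotFoundError:
                continue
        # 3) Finally, treat the value as a fully qualified dotted path.
        return import_string(type_name)
```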
@@ -82,12 +104,14 @@ class SimLoad:


 class Loader(SimLoad):
-    """Main Loader Object.
+    """Main Loader Object that get the config `yaml` file from current path.

     :param name: A name of config data that will read by Yaml Loader object.
     :param externals: An external parameters
     """

+    conf_name: ClassVar[str] = "workflows-conf"
+
     def __init__(
         self,
         name: str,
@@ -106,12 +130,16 @@ class Loader(SimLoad):
     def config(cls, path: str | None = None) -> Params:
         """Load Config data from ``workflows-conf.yaml`` file."""
         return Params.model_validate(
-            YamlEnvFl(path or "./
+            YamlEnvFl(path or f"./{cls.conf_name}.yaml").read()
         )


 def map_params(value: Any, params: dict[str, Any]) -> Any:
-    """Map caller value that found from ``RE_CALLER``
+    """Map caller value that found from ``RE_CALLER`` regular expression.
+
+    :param value: A value that want to mapped with an params
+    :param params: A parameter value that getting with matched regular
+        expression.

     :rtype: Any
     :returns: An any getter value from the params input.