ddeutil-workflow 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__types.py +1 -0
- ddeutil/workflow/conn.py +33 -28
- ddeutil/workflow/exceptions.py +0 -70
- ddeutil/workflow/loader.py +55 -191
- ddeutil/workflow/pipeline.py +264 -110
- ddeutil/workflow/schedule.py +10 -15
- ddeutil/workflow/tasks/__init__.py +6 -10
- ddeutil/workflow/tasks/_pandas.py +54 -0
- ddeutil/workflow/tasks/_polars.py +55 -4
- ddeutil/workflow/utils.py +180 -0
- ddeutil/workflow/vendors/__dataset.py +127 -0
- ddeutil/workflow/vendors/pd.py +13 -0
- ddeutil/workflow/vendors/pg.py +11 -0
- ddeutil/workflow/{dataset.py → vendors/pl.py} +4 -138
- {ddeutil_workflow-0.0.1.dist-info → ddeutil_workflow-0.0.3.dist-info}/METADATA +35 -20
- ddeutil_workflow-0.0.3.dist-info/RECORD +29 -0
- ddeutil/workflow/hooks/__init__.py +0 -9
- ddeutil/workflow/hooks/_postgres.py +0 -2
- ddeutil/workflow/utils/receive.py +0 -33
- ddeutil/workflow/utils/selection.py +0 -2
- ddeutil_workflow-0.0.1.dist-info/RECORD +0 -28
- /ddeutil/workflow/vendors/{aws_warpped.py → aws.py} +0 -0
- /ddeutil/workflow/{utils/__init__.py → vendors/az.py} +0 -0
- /ddeutil/workflow/vendors/{minio_warpped.py → minio.py} +0 -0
- /ddeutil/workflow/vendors/{sftp_wrapped.py → sftp.py} +0 -0
- {ddeutil_workflow-0.0.1.dist-info → ddeutil_workflow-0.0.3.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.1.dist-info → ddeutil_workflow-0.0.3.dist-info}/WHEEL +0 -0
- {ddeutil_workflow-0.0.1.dist-info → ddeutil_workflow-0.0.3.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ddeutil-workflow
|
3
|
-
Version: 0.0.1
|
3
|
+
Version: 0.0.3
|
4
4
|
Summary: Data Developer & Engineer Workflow Utility Objects
|
5
5
|
Author-email: ddeutils <korawich.anu@gmail.com>
|
6
6
|
License: MIT
|
@@ -23,15 +23,14 @@ Description-Content-Type: text/markdown
|
|
23
23
|
License-File: LICENSE
|
24
24
|
Requires-Dist: fmtutil
|
25
25
|
Requires-Dist: ddeutil-io
|
26
|
-
Requires-Dist: ddeutil-model
|
27
26
|
Requires-Dist: python-dotenv
|
28
27
|
Provides-Extra: test
|
29
28
|
Requires-Dist: sqlalchemy ==2.0.30 ; extra == 'test'
|
30
29
|
Requires-Dist: paramiko ==3.4.0 ; extra == 'test'
|
31
30
|
Requires-Dist: sshtunnel ==0.4.0 ; extra == 'test'
|
32
|
-
Requires-Dist: boto3 ==1.34.
|
31
|
+
Requires-Dist: boto3 ==1.34.117 ; extra == 'test'
|
33
32
|
Requires-Dist: fsspec ==2024.5.0 ; extra == 'test'
|
34
|
-
Requires-Dist: polars ==0.20.
|
33
|
+
Requires-Dist: polars ==0.20.31 ; extra == 'test'
|
35
34
|
Requires-Dist: pyarrow ==16.1.0 ; extra == 'test'
|
36
35
|
|
37
36
|
# Data Utility: _Workflow_
|
@@ -83,6 +82,15 @@ The first step, you should start create the connections and datasets for In and
|
|
83
82
|
Out of your data that you want to use in the pipeline of a workflow. Some of these components
|
84
83
|
are similar to components of **Airflow** because I like its concepts.
|
85
84
|
|
85
|
+
The main feature of this project is the `Pipeline` object, which can call any
|
86
|
+
registry function. The pipeline can handle everything that you want to do: it
|
87
|
+
passes parameters and captures the output so it can be re-used in the next step.
|
88
|
+
|
89
|
+
> [!IMPORTANT]
|
90
|
+
> In the future of this project, I will move the connection and dataset out to
|
91
|
+
> dynamic registries instead of keeping them as main features, because they require maintaining a lot of
|
92
|
+
> vendor code and deps. (I do not have time to handle these features.)
|
93
|
+
|
86
94
|
### Connection
|
87
95
|
|
88
96
|
The connection is what a worker uses to be able to do anything.
|
@@ -102,7 +110,9 @@ assert conn.ping()
|
|
102
110
|
|
103
111
|
### Dataset
|
104
112
|
|
105
|
-
The dataset is define any objects on the connection.
|
113
|
+
The dataset defines any object on the connection. This feature is implemented
|
114
|
+
in `/vendors` because there are a lot of tools that can interact with the data systems
|
115
|
+
in a data tool stack.
|
106
116
|
|
107
117
|
```yaml
|
108
118
|
ds_postgres_customer_tbl:
|
@@ -114,7 +124,7 @@ ds_postgres_customer_tbl:
|
|
114
124
|
```
|
115
125
|
|
116
126
|
```python
|
117
|
-
from ddeutil.workflow.
|
127
|
+
from ddeutil.workflow.vendors.pg import PostgresTbl
|
118
128
|
|
119
129
|
dataset = PostgresTbl.from_loader(name='ds_postgres_customer_tbl', externals={})
|
120
130
|
assert dataset.exists()
|
@@ -124,14 +134,14 @@ assert dataset.exists()
|
|
124
134
|
|
125
135
|
```yaml
|
126
136
|
schd_for_node:
|
127
|
-
type: schedule.
|
137
|
+
type: schedule.Schedule
|
128
138
|
cron: "*/5 * * * *"
|
129
139
|
```
|
130
140
|
|
131
141
|
```python
|
132
|
-
from ddeutil.workflow.schedule import
|
142
|
+
from ddeutil.workflow.schedule import Schedule
|
133
143
|
|
134
|
-
scdl =
|
144
|
+
scdl = Schedule.from_loader(name='schd_for_node', externals={})
|
135
145
|
assert '*/5 * * * *' == str(scdl.cronjob)
|
136
146
|
|
137
147
|
cron_iterate = scdl.generate('2022-01-01 00:00:00')
|
@@ -155,8 +165,10 @@ The state of doing lists that worker should to do. It be collection of the stage
|
|
155
165
|
run_py_local:
|
156
166
|
type: ddeutil.workflow.pipe.Pipeline
|
157
167
|
params:
|
158
|
-
author-run:
|
159
|
-
|
168
|
+
author-run:
|
169
|
+
type: str
|
170
|
+
run-date:
|
171
|
+
type: datetime
|
160
172
|
jobs:
|
161
173
|
first-job:
|
162
174
|
stages:
|
@@ -203,13 +215,15 @@ pipe.execute(params={'author-run': 'Local Workflow', 'run-date': '2024-01-01'})
|
|
203
215
|
pipe_el_pg_to_lake:
|
204
216
|
type: ddeutil.workflow.pipe.Pipeline
|
205
217
|
params:
|
206
|
-
run-date:
|
207
|
-
|
218
|
+
run-date:
|
219
|
+
type: datetime
|
220
|
+
author-email:
|
221
|
+
type: str
|
208
222
|
jobs:
|
209
223
|
extract-load:
|
210
224
|
stages:
|
211
225
|
- name: "Extract Load from Postgres to Lake"
|
212
|
-
id: extract
|
226
|
+
id: extract-load
|
213
227
|
task: tasks/postgres-to-delta@polars
|
214
228
|
with:
|
215
229
|
source:
|
@@ -222,21 +236,22 @@ pipe_el_pg_to_lake:
|
|
222
236
|
endpoint: "/${{ params.name }}"
|
223
237
|
```
|
224
238
|
|
225
|
-
###
|
239
|
+
### Tasks (Transform)
|
226
240
|
|
227
241
|
```yaml
|
228
242
|
pipe_hook_mssql_proc:
|
229
243
|
type: ddeutil.workflow.pipe.Pipeline
|
230
244
|
params:
|
231
|
-
run_date:
|
232
|
-
sp_name:
|
233
|
-
source_name:
|
234
|
-
target_name:
|
245
|
+
run_date: datetime
|
246
|
+
sp_name: str
|
247
|
+
source_name: str
|
248
|
+
target_name: str
|
235
249
|
jobs:
|
236
250
|
transform:
|
237
251
|
stages:
|
238
252
|
- name: "Transform Data in MS SQL Server"
|
239
|
-
|
253
|
+
id: transform
|
254
|
+
task: tasks/mssql-proc@odbc
|
240
255
|
with:
|
241
256
|
exec: ${{ params.sp_name }}
|
242
257
|
params:
|
@@ -0,0 +1,29 @@
|
|
1
|
+
ddeutil/workflow/__about__.py,sha256=smA9c0CTLewINRoxj2VBHoiYDESoFGtXYFDvRT31dgs,27
|
2
|
+
ddeutil/workflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
+
ddeutil/workflow/__regex.py,sha256=bOngaQ0zJgy3vfNwF2MlI8XhLu_Ei1Vz8y50iLj8ao4,1061
|
4
|
+
ddeutil/workflow/__types.py,sha256=AkpQq6QlrclpurCZZVY9RMxoyS9z2WGzhaz_ikeTaCU,453
|
5
|
+
ddeutil/workflow/conn.py,sha256=POtNcyqFNGxZnkg5J_H1OIvQVnnqG-ajmBBzjoHl9sg,7238
|
6
|
+
ddeutil/workflow/exceptions.py,sha256=XAq82VHSMLNb4UjGatp7hYfjxFtMiKFtBqJyAhwTl-s,434
|
7
|
+
ddeutil/workflow/loader.py,sha256=TXS4k2dqNycBYSTYcJ80WIsPMKNZbHNeBbcufX6lrJc,5483
|
8
|
+
ddeutil/workflow/pipeline.py,sha256=fG6ta-SNx4OWS6n8w7YpYDadfnbqayj8A1uY03TvLUA,16942
|
9
|
+
ddeutil/workflow/schedule.py,sha256=RMbTC7L32D3fJ5gYxJDCn-vPr2RYEBMSD0G2kj1Qows,2712
|
10
|
+
ddeutil/workflow/utils.py,sha256=z7evB9kOsgTr30uVuL994bmOMDNZB5xDY2KjO7gL1dc,5379
|
11
|
+
ddeutil/workflow/tasks/__init__.py,sha256=TIcw9JinrdepWgyazSMLk_QflUFms99ILI4GvLHUGD0,338
|
12
|
+
ddeutil/workflow/tasks/_pandas.py,sha256=rqz5_VMSqkEdirk7i3EElZoqnRYFyyK_Z8_Zt8FyeTg,1693
|
13
|
+
ddeutil/workflow/tasks/_polars.py,sha256=SYEBx-0I9tbY046QGSMokVugK8Fqjhiw4dzpL6y6Hww,2917
|
14
|
+
ddeutil/workflow/vendors/__dataset.py,sha256=n9EwTIoVlgCKyCQQgQrijeoQgauOoKtdKiWCzSIErns,4065
|
15
|
+
ddeutil/workflow/vendors/__dict.py,sha256=ETwkeA0qzKNgedfeRgAz1qShNXTIXIS4DXzJB4lM4jo,9962
|
16
|
+
ddeutil/workflow/vendors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
+
ddeutil/workflow/vendors/__schedule.py,sha256=cUIwtTli9G0klrNFxAIbG3VTiv6umRuNAZiKA-kOtpE,20690
|
18
|
+
ddeutil/workflow/vendors/aws.py,sha256=zjq_LCu3ffVBRrxS2vqss9X24yrtuAEt9ouy2_WvS0o,5980
|
19
|
+
ddeutil/workflow/vendors/az.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
20
|
+
ddeutil/workflow/vendors/minio.py,sha256=pScLy38Du9moOrGaSBSFsoQRhiQ686FQyloOeLA0OQk,261
|
21
|
+
ddeutil/workflow/vendors/pd.py,sha256=J6Nkb4RqUnz3NMfo3cHX-Udw3HPjqjUimojS86rR4os,116
|
22
|
+
ddeutil/workflow/vendors/pg.py,sha256=TGwkV6nsarGLbiRTT_wB4uAy3xCR89EPPCMWqlWhFe8,422
|
23
|
+
ddeutil/workflow/vendors/pl.py,sha256=B-l9zcZ9vATAKVMLv5tjKiWo5Qt8ZIv_aQzuVFinKbY,5087
|
24
|
+
ddeutil/workflow/vendors/sftp.py,sha256=lQn4mnHhgvE9g1pbpoQF7HvZOxab8Z2XaDtSIJvumGM,7090
|
25
|
+
ddeutil_workflow-0.0.3.dist-info/LICENSE,sha256=nGFZ1QEhhhWeMHf9n99_fdt4vQaXS29xWKxt-OcLywk,1085
|
26
|
+
ddeutil_workflow-0.0.3.dist-info/METADATA,sha256=KcsTd-FjufMK-4fhiIq27yeQUuA7NeB8TCkbXADQ1Dc,7992
|
27
|
+
ddeutil_workflow-0.0.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
28
|
+
ddeutil_workflow-0.0.3.dist-info/top_level.txt,sha256=m9M6XeSWDwt_yMsmH6gcOjHZVK5O0-vgtNBuncHjzW4,8
|
29
|
+
ddeutil_workflow-0.0.3.dist-info/RECORD,,
|
@@ -1,33 +0,0 @@
|
|
1
|
-
import datetime as dt
|
2
|
-
from typing import Any
|
3
|
-
|
4
|
-
|
5
|
-
def datetime(value: Any) -> dt.datetime:
|
6
|
-
if isinstance(value, dt.datetime):
|
7
|
-
return value
|
8
|
-
elif isinstance(value, dt.date):
|
9
|
-
return dt.datetime(value.year, value.month, value.day)
|
10
|
-
if value is None:
|
11
|
-
return dt.datetime.now(dt.timezone.utc)
|
12
|
-
elif not isinstance(value, str):
|
13
|
-
raise ValueError(
|
14
|
-
f"Value that want to convert to datetime does not support for "
|
15
|
-
f"type: {type(value)}"
|
16
|
-
)
|
17
|
-
return dt.datetime.fromisoformat(value)
|
18
|
-
|
19
|
-
|
20
|
-
def string(value: Any) -> str:
|
21
|
-
return str(value)
|
22
|
-
|
23
|
-
|
24
|
-
def integer(value: Any) -> int:
|
25
|
-
if not isinstance(value, int):
|
26
|
-
try:
|
27
|
-
return int(str(value))
|
28
|
-
except TypeError as err:
|
29
|
-
raise ValueError(
|
30
|
-
f"Value that want to convert to integer does not support for "
|
31
|
-
f"type: {type(value)}"
|
32
|
-
) from err
|
33
|
-
return value
|
@@ -1,28 +0,0 @@
|
|
1
|
-
ddeutil/workflow/__about__.py,sha256=YZoDMFgikTmgL5EqRKa4mX-zXFr2c20J7dSZJ4dqSd0,27
|
2
|
-
ddeutil/workflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
-
ddeutil/workflow/__regex.py,sha256=bOngaQ0zJgy3vfNwF2MlI8XhLu_Ei1Vz8y50iLj8ao4,1061
|
4
|
-
ddeutil/workflow/__types.py,sha256=7t-i5_-eif9zUBwb7jnv5RbSjzOihyl3yFDqXzaeyxk,428
|
5
|
-
ddeutil/workflow/conn.py,sha256=5Nf6IQf2PXC6eLn469tGNvtnXAx-eOm3jJ_ltj90VbQ,6831
|
6
|
-
ddeutil/workflow/dataset.py,sha256=SNp58WwTvgDSFWll9aizJeiHX_fd68f687cRME7vBo4,8599
|
7
|
-
ddeutil/workflow/exceptions.py,sha256=aV_LN4_zmAk2R2p5ZYJcSMrsdBsRo6cMwm0lwZv_400,2399
|
8
|
-
ddeutil/workflow/loader.py,sha256=nJffdJ8kchhwYsEGnWoG_leR16dzU0evo_p5Wbi4rp4,9710
|
9
|
-
ddeutil/workflow/pipeline.py,sha256=kN6k2ht4C2OPHZYPljpnUQqfY3K54kMSOS1azGt7R68,10755
|
10
|
-
ddeutil/workflow/schedule.py,sha256=RSvT0pi1Gb1un3OseRUW9AngRgVuAlXFT23QrSjtxYk,2828
|
11
|
-
ddeutil/workflow/hooks/__init__.py,sha256=x5pY43037-cE0j3ofU_f7oEylzx-sN7pQwhiZubBtvY,199
|
12
|
-
ddeutil/workflow/hooks/_postgres.py,sha256=8bAAlxHe_K1sAqwE1Pz6y6cKuytimmPnaEeudEPag2A,37
|
13
|
-
ddeutil/workflow/tasks/__init__.py,sha256=NgGzbxWBV6lRgIEvp4VuREbKzo_ncTLgx00W2BAR-uk,276
|
14
|
-
ddeutil/workflow/tasks/_polars.py,sha256=OT6kt7mYHx88DEJsO8sfnIfcjdXZ02XCAAcQBKr6wXM,1429
|
15
|
-
ddeutil/workflow/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
-
ddeutil/workflow/utils/receive.py,sha256=lWB8k-_hPmdkq2bjPSL_2IX1X19I8f017u8-7jNRH68,939
|
17
|
-
ddeutil/workflow/utils/selection.py,sha256=0qJu4mgVfNLLlnBbrsQtKK__LmG_DZf2Gs8CBplP4c0,34
|
18
|
-
ddeutil/workflow/vendors/__dict.py,sha256=ETwkeA0qzKNgedfeRgAz1qShNXTIXIS4DXzJB4lM4jo,9962
|
19
|
-
ddeutil/workflow/vendors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
20
|
-
ddeutil/workflow/vendors/__schedule.py,sha256=cUIwtTli9G0klrNFxAIbG3VTiv6umRuNAZiKA-kOtpE,20690
|
21
|
-
ddeutil/workflow/vendors/aws_warpped.py,sha256=zjq_LCu3ffVBRrxS2vqss9X24yrtuAEt9ouy2_WvS0o,5980
|
22
|
-
ddeutil/workflow/vendors/minio_warpped.py,sha256=pScLy38Du9moOrGaSBSFsoQRhiQ686FQyloOeLA0OQk,261
|
23
|
-
ddeutil/workflow/vendors/sftp_wrapped.py,sha256=lQn4mnHhgvE9g1pbpoQF7HvZOxab8Z2XaDtSIJvumGM,7090
|
24
|
-
ddeutil_workflow-0.0.1.dist-info/LICENSE,sha256=nGFZ1QEhhhWeMHf9n99_fdt4vQaXS29xWKxt-OcLywk,1085
|
25
|
-
ddeutil_workflow-0.0.1.dist-info/METADATA,sha256=uFH3MTuZk6UNPjLIQqaQSebszywROMUMzjLMftY0OM0,7444
|
26
|
-
ddeutil_workflow-0.0.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
27
|
-
ddeutil_workflow-0.0.1.dist-info/top_level.txt,sha256=m9M6XeSWDwt_yMsmH6gcOjHZVK5O0-vgtNBuncHjzW4,8
|
28
|
-
ddeutil_workflow-0.0.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|