ddeutil-workflow 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__types.py +1 -0
- ddeutil/workflow/conn.py +13 -10
- ddeutil/workflow/exceptions.py +0 -20
- ddeutil/workflow/loader.py +39 -11
- ddeutil/workflow/pipeline.py +223 -162
- ddeutil/workflow/schedule.py +7 -7
- ddeutil/workflow/tasks/_pandas.py +1 -1
- ddeutil/workflow/tasks/_polars.py +10 -2
- ddeutil/workflow/utils.py +123 -1
- ddeutil/workflow/vendors/__dataset.py +127 -0
- ddeutil/workflow/vendors/az.py +0 -0
- ddeutil/workflow/vendors/pd.py +13 -0
- ddeutil/workflow/vendors/pg.py +11 -0
- ddeutil/workflow/{dataset.py → vendors/pl.py} +3 -133
- {ddeutil_workflow-0.0.2.dist-info → ddeutil_workflow-0.0.4.dist-info}/METADATA +19 -15
- ddeutil_workflow-0.0.4.dist-info/RECORD +29 -0
- ddeutil_workflow-0.0.2.dist-info/RECORD +0 -25
- /ddeutil/workflow/vendors/{aws_warpped.py → aws.py} +0 -0
- /ddeutil/workflow/vendors/{minio_warpped.py → minio.py} +0 -0
- /ddeutil/workflow/vendors/{sftp_wrapped.py → sftp.py} +0 -0
- {ddeutil_workflow-0.0.2.dist-info → ddeutil_workflow-0.0.4.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.2.dist-info → ddeutil_workflow-0.0.4.dist-info}/WHEEL +0 -0
- {ddeutil_workflow-0.0.2.dist-info → ddeutil_workflow-0.0.4.dist-info}/top_level.txt +0 -0
ddeutil/workflow/pipeline.py
CHANGED
@@ -5,39 +5,57 @@
|
|
5
5
|
# ------------------------------------------------------------------------------
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
|
+
import contextlib
|
8
9
|
import inspect
|
10
|
+
import itertools
|
9
11
|
import logging
|
10
12
|
import subprocess
|
13
|
+
import sys
|
14
|
+
import time
|
15
|
+
import uuid
|
11
16
|
from abc import ABC, abstractmethod
|
12
|
-
from datetime import date, datetime
|
13
17
|
from inspect import Parameter
|
18
|
+
from pathlib import Path
|
19
|
+
from queue import Queue
|
14
20
|
from subprocess import CompletedProcess
|
15
|
-
from typing import Any, Callable,
|
21
|
+
from typing import Any, Callable, Optional, Union
|
16
22
|
|
17
|
-
|
23
|
+
import msgspec as spec
|
18
24
|
from pydantic import BaseModel, Field
|
19
25
|
from pydantic.functional_validators import model_validator
|
20
26
|
from typing_extensions import Self
|
21
27
|
|
22
28
|
from .__regex import RegexConf
|
23
|
-
from .__types import DictData
|
24
|
-
from .exceptions import
|
29
|
+
from .__types import DictData, DictStr
|
30
|
+
from .exceptions import TaskException
|
25
31
|
from .loader import Loader, map_params
|
26
|
-
from .utils import make_registry
|
32
|
+
from .utils import Params, make_exec, make_registry
|
27
33
|
|
28
34
|
|
29
35
|
class BaseStage(BaseModel, ABC):
|
30
|
-
"""Base Stage Model."""
|
31
|
-
|
32
|
-
id: Optional[str] =
|
33
|
-
|
36
|
+
"""Base Stage Model that keep only id and name fields."""
|
37
|
+
|
38
|
+
id: Optional[str] = Field(
|
39
|
+
default=None,
|
40
|
+
description=(
|
41
|
+
"The stage ID that use to keep execution output or getting by job "
|
42
|
+
"owner."
|
43
|
+
),
|
44
|
+
)
|
45
|
+
name: str = Field(
|
46
|
+
description="The stage name that want to logging when start execution."
|
47
|
+
)
|
34
48
|
|
35
49
|
@abstractmethod
|
36
50
|
def execute(self, params: DictData) -> DictData:
|
51
|
+
"""Execute abstraction method that action something by sub-model class.
|
52
|
+
|
53
|
+
:param params: A parameter data that want to use in this execution.
|
54
|
+
"""
|
37
55
|
raise NotImplementedError("Stage should implement ``execute`` method.")
|
38
56
|
|
39
57
|
def set_outputs(self, rs: DictData, params: DictData) -> DictData:
|
40
|
-
"""Set outputs to params"""
|
58
|
+
"""Set an outputs from execution process to an input params."""
|
41
59
|
if self.id is None:
|
42
60
|
return params
|
43
61
|
|
@@ -61,12 +79,30 @@ class ShellStage(BaseStage):
|
|
61
79
|
"""Shell statement stage."""
|
62
80
|
|
63
81
|
shell: str
|
64
|
-
env:
|
82
|
+
env: DictStr = Field(default_factory=dict)
|
65
83
|
|
66
84
|
@staticmethod
@contextlib.contextmanager
def __prepare_shell(shell: str):
    """Yield the command for a temporary script that holds ``shell``.

    The statement is written to a uniquely named ``.sh`` file in the current
    working directory, passed to ``make_exec``, and handed to the caller as
    ``[interpreter, file_name]``. The file is always removed when the context
    exits, including when the body of the ``with`` block raises.

    :param shell: A shell statement that want to prepare.
    """
    f_name: str = f"{uuid.uuid4()}.sh"
    # NOTE: ``bash`` is used on Windows platforms and ``sh`` everywhere else.
    f_shebang: str = "bash" if sys.platform.startswith("win") else "sh"
    try:
        with open(f"./{f_name}", mode="w", newline="\n") as f:
            f.write(f"#!/bin/{f_shebang}\n")

            # NOTE: make sure that shell script file does not have `\r` char.
            f.write(shell.replace("\r\n", "\n"))

        # NOTE(review): presumably marks the script as executable -- confirm
        #   against ``utils.make_exec``.
        make_exec(f"./{f_name}")

        yield [f_shebang, f_name]
    finally:
        # NOTE: clean up even on failure; ``missing_ok`` covers the case
        #   where the file could not be created at all.
        Path(f_name).unlink(missing_ok=True)
|
70
106
|
|
71
107
|
def set_outputs(self, rs: CompletedProcess, params: DictData) -> DictData:
|
72
108
|
"""Set outputs to params"""
|
@@ -81,8 +117,7 @@ class ShellStage(BaseStage):
|
|
81
117
|
# NOTE: The output will fileter unnecessary keys from ``_locals``.
|
82
118
|
"outputs": {
|
83
119
|
"return_code": rs.returncode,
|
84
|
-
"stdout": rs.stdout,
|
85
|
-
"stderr": rs.stderr,
|
120
|
+
"stdout": rs.stdout.rstrip("\n"),
|
86
121
|
},
|
87
122
|
}
|
88
123
|
return params
|
@@ -91,19 +126,21 @@ class ShellStage(BaseStage):
|
|
91
126
|
"""Execute the Shell & Powershell statement with the Python build-in
|
92
127
|
``subprocess`` package.
|
93
128
|
"""
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
129
|
+
with self.__prepare_shell(self.shell) as sh:
|
130
|
+
with open(sh[-1]) as f:
|
131
|
+
logging.debug(f.read())
|
132
|
+
logging.info(f"Shell-Execute: {sh}")
|
133
|
+
rs: CompletedProcess = subprocess.run(
|
134
|
+
sh,
|
135
|
+
shell=False,
|
136
|
+
capture_output=True,
|
137
|
+
text=True,
|
138
|
+
)
|
100
139
|
if rs.returncode > 0:
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
# f"{self.shell}"
|
106
|
-
# )
|
140
|
+
logging.error(f"{rs.stderr}\nRunning Statement:\n---\n{self.shell}")
|
141
|
+
raise TaskException(
|
142
|
+
f"{rs.stderr}\nRunning Statement:\n---\n{self.shell}"
|
143
|
+
)
|
107
144
|
self.set_outputs(rs, params)
|
108
145
|
return params
|
109
146
|
|
@@ -116,7 +153,7 @@ class PyStage(BaseStage):
|
|
116
153
|
run: str
|
117
154
|
vars: DictData = Field(default_factory=dict)
|
118
155
|
|
119
|
-
def
|
156
|
+
def get_vars(self, params: DictData) -> DictData:
|
120
157
|
"""Return variables"""
|
121
158
|
rs = self.vars.copy()
|
122
159
|
for p, v in self.vars.items():
|
@@ -149,12 +186,12 @@ class PyStage(BaseStage):
|
|
149
186
|
:returns: A parameters from an input that was mapped output if the stage
|
150
187
|
ID was set.
|
151
188
|
"""
|
152
|
-
_globals: DictData = globals() | params | self.
|
189
|
+
_globals: DictData = globals() | params | self.get_vars(params)
|
153
190
|
_locals: DictData = {}
|
154
191
|
try:
|
155
192
|
exec(map_params(self.run, params), _globals, _locals)
|
156
193
|
except Exception as err:
|
157
|
-
raise
|
194
|
+
raise TaskException(
|
158
195
|
f"{err.__class__.__name__}: {err}\nRunning Statement:\n---\n"
|
159
196
|
f"{self.run}"
|
160
197
|
) from None
|
@@ -164,13 +201,17 @@ class PyStage(BaseStage):
|
|
164
201
|
return params | {k: _globals[k] for k in params if k in _globals}
|
165
202
|
|
166
203
|
|
167
|
-
class TaskSearch(
|
168
|
-
"""Task Search
|
204
|
+
class TaskSearch(spec.Struct, kw_only=True, tag="task"):
    """Keyword-only ``msgspec`` struct holding the parts of a task reference."""

    path: str
    func: str
    tag: str

    def to_dict(self) -> DictData:
        """Return a mapping of every struct field name to its current value."""
        names = self.__struct_fields__
        return dict(zip(names, (getattr(self, name) for name in names)))
|
214
|
+
|
174
215
|
|
175
216
|
class TaskStage(BaseStage):
|
176
217
|
"""Task executor stage that running the Python function."""
|
@@ -183,7 +224,7 @@ class TaskStage(BaseStage):
|
|
183
224
|
"""Extract Task string value to task function."""
|
184
225
|
if not (found := RegexConf.RE_TASK_FMT.search(task)):
|
185
226
|
raise ValueError("Task does not match with task format regex.")
|
186
|
-
tasks = TaskSearch(**found.groupdict())
|
227
|
+
tasks: TaskSearch = TaskSearch(**found.groupdict())
|
187
228
|
|
188
229
|
# NOTE: Registry object should implement on this package only.
|
189
230
|
# TODO: This prefix value to search registry should dynamic with
|
@@ -238,153 +279,131 @@ Stage = Union[
|
|
238
279
|
|
239
280
|
|
240
281
|
class Strategy(BaseModel):
    """Strategy Model that will combine a matrix together for running the
    special job.

    The kebab-case keys ``max-parallel`` and ``fail-fast`` are accepted on
    input and renamed to ``max_parallel`` and ``fail_fast`` before validation.

    Examples:
        >>> strategy = {
        ...     'matrix': {
        ...         'first': [1, 2, 3],
        ...         'second': ['foo', 'bar']
        ...     },
        ...     'include': [{'first': 4, 'second': 'foo'}],
        ...     'exclude': [{'first': 1, 'second': 'bar'}],
        ... }
    """

    # NOTE(review): ``fail_fast`` and ``max_parallel`` are declared here but
    #   are not read by any code visible in this module section.
    fail_fast: bool = Field(default=False)
    max_parallel: int = Field(default=-1)
    matrix: dict[str, Union[list[str], list[int]]] = Field(default_factory=dict)
    include: list[dict[str, Union[str, int]]] = Field(default_factory=list)
    exclude: list[dict[str, Union[str, int]]] = Field(default_factory=list)

    @model_validator(mode="before")
    @classmethod
    def __prepare_keys(cls, values: DictData) -> DictData:
        """Rename kebab-case input keys to the model field names.

        :param values: A raw input data that passing to this model.
        :returns: A copy of the input with the renamed keys, or the input
            itself when it is not a dict.
        """
        # NOTE: a before-validator can receive non-dict input (for example
        #   an existing model instance); pass that through untouched.
        if not isinstance(values, dict):
            return values

        # NOTE: work on a shallow copy so the caller's dict is not mutated.
        values = dict(values)
        for alias, field in (
            ("max-parallel", "max_parallel"),
            ("fail-fast", "fail_fast"),
        ):
            if alias in values:
                values[field] = values.pop(alias)
        return values
|
246
309
|
|
247
310
|
|
248
311
|
class Job(BaseModel):
    """Job Model that keep an ordered list of stages together with the
    strategy matrix that the stages will be executed against.
    """

    runs_on: Optional[str] = Field(default=None)
    stages: list[Stage] = Field(default_factory=list)
    needs: list[str] = Field(default_factory=list)
    strategy: Strategy = Field(default_factory=Strategy)

    @model_validator(mode="before")
    @classmethod
    def __prepare_keys(cls, values: DictData) -> DictData:
        """Rename the kebab-case ``runs-on`` input key to ``runs_on``.

        :param values: A raw input data that passing to this model.
        :returns: A copy of the input with the renamed key, or the input
            itself when there is nothing to rename.
        """
        if isinstance(values, dict) and "runs-on" in values:
            # NOTE: copy first so the caller's dict is not mutated.
            values = dict(values)
            values["runs_on"] = values.pop("runs-on")
        return values

    def stage(self, stage_id: str) -> Stage:
        """Return stage model that match with an input stage ID.

        :param stage_id: A stage ID that want to search on this job.
        :raises ValueError: If no stage on this job has this ID.
        """
        for stage in self.stages:
            if stage_id == (stage.id or ""):
                return stage
        raise ValueError(f"Stage ID {stage_id} does not exists")

    def make_strategy(self) -> list[DictStr]:
        """Return List of combination of matrix values that already filter with
        exclude and add include values.

        :returns: ``[{}]`` when the matrix is empty or every combination was
            excluded; otherwise the remaining combinations followed by any
            ``include`` item that is not already present.
        :raises ValueError: If an include item does not have exactly the same
            keys as the matrix combinations.
        """
        if not (mt := self.strategy.matrix):
            return [{}]

        final: list[DictStr] = []
        # NOTE: the cartesian product of all matrix values, re-keyed by the
        #   matrix names in their declared order.
        for values in itertools.product(*mt.values()):
            r = dict(zip(mt, values))
            # NOTE: a combination is dropped when it matches every key/value
            #   pair of at least one exclude item.
            if any(
                all(r[k] == v for k, v in exclude.items())
                for exclude in self.strategy.exclude
            ):
                continue
            final.append(r)

        # NOTE: include items are not considered when nothing survived.
        if not final:
            return [{}]

        for include in self.strategy.include:
            if include.keys() != final[0].keys():
                raise ValueError("Include should have the keys equal to matrix")
            if any(all(include[k] == v for k, v in f.items()) for f in final):
                continue
            final.append(include)
        return final

    def execute(self, params: DictData | None = None) -> DictData:
        """Execute job with passing dynamic parameters from the pipeline.

        :param params: A parameter data that is updated in place by every
            stage; a new dict is created when it does not pass.
        :returns: The same parameter dict after all stages have run for every
            strategy combination.
        """
        # NOTE: ``None`` is the declared default, so it must not reach
        #   ``params.update`` below.
        if params is None:
            params = {}

        for strategy in self.make_strategy():
            params.update({"matrix": strategy})

            # IMPORTANT: The stage execution only run sequentially one-by-one.
            for stage in self.stages:
                logging.info(
                    f"[JOB]: Start execute the stage: "
                    f"{(stage.id if stage.id else stage.name)!r}"
                )

                # NOTE:
                #   The ``params |= stage.execute(params=params)`` syntax is
                #   not used here because ``params`` is a shared reference
                #   that the stage already changes in place.
                stage.execute(params=params)
        # TODO: We should not return matrix key to outside
        return params
|
271
383
|
|
272
384
|
|
273
|
-
class BaseParams(BaseModel, ABC):
|
274
|
-
"""Base Parameter that use to make Params Model."""
|
275
|
-
|
276
|
-
desc: Optional[str] = None
|
277
|
-
required: bool = True
|
278
|
-
type: str
|
279
|
-
|
280
|
-
@abstractmethod
|
281
|
-
def receive(self, value: Optional[Any] = None) -> Any:
|
282
|
-
raise ValueError(
|
283
|
-
"Receive value and validate typing before return valid value."
|
284
|
-
)
|
285
|
-
|
286
|
-
|
287
|
-
class DefaultParams(BaseParams):
|
288
|
-
"""Default Parameter that will check default if it required"""
|
289
|
-
|
290
|
-
default: Optional[str] = None
|
291
|
-
|
292
|
-
@abstractmethod
|
293
|
-
def receive(self, value: Optional[Any] = None) -> Any:
|
294
|
-
raise ValueError(
|
295
|
-
"Receive value and validate typing before return valid value."
|
296
|
-
)
|
297
|
-
|
298
|
-
@model_validator(mode="after")
|
299
|
-
def check_default(self) -> Self:
|
300
|
-
if not self.required and self.default is None:
|
301
|
-
raise ValueError(
|
302
|
-
"Default should set when this parameter does not required."
|
303
|
-
)
|
304
|
-
return self
|
305
|
-
|
306
|
-
|
307
|
-
class DatetimeParams(DefaultParams):
|
308
|
-
"""Datetime parameter."""
|
309
|
-
|
310
|
-
type: Literal["datetime"] = "datetime"
|
311
|
-
required: bool = False
|
312
|
-
default: datetime = Field(default_factory=dt_now)
|
313
|
-
|
314
|
-
def receive(self, value: str | datetime | date | None = None) -> datetime:
|
315
|
-
if value is None:
|
316
|
-
return self.default
|
317
|
-
|
318
|
-
if isinstance(value, datetime):
|
319
|
-
return value
|
320
|
-
elif isinstance(value, date):
|
321
|
-
return datetime(value.year, value.month, value.day)
|
322
|
-
elif not isinstance(value, str):
|
323
|
-
raise ValueError(
|
324
|
-
f"Value that want to convert to datetime does not support for "
|
325
|
-
f"type: {type(value)}"
|
326
|
-
)
|
327
|
-
return datetime.fromisoformat(value)
|
328
|
-
|
329
|
-
|
330
|
-
class StrParams(DefaultParams):
|
331
|
-
"""String parameter."""
|
332
|
-
|
333
|
-
type: Literal["str"] = "str"
|
334
|
-
|
335
|
-
def receive(self, value: Optional[str] = None) -> str | None:
|
336
|
-
if value is None:
|
337
|
-
return self.default
|
338
|
-
return str(value)
|
339
|
-
|
340
|
-
|
341
|
-
class IntParams(DefaultParams):
|
342
|
-
"""Integer parameter."""
|
343
|
-
|
344
|
-
type: Literal["int"] = "int"
|
345
|
-
|
346
|
-
def receive(self, value: Optional[int] = None) -> int | None:
|
347
|
-
if value is None:
|
348
|
-
return self.default
|
349
|
-
if not isinstance(value, int):
|
350
|
-
try:
|
351
|
-
return int(str(value))
|
352
|
-
except TypeError as err:
|
353
|
-
raise ValueError(
|
354
|
-
f"Value that want to convert to integer does not support "
|
355
|
-
f"for type: {type(value)}"
|
356
|
-
) from err
|
357
|
-
return value
|
358
|
-
|
359
|
-
|
360
|
-
class ChoiceParams(BaseParams):
|
361
|
-
type: Literal["choice"] = "choice"
|
362
|
-
options: list[str]
|
363
|
-
|
364
|
-
def receive(self, value: Optional[str] = None) -> str:
|
365
|
-
"""Receive value that match with options."""
|
366
|
-
# NOTE:
|
367
|
-
# Return the first value in options if does not pass any input value
|
368
|
-
if value is None:
|
369
|
-
return self.options[0]
|
370
|
-
if any(value not in self.options):
|
371
|
-
raise ValueError(f"{value} does not match any value in options")
|
372
|
-
return value
|
373
|
-
|
374
|
-
|
375
|
-
Params = Union[
|
376
|
-
ChoiceParams,
|
377
|
-
DatetimeParams,
|
378
|
-
StrParams,
|
379
|
-
]
|
380
|
-
|
381
|
-
|
382
385
|
class Pipeline(BaseModel):
|
383
|
-
"""Pipeline Model
|
386
|
+
"""Pipeline Model this is the main feature of this project because it use to
|
387
|
+
be workflow data for running everywhere that you want. It use lightweight
|
388
|
+
coding line to execute it.
|
389
|
+
"""
|
384
390
|
|
385
391
|
params: dict[str, Params] = Field(default_factory=dict)
|
386
392
|
jobs: dict[str, Job]
|
387
393
|
|
394
|
+
@model_validator(mode="before")
|
395
|
+
def __prepare_params(cls, values: DictData) -> DictData:
|
396
|
+
if params := values.pop("params", {}):
|
397
|
+
values["params"] = {
|
398
|
+
p: (
|
399
|
+
{"type": params[p]}
|
400
|
+
if isinstance(params[p], str)
|
401
|
+
else params[p]
|
402
|
+
)
|
403
|
+
for p in params
|
404
|
+
}
|
405
|
+
return values
|
406
|
+
|
388
407
|
@classmethod
|
389
408
|
def from_loader(
|
390
409
|
cls,
|
@@ -399,6 +418,10 @@ class Pipeline(BaseModel):
|
|
399
418
|
params=loader.data["params"],
|
400
419
|
)
|
401
420
|
|
421
|
+
@model_validator(mode="after")
|
422
|
+
def job_checking_needs(self):
|
423
|
+
return self
|
424
|
+
|
402
425
|
def job(self, name: str) -> Job:
|
403
426
|
"""Return Job model that exists on this pipeline.
|
404
427
|
|
@@ -406,13 +429,23 @@ class Pipeline(BaseModel):
|
|
406
429
|
:type name: str
|
407
430
|
|
408
431
|
:rtype: Job
|
432
|
+
:returns: A job model that exists on this pipeline by input name.
|
409
433
|
"""
|
410
434
|
if name not in self.jobs:
|
411
|
-
raise ValueError(f"Job {name} does not exists")
|
435
|
+
raise ValueError(f"Job {name!r} does not exists")
|
412
436
|
return self.jobs[name]
|
413
437
|
|
414
|
-
def execute(
|
415
|
-
|
438
|
+
def execute(
|
439
|
+
self,
|
440
|
+
params: DictData | None = None,
|
441
|
+
time_out: int = 60,
|
442
|
+
) -> DictData:
|
443
|
+
"""Execute pipeline with passing dynamic parameters to any jobs that
|
444
|
+
included in the pipeline.
|
445
|
+
|
446
|
+
:param params: An input parameters that use on pipeline execution.
|
447
|
+
:param time_out: A time out second value for limit time of this
|
448
|
+
execution.
|
416
449
|
|
417
450
|
See Also:
|
418
451
|
|
@@ -427,8 +460,7 @@ class Pipeline(BaseModel):
|
|
427
460
|
|
428
461
|
"""
|
429
462
|
params: DictData = params or {}
|
430
|
-
check_key
|
431
|
-
if check_key:
|
463
|
+
if check_key := tuple(f"{k!r}" for k in self.params if k not in params):
|
432
464
|
raise ValueError(
|
433
465
|
f"Parameters that needed on pipeline does not pass: "
|
434
466
|
f"{', '.join(check_key)}."
|
@@ -445,12 +477,41 @@ class Pipeline(BaseModel):
|
|
445
477
|
for k in params
|
446
478
|
if k in self.params
|
447
479
|
}
|
448
|
-
)
|
480
|
+
),
|
481
|
+
"jobs": {},
|
449
482
|
}
|
483
|
+
|
484
|
+
jq = Queue()
|
450
485
|
for job_id in self.jobs:
|
451
|
-
|
486
|
+
jq.put(job_id)
|
487
|
+
|
488
|
+
ts: float = time.monotonic()
|
489
|
+
not_time_out_flag = True
|
490
|
+
|
491
|
+
# IMPORTANT: The job execution can run parallel and waiting by needed.
|
492
|
+
while not jq.empty() and (
|
493
|
+
not_time_out_flag := ((time.monotonic() - ts) < time_out)
|
494
|
+
):
|
495
|
+
job_id: str = jq.get()
|
496
|
+
logging.info(f"[PIPELINE]: Start execute the job: {job_id!r}")
|
452
497
|
job: Job = self.jobs[job_id]
|
498
|
+
|
453
499
|
# TODO: Condition on ``needs`` of this job was set. It should create
|
454
500
|
# multithreading process on this step.
|
501
|
+
# But, I don't know how to handle changes params between each job
|
502
|
+
# execution while its use them together.
|
503
|
+
# ---
|
504
|
+
# >>> import multiprocessing
|
505
|
+
# >>> with multiprocessing.Pool(processes=3) as pool:
|
506
|
+
# ... results = pool.starmap(merge_names, ('', '', ...))
|
507
|
+
#
|
508
|
+
if any(params["jobs"].get(need) for need in job.needs):
|
509
|
+
jq.put(job_id)
|
455
510
|
job.execute(params=params)
|
511
|
+
params["jobs"][job_id] = {
|
512
|
+
"stages": params.pop("stages", {}),
|
513
|
+
"matrix": params.pop("matrix", {}),
|
514
|
+
}
|
515
|
+
if not not_time_out_flag:
|
516
|
+
raise RuntimeError("Execution of pipeline was time out")
|
456
517
|
return params
|
ddeutil/workflow/schedule.py
CHANGED
@@ -18,8 +18,8 @@ from .__types import DictData
|
|
18
18
|
from .loader import Loader
|
19
19
|
|
20
20
|
|
21
|
-
class
|
22
|
-
"""Base
|
21
|
+
class BaseSchedule(BaseModel):
|
22
|
+
"""Base Schedule (Schedule) Model"""
|
23
23
|
|
24
24
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
25
25
|
|
@@ -61,16 +61,16 @@ class BaseScdl(BaseModel):
|
|
61
61
|
return self.cronjob.schedule(date=(start.astimezone(ZoneInfo(self.tz))))
|
62
62
|
|
63
63
|
|
64
|
-
class
|
65
|
-
"""
|
64
|
+
class Schedule(BaseSchedule):
    """Schedule Model.

    See Also:
        * ``generate()`` is the main use case of this schedule object.
    """
|
70
70
|
|
71
71
|
|
72
|
-
class
|
73
|
-
"""Asia Bangkok
|
72
|
+
class ScheduleBkk(Schedule):
|
73
|
+
"""Asia Bangkok Schedule (Schedule) timezone Model.
|
74
74
|
|
75
75
|
This model use for change timezone from utc to Asia/Bangkok
|
76
76
|
"""
|
@@ -78,5 +78,5 @@ class ScdlBkk(Scdl):
|
|
78
78
|
tz: Annotated[str, Field(description="Timezone")] = "Asia/Bangkok"
|
79
79
|
|
80
80
|
|
81
|
-
class
|
81
|
+
class AwsSchedule(BaseSchedule):
|
82
82
|
"""Implement Schedule for AWS Service."""
|
@@ -4,7 +4,7 @@ import math
|
|
4
4
|
try:
|
5
5
|
import pandas as pd
|
6
6
|
|
7
|
-
logging.debug(f"
|
7
|
+
logging.debug(f"Pandas version: {pd.__version__}")
|
8
8
|
except ImportError as err:
|
9
9
|
raise ImportError(
|
10
10
|
"``split_iterable`` function want to use pandas package that does"
|
@@ -5,13 +5,21 @@
|
|
5
5
|
# ------------------------------------------------------------------------------
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
|
+
import logging
|
8
9
|
from typing import Any
|
9
10
|
from uuid import uuid4
|
10
11
|
|
11
|
-
|
12
|
+
try:
|
13
|
+
import polars as pl
|
14
|
+
|
15
|
+
logging.debug(f"Polars version: {pl.__version__}")
|
16
|
+
except ImportError:
|
17
|
+
raise ImportError(
|
18
|
+
"Please install polars if you want to use any relate task"
|
19
|
+
) from None
|
12
20
|
import pyarrow.parquet as pq
|
13
|
-
from ddeutil.workflow.dataset import PolarsCsv, PolarsParq
|
14
21
|
from ddeutil.workflow.utils import tag
|
22
|
+
from ddeutil.workflow.vendors.pl import PolarsCsv, PolarsParq
|
15
23
|
|
16
24
|
|
17
25
|
def polars_dtype():
|