ddeutil-workflow 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__types.py +1 -0
- ddeutil/workflow/conn.py +33 -28
- ddeutil/workflow/exceptions.py +0 -70
- ddeutil/workflow/loader.py +55 -191
- ddeutil/workflow/pipeline.py +264 -110
- ddeutil/workflow/schedule.py +10 -15
- ddeutil/workflow/tasks/__init__.py +6 -10
- ddeutil/workflow/tasks/_pandas.py +54 -0
- ddeutil/workflow/tasks/_polars.py +55 -4
- ddeutil/workflow/utils.py +180 -0
- ddeutil/workflow/vendors/__dataset.py +127 -0
- ddeutil/workflow/vendors/pd.py +13 -0
- ddeutil/workflow/vendors/pg.py +11 -0
- ddeutil/workflow/{dataset.py → vendors/pl.py} +4 -138
- {ddeutil_workflow-0.0.1.dist-info → ddeutil_workflow-0.0.3.dist-info}/METADATA +35 -20
- ddeutil_workflow-0.0.3.dist-info/RECORD +29 -0
- ddeutil/workflow/hooks/__init__.py +0 -9
- ddeutil/workflow/hooks/_postgres.py +0 -2
- ddeutil/workflow/utils/receive.py +0 -33
- ddeutil/workflow/utils/selection.py +0 -2
- ddeutil_workflow-0.0.1.dist-info/RECORD +0 -28
- /ddeutil/workflow/vendors/{aws_warpped.py → aws.py} +0 -0
- /ddeutil/workflow/{utils/__init__.py → vendors/az.py} +0 -0
- /ddeutil/workflow/vendors/{minio_warpped.py → minio.py} +0 -0
- /ddeutil/workflow/vendors/{sftp_wrapped.py → sftp.py} +0 -0
- {ddeutil_workflow-0.0.1.dist-info → ddeutil_workflow-0.0.3.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.1.dist-info → ddeutil_workflow-0.0.3.dist-info}/WHEEL +0 -0
- {ddeutil_workflow-0.0.1.dist-info → ddeutil_workflow-0.0.3.dist-info}/top_level.txt +0 -0
ddeutil/workflow/pipeline.py
CHANGED
```diff
@@ -6,53 +6,83 @@
 from __future__ import annotations
 
 import inspect
+import itertools
+import logging
 import subprocess
+import time
+from abc import ABC, abstractmethod
 from inspect import Parameter
+from queue import Queue
 from subprocess import CompletedProcess
 from typing import Any, Callable, Optional, Union
 
+import msgspec as spec
 from pydantic import BaseModel, Field
+from pydantic.functional_validators import model_validator
 from typing_extensions import Self
 
 from .__regex import RegexConf
-from .__types import DictData
-from .exceptions import
-from .loader import Loader,
-
-
-
-
+from .__types import DictData, DictStr
+from .exceptions import TaskException
+from .loader import Loader, map_params
+from .utils import Params, make_registry
+
+
+class BaseStage(BaseModel, ABC):
+    """Base Stage Model that keep only id and name fields."""
+
+    id: Optional[str] = Field(
+        default=None,
+        description=(
+            "The stage ID that use to keep execution output or getting by job "
+            "owner."
+        ),
+    )
+    name: str = Field(
+        description="The stage name that want to logging when start execution."
+    )
+
+    @abstractmethod
+    def execute(self, params: DictData) -> DictData:
+        """Execute abstraction method that action something by sub-model class.
+
+        :param params: A parameter data that want to use in this execution.
+        """
+        raise NotImplementedError("Stage should implement ``execute`` method.")
 
-
+    def set_outputs(self, rs: DictData, params: DictData) -> DictData:
+        """Set an outputs from execution process to an input params."""
+        if self.id is None:
+            return params
 
+        if "stages" not in params:
+            params["stages"] = {}
 
-
+        params["stages"][self.id] = {"outputs": rs}
+        return params
 
 
-class EmptyStage(
+class EmptyStage(BaseStage):
     """Empty stage that is doing nothing and logging the name of stage only."""
 
-
-
-
-    def execute(self, params: dict[str, Any]) -> dict[str, Any]:
+    def execute(self, params: DictData) -> DictData:
+        """Execute for the Empty stage that do only logging out."""
+        logging.info(f"Execute: {self.name!r}")
         return params
 
 
-class ShellStage(
+class ShellStage(BaseStage):
     """Shell statement stage."""
 
     shell: str
-    env:
+    env: DictStr = Field(default_factory=dict)
 
     @staticmethod
     def __prepare_shell(shell: str):
         """Prepare shell statement string that include newline"""
         return shell.replace("\n", ";")
 
-    def set_outputs(
-        self, rs: CompletedProcess, params: dict[str, Any]
-    ) -> dict[str, Any]:
+    def set_outputs(self, rs: CompletedProcess, params: DictData) -> DictData:
         """Set outputs to params"""
         # NOTE: skipping set outputs of stage execution when id does not set.
         if self.id is None:
```
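The new ``BaseStage.set_outputs`` contract nests a stage's result under its ID inside a shared ``stages`` mapping. A minimal standalone sketch of the resulting shape (the stage ID and output values here are hypothetical, not taken from the package):

```python
# Sketch of the BaseStage.set_outputs behavior; "extract" and the sample
# outputs are hypothetical values.
params: dict = {}
rs: dict = {"records": 10}
stage_id = "extract"  # a stage with ``id`` set keeps its outputs

if "stages" not in params:
    params["stages"] = {}
params["stages"][stage_id] = {"outputs": rs}

assert params == {"stages": {"extract": {"outputs": {"records": 10}}}}
```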
```diff
@@ -71,7 +101,7 @@ class ShellStage(EmptyStage):
         }
         return params
 
-    def execute(self, params:
+    def execute(self, params: DictData) -> DictData:
         """Execute the Shell & Powershell statement with the Python build-in
         ``subprocess`` package.
         """
```
```diff
@@ -84,7 +114,7 @@ class ShellStage(EmptyStage):
         if rs.returncode > 0:
             print(f"{rs.stderr}\nRunning Statement:\n---\n{self.shell}")
             # FIXME: raise err for this execution.
-            # raise
+            # raise TaskException(
             #     f"{rs.stderr}\nRunning Statement:\n---\n"
             #     f"{self.shell}"
             # )
```
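The hunks above only show ``__prepare_shell`` and the error branch, so the following is a hedged sketch of how a multi-line shell statement can be collapsed and run through ``subprocess``; the exact ``subprocess.run`` arguments are an assumption, not taken from this diff.

```python
import subprocess

shell = "echo 'hello'\necho 'world'"

# Mirror __prepare_shell: join a multi-line statement into one ;-separated
# command string before handing it to the shell.
prepared = shell.replace("\n", ";")
rs = subprocess.run(prepared, shell=True, capture_output=True, text=True)
if rs.returncode > 0:
    print(f"{rs.stderr}\nRunning Statement:\n---\n{shell}")
```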
```diff
@@ -92,24 +122,22 @@ class ShellStage(EmptyStage):
         return params
 
 
-class PyStage(
+class PyStage(BaseStage):
     """Python executor stage that running the Python statement that receive
     globals nad additional variables.
     """
 
     run: str
-    vars:
+    vars: DictData = Field(default_factory=dict)
 
-    def
+    def get_vars(self, params: DictData) -> DictData:
         """Return variables"""
         rs = self.vars.copy()
         for p, v in self.vars.items():
-            rs[p] =
+            rs[p] = map_params(v, params)
         return rs
 
-    def set_outputs(
-        self, lc: dict[str, Any], params: dict[str, Any]
-    ) -> dict[str, Any]:
+    def set_outputs(self, rs: DictData, params: DictData) -> DictData:
         """Set outputs to params"""
         # NOTE: skipping set outputs of stage execution when id does not set.
         if self.id is None:
```
```diff
@@ -120,27 +148,27 @@ class PyStage(EmptyStage):
 
         params["stages"][self.id] = {
             # NOTE: The output will fileter unnecessary keys from ``_locals``.
-            "outputs": {k:
+            "outputs": {k: rs[k] for k in rs if k != "__annotations__"},
         }
         return params
 
-    def execute(self, params:
+    def execute(self, params: DictData) -> DictData:
         """Execute the Python statement that pass all globals and input params
         to globals argument on ``exec`` build-in function.
 
         :param params: A parameter that want to pass before run any statement.
-        :type params:
+        :type params: DictData
 
-        :rtype:
+        :rtype: DictData
         :returns: A parameters from an input that was mapped output if the stage
             ID was set.
         """
-        _globals:
-        _locals:
+        _globals: DictData = globals() | params | self.get_vars(params)
+        _locals: DictData = {}
         try:
-            exec(
+            exec(map_params(self.run, params), _globals, _locals)
         except Exception as err:
-            raise
+            raise TaskException(
                 f"{err.__class__.__name__}: {err}\nRunning Statement:\n---\n"
                 f"{self.run}"
             ) from None
```
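``PyStage.execute`` leans on a standard property of the ``exec`` built-in: names assigned by the executed statement land in the locals mapping, which ``set_outputs`` then filters into the stage outputs. A self-contained sketch (the statement and parameter values are hypothetical):

```python
# Names read by the statement resolve from _globals; names it assigns are
# captured in _locals, which is what PyStage turns into stage outputs.
params = {"x": 2}
run = "y = x * 21"  # hypothetical statement; PyStage reads this from config

_globals: dict = globals() | params
_locals: dict = {}
exec(run, _globals, _locals)

assert _locals == {"y": 42}
```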
```diff
@@ -150,44 +178,40 @@ class PyStage(EmptyStage):
         return params | {k: _globals[k] for k in params if k in _globals}
 
 
-class TaskSearch(
+class TaskSearch(spec.Struct, kw_only=True, tag="task"):
+    """Task Search Struct that use the `msgspec` for the best performance."""
+
     path: str
     func: str
     tag: str
 
+    def to_dict(self) -> DictData:
+        """Return dict data from struct fields."""
+        return {f: getattr(self, f) for f in self.__struct_fields__}
+
+
+class TaskStage(BaseStage):
+    """Task executor stage that running the Python function."""
 
-class TaskStage(EmptyStage):
     task: str
-    args:
+    args: DictData
 
     @staticmethod
     def extract_task(task: str) -> Callable[[], Callable[[Any], Any]]:
         """Extract Task string value to task function."""
         if not (found := RegexConf.RE_TASK_FMT.search(task)):
             raise ValueError("Task does not match with task format regex.")
-        tasks = TaskSearch(**found.groupdict())
-
-        from ddeutil.core import import_string
-
-        try:
-            rgt = import_string(f"ddeutil.workflow.{tasks.path}.registries")
-            if tasks.func not in rgt:
-                raise NotImplementedError(
-                    f"ddeutil.workflow.{tasks.path}.registries does not "
-                    f"implement registry: {tasks.func}."
-                )
-        except ImportError:
+        tasks: TaskSearch = TaskSearch(**found.groupdict())
 
-
-
-
-
-
-
-
-
-            ) from None
+        # NOTE: Registry object should implement on this package only.
+        # TODO: This prefix value to search registry should dynamic with
+        #   config file.
+        rgt = make_registry(f"ddeutil.workflow.{tasks.path}")
+        if tasks.func not in rgt:
+            raise NotImplementedError(
+                f"ddeutil.workflow.{tasks.path}.registries does not "
+                f"implement registry: {tasks.func}."
+            )
 
         if tasks.tag not in rgt[tasks.func]:
             raise NotImplementedError(
```
```diff
@@ -197,7 +221,7 @@ class TaskStage(EmptyStage):
             )
         return rgt[tasks.func][tasks.tag]
 
-    def execute(self, params:
+    def execute(self, params: DictData) -> DictData:
         """Execute the Task function."""
         task_caller = self.extract_task(self.task)()
         if not callable(task_caller):
```
```diff
@@ -215,17 +239,11 @@ class TaskStage(EmptyStage):
                 f"does not set to args"
             )
         try:
-            rs = task_caller(**self.args)
+            rs = task_caller(**map_params(self.args, params))
         except Exception as err:
             raise TaskException(f"{err.__class__.__name__}: {err}") from err
-
-
-
-class HookStage(EmptyStage):
-    hook: str
-    args: dict[str, Any]
-
-    def execute(self, params: dict[str, Any]) -> dict[str, Any]: ...
+        self.set_outputs(rs, params)
+        return params
 
 
 # NOTE: Order of parsing stage data
```
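``extract_task`` implies that ``make_registry`` returns a two-level mapping, ``{func: {tag: caller}}``, where each registered caller is itself a zero-argument factory (note the trailing ``()`` in ``self.extract_task(self.task)()`` inside ``TaskStage.execute``). A toy registry with that shape (all names here are hypothetical stand-ins, not the package's real registry):

```python
from typing import Any, Callable

def csv_to_parquet(source: str, sink: str) -> dict[str, Any]:
    """Hypothetical task function standing in for a registered entry."""
    return {"records": 0, "source": source, "sink": sink}

# Shape implied by ``rgt[tasks.func][tasks.tag]`` followed by a call:
# func name -> tag -> zero-argument factory -> task callable.
rgt: dict[str, dict[str, Callable[[], Callable]]] = {
    "csv-to-parquet": {"polars": lambda: csv_to_parquet},
}

task_caller = rgt["csv-to-parquet"]["polars"]()
assert callable(task_caller)
rs = task_caller(source="local", sink="warehouse")
```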
```diff
@@ -233,72 +251,178 @@ Stage = Union[
     PyStage,
     ShellStage,
     TaskStage,
-    HookStage,
     EmptyStage,
 ]
 
 
+class Strategy(BaseModel):
+    """Strategy Model that will combine a matrix together for running the
+    special job.
+
+    Examples:
+        >>> strategy = {
+        ...     'matrix': {
+        ...         'first': [1, 2, 3],
+        ...         'second': ['foo', 'bar']
+        ...     },
+        ...     'include': [{'first': 4, 'second': 'foo'}],
+        ...     'exclude': [{'first': 1, 'second': 'bar'}],
+        ... }
+    """
+
+    fail_fast: bool = Field(default=False)
+    max_parallel: int = Field(default=-1)
+    matrix: dict[str, Union[list[str], list[int]]] = Field(default_factory=dict)
+    include: list[dict[str, Union[str, int]]] = Field(default_factory=list)
+    exclude: list[dict[str, Union[str, int]]] = Field(default_factory=list)
+
+    @model_validator(mode="before")
+    def __prepare_keys(cls, values: DictData) -> DictData:
+        if "max-parallel" in values:
+            values["max_parallel"] = values.pop("max-parallel")
+        if "fail-fast" in values:
+            values["fail_fast"] = values.pop("fail-fast")
+        return values
+
+
 class Job(BaseModel):
+    """Job Model"""
+
+    runs_on: Optional[str] = Field(default=None)
     stages: list[Stage] = Field(default_factory=list)
     needs: list[str] = Field(default_factory=list)
+    strategy: Strategy = Field(default_factory=Strategy)
+
+    @model_validator(mode="before")
+    def __prepare_keys(cls, values: DictData) -> DictData:
+        if "runs-on" in values:
+            values["runs_on"] = values.pop("runs-on")
+        return values
 
     def stage(self, stage_id: str) -> Stage:
+        """Return stage model that match with an input stage ID."""
         for stage in self.stages:
             if stage_id == (stage.id or ""):
                 return stage
         raise ValueError(f"Stage ID {stage_id} does not exists")
 
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def make_strategy(self) -> list[DictStr]:
+        """Return List of combination of matrix values that already filter with
+        exclude and add include values.
+        """
+        if not (mt := self.strategy.matrix):
+            return [{}]
+        final: list[DictStr] = []
+        for r in [
+            {_k: _v for e in mapped for _k, _v in e.items()}
+            for mapped in itertools.product(
+                *[[{k: v} for v in vs] for k, vs in mt.items()]
+            )
+        ]:
+            if any(
+                all(r[k] == v for k, v in exclude.items())
+                for exclude in self.strategy.exclude
+            ):
+                continue
+            final.append(r)
+
+        if not final:
+            return [{}]
+
+        for include in self.strategy.include:
+            if include.keys() != final[0].keys():
+                raise ValueError("Include should have the keys equal to matrix")
+            if any(all(include[k] == v for k, v in f.items()) for f in final):
+                continue
+            final.append(include)
+        return final
+
+    def execute(self, params: DictData | None = None) -> DictData:
+        """Execute job with passing dynamic parameters from the pipeline."""
+        for strategy in self.make_strategy():
+            params.update({"matrix": strategy})
+
+            # IMPORTANT: The stage execution only run sequentially one-by-one.
+            for stage in self.stages:
+                logging.info(
+                    f"[JOB]: Start execute the stage: "
+                    f"{(stage.id if stage.id else stage.name)!r}"
+                )
 
-
+                # NOTE:
+                #   I do not use below syntax because `params` dict be the
+                #   reference memory pointer and it was changed when I action
+                #   anything like update or re-construct this.
+                #   ... params |= stage.execute(params=params)
+                stage.execute(params=params)
+        # TODO: We should not return matrix key to outside
+        return params
 
 
 class Pipeline(BaseModel):
-    """Pipeline Model
+    """Pipeline Model this is the main feature of this project because it use to
+    be workflow data for running everywhere that you want. It use lightweight
+    coding line to execute it.
+    """
 
-    params: dict[str,
+    params: dict[str, Params] = Field(default_factory=dict)
     jobs: dict[str, Job]
 
+    @model_validator(mode="before")
+    def __prepare_params(cls, values: DictData) -> DictData:
+        if params := values.pop("params", {}):
+            values["params"] = {
+                p: (
+                    {"type": params[p]}
+                    if isinstance(params[p], str)
+                    else params[p]
+                )
+                for p in params
+            }
+        return values
+
     @classmethod
     def from_loader(
         cls,
         name: str,
-        externals: DictData,
+        externals: Optional[DictData] = None,
     ) -> Self:
-        loader: Loader = Loader(name, externals=externals)
+        loader: Loader = Loader(name, externals=(externals or {}))
         if "jobs" not in loader.data:
-            raise
+            raise ValueError("Config does not set ``jobs`` value")
         return cls(
             jobs=loader.data["jobs"],
-            params=loader.params
+            params=loader.data["params"],
         )
 
+    @model_validator(mode="after")
+    def job_checking_needs(self):
+        return self
+
     def job(self, name: str) -> Job:
-        """Return Job model that exists on this pipeline.
+        """Return Job model that exists on this pipeline.
+
+        :param name: A job name that want to get from a mapping of job models.
+        :type name: str
+
+        :rtype: Job
+        :returns: A job model that exists on this pipeline by input name.
+        """
         if name not in self.jobs:
-            raise ValueError(f"Job {name} does not exists")
+            raise ValueError(f"Job {name!r} does not exists")
         return self.jobs[name]
 
-    def execute(
-
+    def execute(
+        self,
+        params: DictData | None = None,
+        time_out: int = 60,
+    ) -> DictData:
+        """Execute pipeline with passing dynamic parameters to any jobs that
+        included in the pipeline.
+
+        :param params: An input parameters that use on pipeline execution.
+        :param time_out: A time out second value for limit time of this
+            execution.
 
         See Also:
 
```
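The ``make_strategy`` cross-product can be checked against the docstring example in the ``Strategy`` model above; this standalone sketch reproduces the same filter-then-extend logic outside the model:

```python
import itertools

matrix = {"first": [1, 2, 3], "second": ["foo", "bar"]}
include = [{"first": 4, "second": "foo"}]
exclude = [{"first": 1, "second": "bar"}]

# Cross-product of the matrix axes, minus excluded combinations.
final = [
    r
    for r in (
        {k: v for d in combo for k, v in d.items()}
        for combo in itertools.product(
            *[[{k: v} for v in vs] for k, vs in matrix.items()]
        )
    )
    if not any(all(r[k] == v for k, v in ex.items()) for ex in exclude)
]
# Append include entries that are not already present.
for inc in include:
    if not any(all(inc[k] == v for k, v in f.items()) for f in final):
        final.append(inc)

# 3 * 2 = 6 combinations, minus one excluded, plus one included.
assert len(final) == 6
assert {"first": 1, "second": "bar"} not in final
assert {"first": 4, "second": "foo"} in final
```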
```diff
@@ -312,27 +436,57 @@ class Pipeline(BaseModel):
             ... "<job-name>.stages.<stage-id>.outputs.<key>"
 
         """
-        params:
-        check_key
-        if check_key:
+        params: DictData = params or {}
+        if check_key := tuple(f"{k!r}" for k in self.params if k not in params):
             raise ValueError(
                 f"Parameters that needed on pipeline does not pass: "
                 f"{', '.join(check_key)}."
             )
-
+
+        if any(p not in params for p in self.params if self.params[p].required):
+            raise ValueError("Required parameter does not pass")
+
+        params: DictData = {
             "params": (
                 params
                 | {
-                    k: self.params[k](params[k])
+                    k: self.params[k].receive(params[k])
                     for k in params
                     if k in self.params
                 }
-            )
+            ),
+            "jobs": {},
         }
+
+        jq = Queue()
         for job_id in self.jobs:
-
-
+            jq.put(job_id)
+
+        ts: float = time.monotonic()
+        not_time_out_flag = True
+
+        # IMPORTANT: The job execution can run parallel and waiting by needed.
+        while not jq.empty() and (
+            not_time_out_flag := ((time.monotonic() - ts) < time_out)
+        ):
+            job_id: str = jq.get()
+            logging.info(f"[PIPELINE]: Start execute the job: {job_id!r}")
+            job: Job = self.jobs[job_id]
             # TODO: Condition on ``needs`` of this job was set. It should create
             #   multithreading process on this step.
+            #   But, I don't know how to handle changes params between each job
+            #   execution while its use them together.
+            # ---
+            #   >>> import multiprocessing
+            #   >>> with multiprocessing.Pool(processes=3) as pool:
+            #   ...     results = pool.starmap(merge_names, ('', '', ...))
+            if any(params["jobs"].get(need) for need in job.needs):
+                jq.put(job_id)
             job.execute(params=params)
+            params["jobs"][job_id] = {
+                "stages": params.pop("stages", {}),
+                "matrix": params.pop("matrix", {}),
+            }
+        if not not_time_out_flag:
+            raise RuntimeError("Execution of pipeline was time out")
         return params
```
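Taken together, ``Pipeline.execute`` validates parameters, seeds a FIFO queue with every job ID, and re-queues jobs based on their ``needs`` until the queue drains or ``time_out`` (seconds) elapses, recording finished jobs under ``params["jobs"]``. A hedged usage sketch (the config name and parameter keys are hypothetical, and assume a config source that ``Loader`` can resolve):

```python
from ddeutil.workflow.pipeline import Pipeline

# "run-etl" is a hypothetical config name; Loader must resolve it to a
# mapping containing ``jobs`` (required) and ``params``.
pipeline = Pipeline.from_loader(name="run-etl")

# Missing required parameters raise ValueError; jobs then run until the
# queue drains or the 120-second budget is exceeded (RuntimeError).
rs = pipeline.execute(params={"run-date": "2024-01-01"}, time_out=120)

# Each finished job leaves its stage outputs and matrix under its ID.
print(rs["jobs"])  # {"<job-id>": {"stages": {...}, "matrix": {...}}}
```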
ddeutil/workflow/schedule.py
CHANGED
```diff
@@ -9,19 +9,17 @@ from datetime import datetime
 from typing import Annotated
 from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
 
-from ddeutil.io import Params
 from ddeutil.workflow.vendors.__schedule import CronJob, CronRunner
 from pydantic import BaseModel, ConfigDict, Field
 from pydantic.functional_validators import field_validator
 from typing_extensions import Self
 
 from .__types import DictData
-from .
-from .loader import SimLoad
+from .loader import Loader
 
 
-class
-    """Base
+class BaseSchedule(BaseModel):
+    """Base Schedule (Schedule) Model"""
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
```
```diff
@@ -37,14 +35,11 @@ class BaseScdl(BaseModel):
     def from_loader(
         cls,
         name: str,
-        params: Params,
         externals: DictData,
     ) -> Self:
-        loader:
+        loader: Loader = Loader(name, externals=externals)
         if "cronjob" not in loader.data:
-            raise
-                "cronjob", "Config does not set ``cronjob``"
-            )
+            raise ValueError("Config does not set ``cronjob`` value")
         return cls(cronjob=loader.data["cronjob"], extras=externals)
 
     @field_validator("tz")
```
```diff
@@ -66,16 +61,16 @@ class BaseScdl(BaseModel):
         return self.cronjob.schedule(date=(start.astimezone(ZoneInfo(self.tz))))
 
 
-class
-    """
+class Schedule(BaseSchedule):
+    """Schedule (Schedule) Model.
 
     See Also:
         * ``generate()`` is the main usecase of this schedule object.
     """
 
 
-class
-    """Asia Bangkok
+class ScheduleBkk(Schedule):
+    """Asia Bangkok Schedule (Schedule) timezone Model.
 
     This model use for change timezone from utc to Asia/Bangkok
     """
```
```diff
@@ -83,5 +78,5 @@ class ScdlBkk(Scdl):
     tz: Annotated[str, Field(description="Timezone")] = "Asia/Bangkok"
 
 
-class
+class AwsSchedule(BaseSchedule):
     """Implement Schedule for AWS Service."""
```
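For context on ``BaseSchedule.generate`` above: the start datetime is shifted into the model's ``tz`` before being handed to ``CronJob.schedule``. A minimal sketch of just that timezone conversion:

```python
from datetime import datetime, timezone
from zoneinfo import ZoneInfo

# Mirror of the conversion inside generate(): move the start datetime into
# the schedule's timezone before computing cron occurrences.
tz = "Asia/Bangkok"  # the default on ScheduleBkk
start = datetime(2024, 1, 1, tzinfo=timezone.utc)
localized = start.astimezone(ZoneInfo(tz))

print(localized.isoformat())  # 2024-01-01T07:00:00+07:00
```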
ddeutil/workflow/tasks/__init__.py
CHANGED
```diff
@@ -1,10 +1,6 @@
-
-
-
-
-
-
-        "polars": lazy("ddeutil.workflow.tasks._polars.csv_to_parquet"),
-        "polars-dir": lazy("ddeutil.workflow.tasks._polars.csv_to_parquet_dir"),
-    },
-}
+# ------------------------------------------------------------------------------
+# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+# Licensed under the MIT License. See LICENSE in the project root for
+# license information.
+# ------------------------------------------------------------------------------
+from ._polars import *
```
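The removed lines were the tail of a hand-written registry of lazily imported callables; 0.0.3 replaces it with a star re-export so ``make_registry`` can discover task functions from the module itself. A sketch of the lazy-import pattern being dropped (this ``lazy`` helper is reconstructed from its call sites, not copied from the package, and the outer key is a guess since the removed hunk truncates it):

```python
import importlib
from typing import Any, Callable

def lazy(dotted_path: str) -> Callable[[], Any]:
    """Return a thunk that imports ``module.attr`` only on first call."""
    module_path, _, attr = dotted_path.rpartition(".")
    return lambda: getattr(importlib.import_module(module_path), attr)

# Shape suggested by the removed lines: tags mapped to lazy callables.
registries = {
    "csv-to-parquet": {  # hypothetical outer key; truncated in the diff
        "polars": lazy("ddeutil.workflow.tasks._polars.csv_to_parquet"),
        "polars-dir": lazy("ddeutil.workflow.tasks._polars.csv_to_parquet_dir"),
    },
}

# The import only happens here, when the thunk is first called (and only
# succeeds if the ddeutil-workflow package is installed).
func = registries["csv-to-parquet"]["polars"]()
```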