ddeutil-workflow 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -14
- ddeutil/workflow/api.py +44 -75
- ddeutil/workflow/cli.py +51 -0
- ddeutil/workflow/cron.py +713 -0
- ddeutil/workflow/exceptions.py +1 -4
- ddeutil/workflow/loader.py +65 -13
- ddeutil/workflow/log.py +164 -17
- ddeutil/workflow/on.py +18 -15
- ddeutil/workflow/pipeline.py +644 -235
- ddeutil/workflow/repeat.py +9 -5
- ddeutil/workflow/route.py +30 -37
- ddeutil/workflow/scheduler.py +398 -659
- ddeutil/workflow/stage.py +269 -103
- ddeutil/workflow/utils.py +198 -29
- ddeutil_workflow-0.0.9.dist-info/METADATA +273 -0
- ddeutil_workflow-0.0.9.dist-info/RECORD +22 -0
- {ddeutil_workflow-0.0.7.dist-info → ddeutil_workflow-0.0.9.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.9.dist-info/entry_points.txt +2 -0
- ddeutil/workflow/app.py +0 -41
- ddeutil_workflow-0.0.7.dist-info/METADATA +0 -341
- ddeutil_workflow-0.0.7.dist-info/RECORD +0 -20
- {ddeutil_workflow-0.0.7.dist-info → ddeutil_workflow-0.0.9.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.7.dist-info → ddeutil_workflow-0.0.9.dist-info}/top_level.txt +0 -0
ddeutil/workflow/exceptions.py
CHANGED
```diff
@@ -3,9 +3,6 @@
 # Licensed under the MIT License. See LICENSE in the project root for
 # license information.
 # ------------------------------------------------------------------------------
-"""
-Define Errors Object for Node package
-"""
 from __future__ import annotations
 
 
@@ -24,4 +21,4 @@ class JobException(WorkflowException): ...
 class PipelineException(WorkflowException): ...
 
 
-class ParamValueException(
+class ParamValueException(WorkflowException): ...
```
ddeutil/workflow/loader.py
CHANGED
```diff
@@ -5,6 +5,7 @@
 # ------------------------------------------------------------------------------
 from __future__ import annotations
 
+from collections.abc import Iterator
 from functools import cached_property
 from typing import TypeVar
 
@@ -35,18 +36,65 @@ class SimLoad:
         self,
         name: str,
         params: ConfParams,
-        externals: DictData,
+        externals: DictData | None = None,
     ) -> None:
         self.data: DictData = {}
         for file in PathSearch(params.engine.paths.conf).files:
-            if any(file.suffix.endswith(s) for s in ("yml", "yaml")) and (
+            if any(file.suffix.endswith(s) for s in (".yml", ".yaml")) and (
                 data := YamlFlResolve(file).read().get(name, {})
             ):
                 self.data = data
         if not self.data:
             raise ValueError(f"Config {name!r} does not found on conf path")
+
+        # TODO: Validate the version of template data that mean if version of
+        #   Template were change it should raise to upgrade package version.
+        # ---
+        # <pipeline-name>:
+        #   version: 1
+        #   type: pipeline.Pipeline
+        #
         self.conf_params: ConfParams = params
-        self.externals: DictData = externals
+        self.externals: DictData = externals or {}
+        self.data.update(self.externals)
+
+    @classmethod
+    def find(
+        cls,
+        obj: object,
+        params: ConfParams,
+        *,
+        include: list[str] | None = None,
+        exclude: list[str] | None = None,
+    ) -> Iterator[tuple[str, DictData]]:
+        """Find all object"""
+        exclude: list[str] = exclude or []
+        for file in PathSearch(params.engine.paths.conf).files:
+            if any(file.suffix.endswith(s) for s in (".yml", ".yaml")) and (
+                values := YamlFlResolve(file).read()
+            ):
+                for key, data in values.items():
+                    if key in exclude:
+                        continue
+                    if (
+                        (t := data.get("type"))
+                        and issubclass(cls.get_type(t, params), obj)
+                        and all(i in data for i in (include or data.keys()))
+                    ):
+                        yield key, data
+
+    @classmethod
+    def get_type(cls, t: str, params: ConfParams) -> AnyModelType:
+        try:
+            # NOTE: Auto adding module prefix if it does not set
+            return import_string(f"ddeutil.workflow.{t}")
+        except ModuleNotFoundError:
+            for registry in params.engine.registry:
+                try:
+                    return import_string(f"{registry}.{t}")
+                except ModuleNotFoundError:
+                    continue
+            return import_string(f"{t}")
 
     @cached_property
     def type(self) -> AnyModelType:
@@ -57,16 +105,7 @@ class SimLoad:
             raise ValueError(
                 f"the 'type' value: {_typ} does not exists in config data."
             )
-
-            # NOTE: Auto adding module prefix if it does not set
-            return import_string(f"ddeutil.workflow.{_typ}")
-        except ModuleNotFoundError:
-            for registry in self.conf_params.engine.registry:
-                try:
-                    return import_string(f"{registry}.{_typ}")
-                except ModuleNotFoundError:
-                    continue
-            return import_string(f"{_typ}")
+        return self.get_type(_typ, self.conf_params)
 
 
 class Loader(SimLoad):
@@ -76,5 +115,18 @@ class Loader(SimLoad):
     :param externals: An external parameters
     """
 
+    @classmethod
+    def find(
+        cls,
+        obj,
+        *,
+        include: list[str] | None = None,
+        exclude: list[str] | None = None,
+        **kwargs,
+    ) -> DictData:
+        return super().find(
+            obj=obj, params=config(), include=include, exclude=exclude
+        )
+
     def __init__(self, name: str, externals: DictData) -> None:
         super().__init__(name, config(), externals)
```
ddeutil/workflow/log.py
CHANGED
```diff
@@ -5,26 +5,173 @@
 # ------------------------------------------------------------------------------
 from __future__ import annotations
 
-import
-
+import json
+import os
+import re
+from abc import ABC, abstractmethod
+from datetime import datetime
+from heapq import heappop, heappush
+from pathlib import Path
+from typing import Optional, Union
 
-from
-from
+from ddeutil.core import str2bool
+from pydantic import BaseModel, Field
+from pydantic.functional_validators import model_validator
 
-
+from .__types import DictData
+from .utils import config
 
 
-
-
-
-
-
+class BaseLog(BaseModel, ABC):
+    """Base Log Pydantic Model"""
+
+    name: str = Field(description="A pipeline name.")
+    on: str = Field(description="A cronjob string of this piepline schedule.")
+    release: datetime = Field(description="A release datetime.")
+    context: DictData = Field(
+        default_factory=dict,
+        description=(
+            "A context data that receive from a pipeline execution result.",
+        ),
     )
-
-
-
+    parent_run_id: Optional[str] = Field(default=None)
+    run_id: str
+    update: datetime = Field(default_factory=datetime.now)
+
+    @model_validator(mode="after")
+    def __model_action(self):
+        if str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+            self.do_before()
+        return self
+
+    def do_before(self) -> None:
+        """To something before end up of initial log model."""
+        return
+
+    @abstractmethod
+    def save(self) -> None:
+        """Save logging"""
+        raise NotImplementedError("Log should implement ``save`` method.")
+
+
+class FileLog(BaseLog):
+    """File Log Pydantic Model that use to saving log data from result of
+    pipeline execution. It inherit from BaseLog model that implement the
+    ``self.save`` method for file.
+    """
+
+    def do_before(self) -> None:
+        """Create directory of release before saving log file."""
+        self.pointer().mkdir(parents=True, exist_ok=True)
+
+    @classmethod
+    def latest_point(
+        cls,
+        name: str,
+        *,
+        queue: list[datetime] | None = None,
+    ) -> datetime | None:
+        """Return latest point that exist in current logging pointer keeping.
+
+        :param name: A pipeline name
+        :param queue: A release queue.
+        """
+        keeping: Path = config().engine.paths.root / f"./logs/pipeline={name}/"
+        if not keeping.exists():
+            return None
+
+        keeping_files: list[int] = [
+            int(found.stem)
+            for found in keeping.glob("*")
+            if found.is_dir() and re.match(r"\d{14}", found.stem)
+        ]
+
+        latest = max(keeping_files or [None])
+
+        if not queue:
+            if latest is None:
+                return None
+            return datetime.strptime(str(latest), "%Y%m%d%H%M%S")
+
+        latest_queue: datetime = max(queue)
+
+        if latest is None:
+            return latest_queue
+
+        latest_dt: datetime = datetime.strptime(
+            str(latest), "%Y%m%d%H%M%S"
+        ).replace(tzinfo=latest_queue.tzinfo)
+        return max(latest_dt, latest_queue)
+
+    @classmethod
+    def is_pointed(
+        cls,
+        name: str,
+        release: datetime,
+        *,
+        queue: list[datetime] | None = None,
+    ) -> bool:
+        """Check this log already point.
+
+        :param name: A pipeline name.
+        :param release: A release datetime.
+        :param queue: A list of queue of datetime that already run in the
+            future.
+        """
+        if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+            return False
+
+        # NOTE: create pointer path that use the same logic of pointer method.
+        pointer: Path = (
+            config().engine.paths.root
+            / f"./logs/pipeline={name}/release={release:%Y%m%d%H%M%S}"
         )
-
-
-
-
+
+        if queue is None:
+            return pointer.exists()
+
+        if pointer.exists() and not queue:
+            return True
+
+        if queue:
+            latest: datetime = heappop(queue)
+            heappush(queue, latest)
+            if release == latest:
+                return True
+
+        return False
+
+    def pointer(self) -> Path:
+        """Return release directory path that was generated from model data."""
+        return (
+            config().engine.paths.root
+            / f"./logs/pipeline={self.name}/release={self.release:%Y%m%d%H%M%S}"
+        )
+
+    def save(self) -> None:
+        """Save logging data that receive a context data from a pipeline
+        execution result.
+        """
+        if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+            return
+
+        log_file: Path = self.pointer() / f"{self.run_id}.log"
+        log_file.write_text(
+            json.dumps(
+                self.model_dump(),
+                default=str,
+            ),
+            encoding="utf-8",
+        )
+
+
+class SQLiteLog(BaseLog):
+
+    def save(self) -> None:
+        raise NotImplementedError("SQLiteLog does not implement yet.")
+
+
+Log = Union[
+    FileLog,
+    SQLiteLog,
+]
```
ddeutil/workflow/on.py
CHANGED
```diff
@@ -13,19 +13,14 @@ from pydantic import BaseModel, ConfigDict, Field
 from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
-
-
-
-
-
-
-
-
-    WEEKDAYS,
-    CronJob,
-    CronJobYear,
-    CronRunner,
-)
+from .__types import DictData, DictStr, TupleStr
+from .cron import WEEKDAYS, CronJob, CronJobYear, CronRunner
+from .loader import Loader
+
+__all__: TupleStr = (
+    "On",
+    "interval2crontab",
+)
 
 
 def interval2crontab(
@@ -151,13 +146,13 @@ class On(BaseModel):
     )
 
     @model_validator(mode="before")
-    def __prepare_values(cls, values):
+    def __prepare_values(cls, values: DictData) -> DictData:
         if tz := values.pop("tz", None):
             values["timezone"] = tz
         return values
 
     @field_validator("tz")
-    def __validate_tz(cls, value: str):
+    def __validate_tz(cls, value: str) -> str:
         """Validate timezone value that able to initialize with ZoneInfo after
         it passing to this model in before mode."""
         try:
@@ -177,6 +172,14 @@ class On(BaseModel):
             start: datetime = datetime.fromisoformat(start)
         return self.cronjob.schedule(date=start, tz=self.tz)
 
+    def next(self, start: str | datetime) -> datetime:
+        """Return a next datetime from Cron runner object that start with any
+        date that given from input.
+        """
+        if not isinstance(start, datetime):
+            start: datetime = datetime.fromisoformat(start)
+        return self.cronjob.schedule(date=start, tz=self.tz).next
+
 
 class AwsOn(On):
     """Implement On AWS Schedule for AWS Service like AWS Glue."""
```