ddeutil-workflow 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -14
- ddeutil/workflow/api.py +44 -75
- ddeutil/workflow/cli.py +134 -0
- ddeutil/workflow/cron.py +803 -0
- ddeutil/workflow/exceptions.py +3 -0
- ddeutil/workflow/log.py +152 -47
- ddeutil/workflow/on.py +27 -18
- ddeutil/workflow/pipeline.py +527 -234
- ddeutil/workflow/repeat.py +71 -40
- ddeutil/workflow/route.py +77 -63
- ddeutil/workflow/scheduler.py +523 -616
- ddeutil/workflow/stage.py +158 -82
- ddeutil/workflow/utils.py +273 -46
- ddeutil_workflow-0.0.10.dist-info/METADATA +182 -0
- ddeutil_workflow-0.0.10.dist-info/RECORD +21 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.10.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.10.dist-info/entry_points.txt +2 -0
- ddeutil/workflow/app.py +0 -45
- ddeutil/workflow/loader.py +0 -80
- ddeutil_workflow-0.0.8.dist-info/METADATA +0 -266
- ddeutil_workflow-0.0.8.dist-info/RECORD +0 -20
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.10.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.10.dist-info}/top_level.txt +0 -0
ddeutil/workflow/exceptions.py
CHANGED
ddeutil/workflow/log.py
CHANGED
@@ -5,75 +5,180 @@
 # ------------------------------------------------------------------------------
 from __future__ import annotations
 
+import json
 import logging
+import os
+from abc import ABC, abstractmethod
 from datetime import datetime
 from functools import lru_cache
-from
+from pathlib import Path
+from typing import Optional, Union
 
+from ddeutil.core import str2bool
 from pydantic import BaseModel, Field
-from
-from
+from pydantic.functional_validators import model_validator
+from typing_extensions import Self
 
 from .__types import DictData
-
-console = Console(color_system="256", width=200, style="blue")
+from .utils import config
 
 
 @lru_cache
-def get_logger(
-    logger
-
-
-
-
-
-    "
-)
+def get_logger(name: str):
+    """Return logger with an input module name."""
+    logger = logging.getLogger(name)
+    formatter = logging.Formatter(
+        fmt=(
+            "%(asctime)s.%(msecs)03d (%(name)-10s, %(process)-5d, "
+            "%(thread)-5d) [%(levelname)-7s] %(message)-120s "
+            "(%(filename)s:%(lineno)s)"
+        ),
+        datefmt="%Y-%m-%d %H:%M:%S",
     )
-
-
+    stream = logging.StreamHandler()
+    stream.setFormatter(formatter)
+    logger.addHandler(stream)
+
+    debug: bool = str2bool(os.getenv("WORKFLOW_LOG_DEBUG_MODE", "true"))
+    logger.setLevel(logging.DEBUG if debug else logging.INFO)
     return logger
 
 
-class BaseLog(BaseModel):
-    """Base
+class BaseLog(BaseModel, ABC):
+    """Base Log Pydantic Model abstraction that implement only model fields."""
 
-
-
-
-
-
+    name: str = Field(description="A pipeline name.")
+    on: str = Field(description="A cronjob string of this piepline schedule.")
+    release: datetime = Field(description="A release datetime.")
+    context: DictData = Field(
+        default_factory=dict,
+        description=(
+            "A context data that receive from a pipeline execution result.",
+        ),
+    )
+    parent_run_id: Optional[str] = Field(default=None)
+    run_id: str
+    update: datetime = Field(default_factory=datetime.now)
+
+    @model_validator(mode="after")
+    def __model_action(self):
+        if str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+            self.do_before()
+        return self
+
+    def do_before(self) -> None:
+        """To something before end up of initial log model."""
+
+    @abstractmethod
+    def save(self, excluded: list[str] | None) -> None:
+        """Save logging"""
+        raise NotImplementedError("Log should implement ``save`` method.")
+
+
+class FileLog(BaseLog):
+    """File Log Pydantic Model that use to saving log data from result of
+    pipeline execution. It inherit from BaseLog model that implement the
+    ``self.save`` method for file.
+    """
+
+    def do_before(self) -> None:
+        """Create directory of release before saving log file."""
+        self.pointer().mkdir(parents=True, exist_ok=True)
+
+    @classmethod
+    def find_logs(cls, name: str):
+        pointer: Path = config().engine.paths.root / f"./logs/pipeline={name}"
+        for file in pointer.glob("./release=*/*.log"):
+            with file.open(mode="r", encoding="utf-8") as f:
+                yield json.load(f)
+
+    @classmethod
+    def find_log(cls, name: str, release: datetime | None = None):
+        if release is not None:
+            pointer: Path = (
+                config().engine.paths.root
+                / f"./logs/pipeline={name}/release={release:%Y%m%d%H%M%S}"
+            )
+            if not pointer.exists():
+                raise FileNotFoundError(
+                    f"Pointer: ./logs/pipeline={name}/"
+                    f"release={release:%Y%m%d%H%M%S} does not found."
+                )
+            return cls.model_validate(
+                obj=json.loads(pointer.read_text(encoding="utf-8"))
+            )
+        raise NotImplementedError("Find latest log does not implement yet.")
+
+    @classmethod
+    def is_pointed(
+        cls,
+        name: str,
+        release: datetime,
+        *,
+        queue: list[datetime] | None = None,
+    ) -> bool:
+        """Check this log already point in the destination.
+
+        :param name: A pipeline name.
+        :param release: A release datetime.
+        :param queue: A list of queue of datetime that already run in the
+            future.
+        """
+        # NOTE: Check environ variable was set for real writing.
+        if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+            return False
+
+        # NOTE: create pointer path that use the same logic of pointer method.
+        pointer: Path = (
+            config().engine.paths.root
+            / f"./logs/pipeline={name}/release={release:%Y%m%d%H%M%S}"
+        )
+
+        if not queue:
+            return pointer.exists()
+        return pointer.exists() or (release in queue)
 
+    def pointer(self) -> Path:
+        """Return release directory path that was generated from model data.
 
-
+        :rtype: Path
+        """
+        return (
+            config().engine.paths.root
+            / f"./logs/pipeline={self.name}/release={self.release:%Y%m%d%H%M%S}"
+        )
 
+    def save(self, excluded: list[str] | None) -> Self:
+        """Save logging data that receive a context data from a pipeline
+        execution result.
+
+        :param excluded: An excluded list of key name that want to pass in the
+            model_dump method.
+        :rtype: Self
+        """
+        # NOTE: Check environ variable was set for real writing.
+        if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+            return self
+
+        log_file: Path = self.pointer() / f"{self.run_id}.log"
+        log_file.write_text(
+            json.dumps(
+                self.model_dump(exclude=excluded),
+                default=str,
+                indent=2,
+            ),
+            encoding="utf-8",
+        )
+        return self
 
-class JobLog(BaseLog): ...
 
+class SQLiteLog(BaseLog):
 
-
+    def save(self, excluded: list[str] | None) -> None:
+        raise NotImplementedError("SQLiteLog does not implement yet.")
 
 
 Log = Union[
-
-
-    PipelineLog,
+    FileLog,
+    SQLiteLog,
 ]
-
-
-def push_log_memory(log: DictData):
-    """Push message log to globals log queue."""
-    print(log)
-
-
-LOGS_REGISTRY = {
-    "memory": push_log_memory,
-}
-
-
-def push_log(log: DictData, mode: str = "memory"):
-    return LOGS_REGISTRY[mode](log)
-
-
-def save_log():
-    """Save log that push to queue to target saving"""
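The net effect of this file's change is that the old ad-hoc `push_log`/`LOGS_REGISTRY` helpers are replaced by Pydantic log models (`FileLog`, `SQLiteLog`). Below is a minimal usage sketch of `FileLog`, based only on the fields and the `WORKFLOW_LOG_ENABLE_WRITE` gate visible in the diff above; the pipeline name, cron string, context, and run id values are hypothetical.

    import os
    from datetime import datetime

    from ddeutil.workflow.log import FileLog

    # Writing is opt-in; without this flag both do_before() and save() no-op.
    os.environ["WORKFLOW_LOG_ENABLE_WRITE"] = "true"

    log = FileLog(
        name="my-pipeline",                  # hypothetical pipeline name
        on="*/5 * * * *",                    # hypothetical cron string
        release=datetime(2024, 8, 1, 12, 0),
        context={"status": "success"},       # hypothetical execution result
        run_id="20240801120000000000",       # hypothetical run id
    )
    # Validation triggers do_before(), which creates the release directory;
    # save() then writes <root>/logs/pipeline=my-pipeline/release=20240801120000/<run_id>.log
    log.save(excluded=None)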
ddeutil/workflow/on.py
CHANGED
@@ -10,22 +10,18 @@ from typing import Annotated, Literal
 from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
 
 from pydantic import BaseModel, ConfigDict, Field
+from pydantic.functional_serializers import field_serializer
 from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
-
-
-
-
-
-
-
-
-    WEEKDAYS,
-    CronJob,
-    CronJobYear,
-    CronRunner,
-)
+from .__types import DictData, DictStr, TupleStr
+from .cron import WEEKDAYS, CronJob, CronJobYear, CronRunner
+from .utils import Loader
+
+__all__: TupleStr = (
+    "On",
+    "interval2crontab",
+)
 
 
 def interval2crontab(
@@ -106,7 +102,7 @@ class On(BaseModel):
         passing["cronjob"] = interval2crontab(
             **{v: value[v] for v in value if v in ("interval", "day", "time")}
         )
-        return cls(extras=externals, **passing)
+        return cls(extras=externals | passing.pop("extras", {}), **passing)
 
     @classmethod
     def from_loader(
@@ -121,6 +117,7 @@ class On(BaseModel):
         :param externals: A extras external parameter that will keep in extras.
         """
         loader: Loader = Loader(name, externals=externals)
+
         # NOTE: Validate the config type match with current connection model
         if loader.type != cls:
             raise ValueError(f"Type {loader.type} does not match with {cls}")
@@ -136,7 +133,7 @@ class On(BaseModel):
                         if v in ("interval", "day", "time")
                     }
                 ),
-                extras=externals,
+                extras=externals | loader_data.pop("extras", {}),
                 **loader_data,
             )
         )
@@ -145,19 +142,19 @@ class On(BaseModel):
         return cls.model_validate(
             obj=dict(
                 cronjob=loader_data.pop("cronjob"),
-                extras=externals,
+                extras=externals | loader_data.pop("extras", {}),
                 **loader_data,
             )
         )
 
     @model_validator(mode="before")
-    def __prepare_values(cls, values):
+    def __prepare_values(cls, values: DictData) -> DictData:
         if tz := values.pop("tz", None):
             values["timezone"] = tz
         return values
 
     @field_validator("tz")
-    def __validate_tz(cls, value: str):
+    def __validate_tz(cls, value: str) -> str:
         """Validate timezone value that able to initialize with ZoneInfo after
         it passing to this model in before mode."""
         try:
@@ -171,12 +168,24 @@ class On(BaseModel):
         """Prepare crontab value that able to receive with string type."""
         return CronJob(value) if isinstance(value, str) else value
 
+    @field_serializer("cronjob")
+    def __serialize_cronjob(self, value: CronJob) -> str:
+        return str(value)
+
     def generate(self, start: str | datetime) -> CronRunner:
         """Return Cron runner object."""
         if not isinstance(start, datetime):
             start: datetime = datetime.fromisoformat(start)
         return self.cronjob.schedule(date=start, tz=self.tz)
 
+    def next(self, start: str | datetime) -> datetime:
+        """Return a next datetime from Cron runner object that start with any
+        date that given from input.
+        """
+        if not isinstance(start, datetime):
+            start: datetime = datetime.fromisoformat(start)
+        return self.cronjob.schedule(date=start, tz=self.tz).next
+
 
 class AwsOn(On):
     """Implement On AWS Schedule for AWS Service like AWS Glue."""