ddeutil-workflow 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -2
- ddeutil/workflow/api.py +99 -31
- ddeutil/workflow/cli.py +105 -22
- ddeutil/workflow/cron.py +116 -26
- ddeutil/workflow/exceptions.py +8 -5
- ddeutil/workflow/job.py +572 -0
- ddeutil/workflow/log.py +73 -66
- ddeutil/workflow/on.py +10 -4
- ddeutil/workflow/repeat.py +68 -39
- ddeutil/workflow/route.py +194 -44
- ddeutil/workflow/scheduler.py +1020 -229
- ddeutil/workflow/stage.py +27 -23
- ddeutil/workflow/utils.py +145 -9
- ddeutil_workflow-0.0.11.dist-info/METADATA +178 -0
- ddeutil_workflow-0.0.11.dist-info/RECORD +21 -0
- {ddeutil_workflow-0.0.9.dist-info → ddeutil_workflow-0.0.11.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.11.dist-info/entry_points.txt +2 -0
- ddeutil/workflow/loader.py +0 -132
- ddeutil/workflow/pipeline.py +0 -1142
- ddeutil_workflow-0.0.9.dist-info/METADATA +0 -273
- ddeutil_workflow-0.0.9.dist-info/RECORD +0 -22
- ddeutil_workflow-0.0.9.dist-info/entry_points.txt +0 -2
- {ddeutil_workflow-0.0.9.dist-info → ddeutil_workflow-0.0.11.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.9.dist-info → ddeutil_workflow-0.0.11.dist-info}/top_level.txt +0 -0
ddeutil/workflow/log.py
CHANGED
@@ -6,32 +6,54 @@
|
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
8
|
import json
|
9
|
+
import logging
|
9
10
|
import os
|
10
|
-
import re
|
11
11
|
from abc import ABC, abstractmethod
|
12
12
|
from datetime import datetime
|
13
|
-
from
|
13
|
+
from functools import lru_cache
|
14
14
|
from pathlib import Path
|
15
15
|
from typing import Optional, Union
|
16
16
|
|
17
17
|
from ddeutil.core import str2bool
|
18
18
|
from pydantic import BaseModel, Field
|
19
19
|
from pydantic.functional_validators import model_validator
|
20
|
+
from typing_extensions import Self
|
20
21
|
|
21
22
|
from .__types import DictData
|
22
23
|
from .utils import config
|
23
24
|
|
24
25
|
|
26
|
+
@lru_cache
|
27
|
+
def get_logger(name: str):
|
28
|
+
"""Return logger with an input module name."""
|
29
|
+
logger = logging.getLogger(name)
|
30
|
+
formatter = logging.Formatter(
|
31
|
+
fmt=(
|
32
|
+
"%(asctime)s.%(msecs)03d (%(name)-10s, %(process)-5d, "
|
33
|
+
"%(thread)-5d) [%(levelname)-7s] %(message)-120s "
|
34
|
+
"(%(filename)s:%(lineno)s)"
|
35
|
+
),
|
36
|
+
datefmt="%Y-%m-%d %H:%M:%S",
|
37
|
+
)
|
38
|
+
stream = logging.StreamHandler()
|
39
|
+
stream.setFormatter(formatter)
|
40
|
+
logger.addHandler(stream)
|
41
|
+
|
42
|
+
debug: bool = str2bool(os.getenv("WORKFLOW_LOG_DEBUG_MODE", "true"))
|
43
|
+
logger.setLevel(logging.DEBUG if debug else logging.INFO)
|
44
|
+
return logger
|
45
|
+
|
46
|
+
|
25
47
|
class BaseLog(BaseModel, ABC):
|
26
|
-
"""Base Log Pydantic Model"""
|
48
|
+
"""Base Log Pydantic Model abstraction that implement only model fields."""
|
27
49
|
|
28
|
-
name: str = Field(description="A
|
50
|
+
name: str = Field(description="A workflow name.")
|
29
51
|
on: str = Field(description="A cronjob string of this piepline schedule.")
|
30
52
|
release: datetime = Field(description="A release datetime.")
|
31
53
|
context: DictData = Field(
|
32
54
|
default_factory=dict,
|
33
55
|
description=(
|
34
|
-
"A context data that receive from a
|
56
|
+
"A context data that receive from a workflow execution result.",
|
35
57
|
),
|
36
58
|
)
|
37
59
|
parent_run_id: Optional[str] = Field(default=None)
|
@@ -46,17 +68,16 @@ class BaseLog(BaseModel, ABC):
|
|
46
68
|
|
47
69
|
def do_before(self) -> None:
|
48
70
|
"""To something before end up of initial log model."""
|
49
|
-
return
|
50
71
|
|
51
72
|
@abstractmethod
|
52
|
-
def save(self) -> None:
|
73
|
+
def save(self, excluded: list[str] | None) -> None:
|
53
74
|
"""Save logging"""
|
54
75
|
raise NotImplementedError("Log should implement ``save`` method.")
|
55
76
|
|
56
77
|
|
57
78
|
class FileLog(BaseLog):
|
58
79
|
"""File Log Pydantic Model that use to saving log data from result of
|
59
|
-
|
80
|
+
workflow execution. It inherit from BaseLog model that implement the
|
60
81
|
``self.save`` method for file.
|
61
82
|
"""
|
62
83
|
|
@@ -65,43 +86,28 @@ class FileLog(BaseLog):
|
|
65
86
|
self.pointer().mkdir(parents=True, exist_ok=True)
|
66
87
|
|
67
88
|
@classmethod
|
68
|
-
def
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
) -> datetime | None:
|
74
|
-
"""Return latest point that exist in current logging pointer keeping.
|
75
|
-
|
76
|
-
:param name: A pipeline name
|
77
|
-
:param queue: A release queue.
|
78
|
-
"""
|
79
|
-
keeping: Path = config().engine.paths.root / f"./logs/pipeline={name}/"
|
80
|
-
if not keeping.exists():
|
81
|
-
return None
|
82
|
-
|
83
|
-
keeping_files: list[int] = [
|
84
|
-
int(found.stem)
|
85
|
-
for found in keeping.glob("*")
|
86
|
-
if found.is_dir() and re.match(r"\d{14}", found.stem)
|
87
|
-
]
|
88
|
-
|
89
|
-
latest = max(keeping_files or [None])
|
89
|
+
def find_logs(cls, name: str):
|
90
|
+
pointer: Path = config().engine.paths.root / f"./logs/workflow={name}"
|
91
|
+
for file in pointer.glob("./release=*/*.log"):
|
92
|
+
with file.open(mode="r", encoding="utf-8") as f:
|
93
|
+
yield json.load(f)
|
90
94
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
95
|
+
@classmethod
|
96
|
+
def find_log(cls, name: str, release: datetime | None = None):
|
97
|
+
if release is not None:
|
98
|
+
pointer: Path = (
|
99
|
+
config().engine.paths.root
|
100
|
+
/ f"./logs/workflow={name}/release={release:%Y%m%d%H%M%S}"
|
101
|
+
)
|
102
|
+
if not pointer.exists():
|
103
|
+
raise FileNotFoundError(
|
104
|
+
f"Pointer: ./logs/workflow={name}/"
|
105
|
+
f"release={release:%Y%m%d%H%M%S} does not found."
|
106
|
+
)
|
107
|
+
return cls.model_validate(
|
108
|
+
obj=json.loads(pointer.read_text(encoding="utf-8"))
|
109
|
+
)
|
110
|
+
raise NotImplementedError("Find latest log does not implement yet.")
|
105
111
|
|
106
112
|
@classmethod
|
107
113
|
def is_pointed(
|
@@ -111,63 +117,64 @@ class FileLog(BaseLog):
|
|
111
117
|
*,
|
112
118
|
queue: list[datetime] | None = None,
|
113
119
|
) -> bool:
|
114
|
-
"""Check this log already point.
|
120
|
+
"""Check this log already point in the destination.
|
115
121
|
|
116
|
-
:param name: A
|
122
|
+
:param name: A workflow name.
|
117
123
|
:param release: A release datetime.
|
118
124
|
:param queue: A list of queue of datetime that already run in the
|
119
125
|
future.
|
120
126
|
"""
|
127
|
+
# NOTE: Check environ variable was set for real writing.
|
121
128
|
if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
|
122
129
|
return False
|
123
130
|
|
124
131
|
# NOTE: create pointer path that use the same logic of pointer method.
|
125
132
|
pointer: Path = (
|
126
133
|
config().engine.paths.root
|
127
|
-
/ f"./logs/
|
134
|
+
/ f"./logs/workflow={name}/release={release:%Y%m%d%H%M%S}"
|
128
135
|
)
|
129
136
|
|
130
|
-
if queue
|
137
|
+
if not queue:
|
131
138
|
return pointer.exists()
|
132
|
-
|
133
|
-
if pointer.exists() and not queue:
|
134
|
-
return True
|
135
|
-
|
136
|
-
if queue:
|
137
|
-
latest: datetime = heappop(queue)
|
138
|
-
heappush(queue, latest)
|
139
|
-
if release == latest:
|
140
|
-
return True
|
141
|
-
|
142
|
-
return False
|
139
|
+
return pointer.exists() or (release in queue)
|
143
140
|
|
144
141
|
def pointer(self) -> Path:
|
145
|
-
"""Return release directory path that was generated from model data.
|
142
|
+
"""Return release directory path that was generated from model data.
|
143
|
+
|
144
|
+
:rtype: Path
|
145
|
+
"""
|
146
146
|
return (
|
147
147
|
config().engine.paths.root
|
148
|
-
/ f"./logs/
|
148
|
+
/ f"./logs/workflow={self.name}/release={self.release:%Y%m%d%H%M%S}"
|
149
149
|
)
|
150
150
|
|
151
|
-
def save(self) ->
|
152
|
-
"""Save logging data that receive a context data from a
|
151
|
+
def save(self, excluded: list[str] | None) -> Self:
|
152
|
+
"""Save logging data that receive a context data from a workflow
|
153
153
|
execution result.
|
154
|
+
|
155
|
+
:param excluded: An excluded list of key name that want to pass in the
|
156
|
+
model_dump method.
|
157
|
+
:rtype: Self
|
154
158
|
"""
|
159
|
+
# NOTE: Check environ variable was set for real writing.
|
155
160
|
if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
|
156
|
-
return
|
161
|
+
return self
|
157
162
|
|
158
163
|
log_file: Path = self.pointer() / f"{self.run_id}.log"
|
159
164
|
log_file.write_text(
|
160
165
|
json.dumps(
|
161
|
-
self.model_dump(),
|
166
|
+
self.model_dump(exclude=excluded),
|
162
167
|
default=str,
|
168
|
+
indent=2,
|
163
169
|
),
|
164
170
|
encoding="utf-8",
|
165
171
|
)
|
172
|
+
return self
|
166
173
|
|
167
174
|
|
168
175
|
class SQLiteLog(BaseLog):
|
169
176
|
|
170
|
-
def save(self) -> None:
|
177
|
+
def save(self, excluded: list[str] | None) -> None:
|
171
178
|
raise NotImplementedError("SQLiteLog does not implement yet.")
|
172
179
|
|
173
180
|
|
ddeutil/workflow/on.py
CHANGED
@@ -10,12 +10,13 @@ from typing import Annotated, Literal
|
|
10
10
|
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
11
11
|
|
12
12
|
from pydantic import BaseModel, ConfigDict, Field
|
13
|
+
from pydantic.functional_serializers import field_serializer
|
13
14
|
from pydantic.functional_validators import field_validator, model_validator
|
14
15
|
from typing_extensions import Self
|
15
16
|
|
16
17
|
from .__types import DictData, DictStr, TupleStr
|
17
18
|
from .cron import WEEKDAYS, CronJob, CronJobYear, CronRunner
|
18
|
-
from .
|
19
|
+
from .utils import Loader
|
19
20
|
|
20
21
|
__all__: TupleStr = (
|
21
22
|
"On",
|
@@ -101,7 +102,7 @@ class On(BaseModel):
|
|
101
102
|
passing["cronjob"] = interval2crontab(
|
102
103
|
**{v: value[v] for v in value if v in ("interval", "day", "time")}
|
103
104
|
)
|
104
|
-
return cls(extras=externals, **passing)
|
105
|
+
return cls(extras=externals | passing.pop("extras", {}), **passing)
|
105
106
|
|
106
107
|
@classmethod
|
107
108
|
def from_loader(
|
@@ -116,6 +117,7 @@ class On(BaseModel):
|
|
116
117
|
:param externals: A extras external parameter that will keep in extras.
|
117
118
|
"""
|
118
119
|
loader: Loader = Loader(name, externals=externals)
|
120
|
+
|
119
121
|
# NOTE: Validate the config type match with current connection model
|
120
122
|
if loader.type != cls:
|
121
123
|
raise ValueError(f"Type {loader.type} does not match with {cls}")
|
@@ -131,7 +133,7 @@ class On(BaseModel):
|
|
131
133
|
if v in ("interval", "day", "time")
|
132
134
|
}
|
133
135
|
),
|
134
|
-
extras=externals,
|
136
|
+
extras=externals | loader_data.pop("extras", {}),
|
135
137
|
**loader_data,
|
136
138
|
)
|
137
139
|
)
|
@@ -140,7 +142,7 @@ class On(BaseModel):
|
|
140
142
|
return cls.model_validate(
|
141
143
|
obj=dict(
|
142
144
|
cronjob=loader_data.pop("cronjob"),
|
143
|
-
extras=externals,
|
145
|
+
extras=externals | loader_data.pop("extras", {}),
|
144
146
|
**loader_data,
|
145
147
|
)
|
146
148
|
)
|
@@ -166,6 +168,10 @@ class On(BaseModel):
|
|
166
168
|
"""Prepare crontab value that able to receive with string type."""
|
167
169
|
return CronJob(value) if isinstance(value, str) else value
|
168
170
|
|
171
|
+
@field_serializer("cronjob")
|
172
|
+
def __serialize_cronjob(self, value: CronJob) -> str:
|
173
|
+
return str(value)
|
174
|
+
|
169
175
|
def generate(self, start: str | datetime) -> CronRunner:
|
170
176
|
"""Return Cron runner object."""
|
171
177
|
if not isinstance(start, datetime):
|
ddeutil/workflow/repeat.py
CHANGED
@@ -3,33 +3,63 @@
|
|
3
3
|
# Licensed under the MIT License.
|
4
4
|
# This code refs from: https://github.com/priyanshu-panwar/fastapi-utilities
|
5
5
|
# ------------------------------------------------------------------------------
|
6
|
+
from __future__ import annotations
|
7
|
+
|
6
8
|
import asyncio
|
7
|
-
import logging
|
8
9
|
import os
|
9
10
|
from asyncio import ensure_future
|
10
11
|
from datetime import datetime
|
11
12
|
from functools import wraps
|
12
13
|
from zoneinfo import ZoneInfo
|
13
14
|
|
14
|
-
from croniter import croniter
|
15
15
|
from starlette.concurrency import run_in_threadpool
|
16
16
|
|
17
|
+
from .cron import CronJob
|
18
|
+
from .log import get_logger
|
19
|
+
|
20
|
+
logger = get_logger("ddeutil.workflow")
|
17
21
|
|
18
|
-
|
22
|
+
|
23
|
+
def get_cronjob_delta(cron: str) -> float:
|
19
24
|
"""This function returns the time delta between now and the next cron
|
20
25
|
execution time.
|
21
26
|
"""
|
22
27
|
now: datetime = datetime.now(
|
23
28
|
tz=ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
|
24
29
|
)
|
25
|
-
cron =
|
26
|
-
return (cron.
|
30
|
+
cron = CronJob(cron)
|
31
|
+
return (cron.schedule(now).next - now).total_seconds()
|
32
|
+
|
33
|
+
|
34
|
+
def cron_valid(cron: str):
|
35
|
+
try:
|
36
|
+
CronJob(cron)
|
37
|
+
except Exception as err:
|
38
|
+
raise ValueError(f"Crontab value does not valid, {cron}") from err
|
39
|
+
|
40
|
+
|
41
|
+
async def run_func(
|
42
|
+
is_coroutine,
|
43
|
+
func,
|
44
|
+
*args,
|
45
|
+
raise_exceptions: bool = False,
|
46
|
+
**kwargs,
|
47
|
+
):
|
48
|
+
try:
|
49
|
+
if is_coroutine:
|
50
|
+
await func(*args, **kwargs)
|
51
|
+
else:
|
52
|
+
await run_in_threadpool(func, *args, **kwargs)
|
53
|
+
except Exception as e:
|
54
|
+
logger.exception(e)
|
55
|
+
if raise_exceptions:
|
56
|
+
raise e
|
27
57
|
|
28
58
|
|
29
59
|
def repeat_at(
|
30
60
|
*,
|
31
61
|
cron: str,
|
32
|
-
|
62
|
+
delay: float = 0,
|
33
63
|
raise_exceptions: bool = False,
|
34
64
|
max_repetitions: int = None,
|
35
65
|
):
|
@@ -38,40 +68,38 @@ def repeat_at(
|
|
38
68
|
|
39
69
|
:param cron: str
|
40
70
|
Cron-style string for periodic execution, eg. '0 0 * * *' every midnight
|
41
|
-
:param
|
42
|
-
Logger object to log exceptions
|
71
|
+
:param delay:
|
43
72
|
:param raise_exceptions: bool (default False)
|
44
73
|
Whether to raise exceptions or log them
|
45
74
|
:param max_repetitions: int (default None)
|
46
75
|
Maximum number of times to repeat the function. If None, repeat
|
47
76
|
indefinitely.
|
48
|
-
|
49
77
|
"""
|
78
|
+
if max_repetitions and max_repetitions <= 0:
|
79
|
+
raise ValueError(
|
80
|
+
"max_repetitions should more than zero if it want to set"
|
81
|
+
)
|
50
82
|
|
51
83
|
def decorator(func):
|
52
|
-
is_coroutine = asyncio.iscoroutinefunction(func)
|
84
|
+
is_coroutine: bool = asyncio.iscoroutinefunction(func)
|
53
85
|
|
54
86
|
@wraps(func)
|
55
87
|
def wrapper(*_args, **_kwargs):
|
56
|
-
repititions = 0
|
57
|
-
|
58
|
-
raise ValueError("Invalid cron expression")
|
88
|
+
repititions: int = 0
|
89
|
+
cron_valid(cron)
|
59
90
|
|
60
91
|
async def loop(*args, **kwargs):
|
61
92
|
nonlocal repititions
|
62
93
|
while max_repetitions is None or repititions < max_repetitions:
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
logger.exception(e)
|
73
|
-
if raise_exceptions:
|
74
|
-
raise e
|
94
|
+
sleep_time = get_cronjob_delta(cron) + delay
|
95
|
+
await asyncio.sleep(sleep_time)
|
96
|
+
await run_func(
|
97
|
+
is_coroutine,
|
98
|
+
func,
|
99
|
+
*args,
|
100
|
+
raise_exceptions=raise_exceptions,
|
101
|
+
**kwargs,
|
102
|
+
)
|
75
103
|
repititions += 1
|
76
104
|
|
77
105
|
ensure_future(loop(*_args, **_kwargs))
|
@@ -85,7 +113,6 @@ def repeat_every(
|
|
85
113
|
*,
|
86
114
|
seconds: float,
|
87
115
|
wait_first: bool = False,
|
88
|
-
logger: logging.Logger = None,
|
89
116
|
raise_exceptions: bool = False,
|
90
117
|
max_repetitions: int = None,
|
91
118
|
):
|
@@ -97,17 +124,19 @@ def repeat_every(
|
|
97
124
|
:param wait_first: bool (default False)
|
98
125
|
Whether to wait `seconds` seconds before executing the function for the
|
99
126
|
first time.
|
100
|
-
:param logger: logging.Logger (default None)
|
101
|
-
The logger to use for logging exceptions.
|
102
127
|
:param raise_exceptions: bool (default False)
|
103
128
|
Whether to raise exceptions instead of logging them.
|
104
129
|
:param max_repetitions: int (default None)
|
105
130
|
The maximum number of times to repeat the function. If None, the
|
106
131
|
function will repeat indefinitely.
|
107
132
|
"""
|
133
|
+
if max_repetitions and max_repetitions <= 0:
|
134
|
+
raise ValueError(
|
135
|
+
"max_repetitions should more than zero if it want to set"
|
136
|
+
)
|
108
137
|
|
109
138
|
def decorator(func):
|
110
|
-
is_coroutine = asyncio.iscoroutinefunction(func)
|
139
|
+
is_coroutine: bool = asyncio.iscoroutinefunction(func)
|
111
140
|
|
112
141
|
@wraps(func)
|
113
142
|
async def wrapper(*_args, **_kwargs):
|
@@ -115,19 +144,19 @@ def repeat_every(
|
|
115
144
|
|
116
145
|
async def loop(*args, **kwargs):
|
117
146
|
nonlocal repetitions
|
147
|
+
|
118
148
|
if wait_first:
|
119
149
|
await asyncio.sleep(seconds)
|
150
|
+
|
120
151
|
while max_repetitions is None or repetitions < max_repetitions:
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
if raise_exceptions:
|
130
|
-
raise e
|
152
|
+
await run_func(
|
153
|
+
is_coroutine,
|
154
|
+
func,
|
155
|
+
*args,
|
156
|
+
raise_exceptions=raise_exceptions,
|
157
|
+
**kwargs,
|
158
|
+
)
|
159
|
+
|
131
160
|
repetitions += 1
|
132
161
|
await asyncio.sleep(seconds)
|
133
162
|
|