ddeutil-workflow 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -14
- ddeutil/workflow/api.py +44 -75
- ddeutil/workflow/cli.py +134 -0
- ddeutil/workflow/cron.py +803 -0
- ddeutil/workflow/exceptions.py +3 -0
- ddeutil/workflow/log.py +152 -47
- ddeutil/workflow/on.py +27 -18
- ddeutil/workflow/pipeline.py +527 -234
- ddeutil/workflow/repeat.py +71 -40
- ddeutil/workflow/route.py +77 -63
- ddeutil/workflow/scheduler.py +523 -616
- ddeutil/workflow/stage.py +158 -82
- ddeutil/workflow/utils.py +273 -46
- ddeutil_workflow-0.0.10.dist-info/METADATA +182 -0
- ddeutil_workflow-0.0.10.dist-info/RECORD +21 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.10.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.10.dist-info/entry_points.txt +2 -0
- ddeutil/workflow/app.py +0 -45
- ddeutil/workflow/loader.py +0 -80
- ddeutil_workflow-0.0.8.dist-info/METADATA +0 -266
- ddeutil_workflow-0.0.8.dist-info/RECORD +0 -20
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.10.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.10.dist-info}/top_level.txt +0 -0
ddeutil/workflow/scheduler.py
CHANGED
@@ -6,708 +6,615 @@
|
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
8
|
import copy
|
9
|
+
import json
|
10
|
+
import logging
|
11
|
+
import os
|
12
|
+
import time
|
9
13
|
from collections.abc import Iterator
|
10
|
-
from
|
14
|
+
from concurrent.futures import Future, ProcessPoolExecutor, as_completed
|
15
|
+
from dataclasses import dataclass
|
11
16
|
from datetime import datetime, timedelta
|
12
|
-
from functools import
|
13
|
-
from
|
14
|
-
from
|
15
|
-
|
16
|
-
from
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
from functools import wraps
|
18
|
+
from heapq import heappush
|
19
|
+
from threading import Thread
|
20
|
+
from typing import Optional
|
21
|
+
from zoneinfo import ZoneInfo
|
22
|
+
|
23
|
+
from dotenv import load_dotenv
|
24
|
+
from pydantic import BaseModel, Field
|
25
|
+
from pydantic.functional_validators import model_validator
|
26
|
+
from typing_extensions import Self
|
27
|
+
|
28
|
+
try:
|
29
|
+
from schedule import CancelJob, Scheduler
|
30
|
+
except ImportError:
|
31
|
+
raise ImportError(
|
32
|
+
"Should install schedule package before use this module."
|
33
|
+
) from None
|
34
|
+
|
35
|
+
from .__types import DictData
|
36
|
+
from .cron import CronRunner
|
37
|
+
from .exceptions import WorkflowException
|
38
|
+
from .log import FileLog, Log, get_logger
|
39
|
+
from .on import On
|
40
|
+
from .pipeline import Pipeline
|
41
|
+
from .utils import (
|
42
|
+
Loader,
|
43
|
+
Result,
|
44
|
+
batch,
|
45
|
+
delay,
|
46
|
+
get_diff_sec,
|
47
|
+
param2template,
|
20
48
|
)
|
21
|
-
|
22
|
-
|
23
|
-
|
49
|
+
|
50
|
+
load_dotenv()
|
51
|
+
logger = get_logger("ddeutil.workflow")
|
52
|
+
logging.getLogger("schedule").setLevel(logging.INFO)
|
53
|
+
|
54
|
+
|
55
|
+
__all__ = (
|
56
|
+
"PipelineSchedule",
|
57
|
+
"Schedule",
|
58
|
+
"workflow",
|
24
59
|
)
|
25
60
|
|
26
|
-
WEEKDAYS: dict[str, int] = {
|
27
|
-
"Sun": 0,
|
28
|
-
"Mon": 1,
|
29
|
-
"Tue": 2,
|
30
|
-
"Wed": 3,
|
31
|
-
"Thu": 4,
|
32
|
-
"Fri": 5,
|
33
|
-
"Sat": 6,
|
34
|
-
}
|
35
61
|
|
62
|
+
class PipelineSchedule(BaseModel):
|
63
|
+
"""Pipeline schedule Pydantic Model."""
|
36
64
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
return (
|
47
|
-
f"{self.__class__}(name={self.name!r}, range={self.range},"
|
48
|
-
f"min={self.min}, max={self.max}"
|
49
|
-
f"{f', alt={self.alt}' if self.alt else ''})"
|
50
|
-
)
|
65
|
+
name: str = Field(description="A pipeline name.")
|
66
|
+
on: list[On] = Field(
|
67
|
+
default_factory=list,
|
68
|
+
description="An override On instance value.",
|
69
|
+
)
|
70
|
+
params: DictData = Field(
|
71
|
+
default_factory=dict,
|
72
|
+
description="A parameters that want to use to pipeline execution.",
|
73
|
+
)
|
51
74
|
|
75
|
+
@model_validator(mode="before")
|
76
|
+
def __prepare__values(cls, values: DictData) -> DictData:
|
77
|
+
"""Prepare incoming values before validating with model fields."""
|
52
78
|
|
53
|
-
|
54
|
-
class Options:
|
55
|
-
output_weekday_names: bool = False
|
56
|
-
output_month_names: bool = False
|
57
|
-
output_hashes: bool = False
|
58
|
-
|
59
|
-
|
60
|
-
CRON_UNITS: tuple[Unit, ...] = (
|
61
|
-
Unit(
|
62
|
-
name="minute",
|
63
|
-
range=partial(range, 0, 60),
|
64
|
-
min=0,
|
65
|
-
max=59,
|
66
|
-
),
|
67
|
-
Unit(
|
68
|
-
name="hour",
|
69
|
-
range=partial(range, 0, 24),
|
70
|
-
min=0,
|
71
|
-
max=23,
|
72
|
-
),
|
73
|
-
Unit(
|
74
|
-
name="day",
|
75
|
-
range=partial(range, 1, 32),
|
76
|
-
min=1,
|
77
|
-
max=31,
|
78
|
-
),
|
79
|
-
Unit(
|
80
|
-
name="month",
|
81
|
-
range=partial(range, 1, 13),
|
82
|
-
min=1,
|
83
|
-
max=12,
|
84
|
-
alt=[
|
85
|
-
"JAN",
|
86
|
-
"FEB",
|
87
|
-
"MAR",
|
88
|
-
"APR",
|
89
|
-
"MAY",
|
90
|
-
"JUN",
|
91
|
-
"JUL",
|
92
|
-
"AUG",
|
93
|
-
"SEP",
|
94
|
-
"OCT",
|
95
|
-
"NOV",
|
96
|
-
"DEC",
|
97
|
-
],
|
98
|
-
),
|
99
|
-
Unit(
|
100
|
-
name="weekday",
|
101
|
-
range=partial(range, 0, 7),
|
102
|
-
min=0,
|
103
|
-
max=6,
|
104
|
-
alt=[
|
105
|
-
"SUN",
|
106
|
-
"MON",
|
107
|
-
"TUE",
|
108
|
-
"WED",
|
109
|
-
"THU",
|
110
|
-
"FRI",
|
111
|
-
"SAT",
|
112
|
-
],
|
113
|
-
),
|
114
|
-
)
|
79
|
+
values["name"] = values["name"].replace(" ", "_")
|
115
80
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
81
|
+
cls.__bypass_on(values)
|
82
|
+
return values
|
83
|
+
|
84
|
+
@classmethod
|
85
|
+
def __bypass_on(cls, data: DictData, externals: DictData | None = None):
|
86
|
+
"""Bypass the on data to loaded config data."""
|
87
|
+
if on := data.pop("on", []):
|
88
|
+
|
89
|
+
if isinstance(on, str):
|
90
|
+
on = [on]
|
124
91
|
|
92
|
+
if any(not isinstance(n, (dict, str)) for n in on):
|
93
|
+
raise TypeError("The ``on`` key should be list of str or dict")
|
125
94
|
|
126
|
-
|
127
|
-
|
128
|
-
|
95
|
+
# NOTE: Pass on value to Loader and keep on model object to on field
|
96
|
+
data["on"] = [
|
97
|
+
(
|
98
|
+
Loader(n, externals=(externals or {})).data
|
99
|
+
if isinstance(n, str)
|
100
|
+
else n
|
101
|
+
)
|
102
|
+
for n in on
|
103
|
+
]
|
104
|
+
return data
|
129
105
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
106
|
+
|
107
|
+
class Schedule(BaseModel):
|
108
|
+
"""Schedule Pydantic Model that use to run with scheduler package. It does
|
109
|
+
not equal the on value in Pipeline model but it use same logic to running
|
110
|
+
release date with crontab interval.
|
111
|
+
"""
|
112
|
+
|
113
|
+
desc: Optional[str] = Field(
|
114
|
+
default=None,
|
115
|
+
description=(
|
116
|
+
"A schedule description that can be string of markdown content."
|
117
|
+
),
|
118
|
+
)
|
119
|
+
pipelines: list[PipelineSchedule] = Field(
|
120
|
+
default_factory=list,
|
121
|
+
description="A list of PipelineSchedule models.",
|
134
122
|
)
|
135
123
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
) ->
|
142
|
-
|
143
|
-
self.options: Options = options
|
144
|
-
|
145
|
-
if isinstance(values, str):
|
146
|
-
values: list[int] = self.from_str(values) if values != "?" else []
|
147
|
-
elif isinstance_check(values, list[int]):
|
148
|
-
values: list[int] = self.replace_weekday(values)
|
149
|
-
else:
|
150
|
-
raise TypeError(f"Invalid type of value in cron part: {values}.")
|
151
|
-
|
152
|
-
self.values: list[int] = self.out_of_range(
|
153
|
-
sorted(dict.fromkeys(values))
|
154
|
-
)
|
124
|
+
@classmethod
|
125
|
+
def from_loader(
|
126
|
+
cls,
|
127
|
+
name: str,
|
128
|
+
externals: DictData | None = None,
|
129
|
+
) -> Self:
|
130
|
+
loader: Loader = Loader(name, externals=(externals or {}))
|
155
131
|
|
156
|
-
|
157
|
-
|
158
|
-
|
132
|
+
# NOTE: Validate the config type match with current connection model
|
133
|
+
if loader.type != cls:
|
134
|
+
raise ValueError(f"Type {loader.type} does not match with {cls}")
|
159
135
|
|
160
|
-
|
161
|
-
return _hash
|
136
|
+
loader_data: DictData = copy.deepcopy(loader.data)
|
162
137
|
|
163
|
-
|
164
|
-
|
165
|
-
return f"{_hash}/{self.step}"
|
166
|
-
_hash: str = (
|
167
|
-
f"H({self.filler(self.min)}-{self.filler(self.max)})"
|
168
|
-
if _hash == "H"
|
169
|
-
else f"{self.filler(self.min)}-{self.filler(self.max)}"
|
170
|
-
)
|
171
|
-
return f"{_hash}/{self.step}"
|
138
|
+
# NOTE: Add name to loader data
|
139
|
+
loader_data["name"] = name.replace(" ", "_")
|
172
140
|
|
173
|
-
|
174
|
-
for cron_range in self.ranges():
|
175
|
-
if isinstance(cron_range, list):
|
176
|
-
cron_range_strings.append(
|
177
|
-
f"{self.filler(cron_range[0])}-{self.filler(cron_range[1])}"
|
178
|
-
)
|
179
|
-
else:
|
180
|
-
cron_range_strings.append(f"{self.filler(cron_range)}")
|
181
|
-
return ",".join(cron_range_strings) if cron_range_strings else "?"
|
182
|
-
|
183
|
-
def __repr__(self) -> str:
|
184
|
-
return (
|
185
|
-
f"{self.__class__.__name__}"
|
186
|
-
f"(unit={self.unit}, values={self.__str__()!r})"
|
187
|
-
)
|
141
|
+
return cls.model_validate(obj=loader_data)
|
188
142
|
|
189
|
-
def
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
"""Returns the smallest value in the range."""
|
204
|
-
return self.values[0]
|
205
|
-
|
206
|
-
@property
|
207
|
-
def max(self) -> int:
|
208
|
-
"""Returns the largest value in the range."""
|
209
|
-
return self.values[-1]
|
210
|
-
|
211
|
-
@property
|
212
|
-
def step(self) -> Optional[int]:
|
213
|
-
"""Returns the difference between first and second elements in the
|
214
|
-
range.
|
143
|
+
def tasks(
|
144
|
+
self,
|
145
|
+
start_date: datetime,
|
146
|
+
queue: dict[str, list[datetime]],
|
147
|
+
running: dict[str, list[datetime]],
|
148
|
+
externals: DictData | None = None,
|
149
|
+
) -> list[PipelineTask]:
|
150
|
+
"""Generate Task from the current datetime.
|
151
|
+
|
152
|
+
:param start_date: A start date that get from the workflow schedule.
|
153
|
+
:param queue:
|
154
|
+
:param running:
|
155
|
+
:param externals: An external parameters that pass to the Loader object.
|
156
|
+
:rtype: list[PipelineTask]
|
215
157
|
"""
|
216
|
-
if (
|
217
|
-
len(self.values) > 2
|
218
|
-
and (step := self.values[1] - self.values[0]) > 1
|
219
|
-
):
|
220
|
-
return step
|
221
158
|
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
return len(self.values) == (self.unit.max - self.unit.min + 1)
|
159
|
+
# NOTE: Create pair of pipeline and on.
|
160
|
+
pipeline_tasks: list[PipelineTask] = []
|
161
|
+
externals: DictData = externals or {}
|
226
162
|
|
227
|
-
|
228
|
-
|
229
|
-
|
163
|
+
for pipe in self.pipelines:
|
164
|
+
pipeline: Pipeline = Pipeline.from_loader(
|
165
|
+
pipe.name, externals=externals
|
166
|
+
)
|
230
167
|
|
231
|
-
|
232
|
-
|
168
|
+
# NOTE: Create default list of release datetime.
|
169
|
+
queue[pipe.name]: list[datetime] = []
|
170
|
+
running[pipe.name]: list[datetime] = []
|
171
|
+
|
172
|
+
for on in pipeline.on:
|
173
|
+
on_gen = on.generate(start_date)
|
174
|
+
next_running_date = on_gen.next
|
175
|
+
while next_running_date in queue[pipe.name]:
|
176
|
+
next_running_date = on_gen.next
|
177
|
+
|
178
|
+
heappush(queue[pipe.name], next_running_date)
|
179
|
+
|
180
|
+
pipeline_tasks.append(
|
181
|
+
PipelineTask(
|
182
|
+
pipeline=pipeline,
|
183
|
+
on=on,
|
184
|
+
params=pipe.params,
|
185
|
+
queue=queue,
|
186
|
+
running=running,
|
187
|
+
),
|
188
|
+
)
|
233
189
|
|
234
|
-
|
235
|
-
TODO: if you didn't care what day of the week the 7th was, you
|
236
|
-
could enter ? in the Day-of-week field.
|
237
|
-
TODO: L : the Day-of-month or Day-of-week fields specifies the last day
|
238
|
-
of the month or week.
|
239
|
-
DEV: use -1 for represent with L
|
240
|
-
TODO: W : In the Day-of-month field, 3W specifies the weekday closest
|
241
|
-
to the third day of the month.
|
242
|
-
TODO: # : 3#2 would be the second Tuesday of the month,
|
243
|
-
the 3 refers to Tuesday because it is the third day of each week.
|
190
|
+
return pipeline_tasks
|
244
191
|
|
245
|
-
Noted:
|
246
|
-
- 0 10 * * ? *
|
247
|
-
Run at 10:00 am (UTC) every day
|
248
192
|
|
249
|
-
|
250
|
-
|
193
|
+
def catch_exceptions(cancel_on_failure=False):
|
194
|
+
"""Catch exception error from scheduler job."""
|
251
195
|
|
252
|
-
|
253
|
-
|
196
|
+
def catch_exceptions_decorator(func):
|
197
|
+
@wraps(func)
|
198
|
+
def wrapper(*args, **kwargs):
|
199
|
+
try:
|
200
|
+
return func(*args, **kwargs)
|
201
|
+
except Exception as err:
|
202
|
+
logger.exception(err)
|
203
|
+
if cancel_on_failure:
|
204
|
+
return CancelJob
|
254
205
|
|
255
|
-
|
256
|
-
Run at 8:00 am (UTC) every 1st day of the month
|
206
|
+
return wrapper
|
257
207
|
|
258
|
-
|
259
|
-
Run every 15 minutes
|
208
|
+
return catch_exceptions_decorator
|
260
209
|
|
261
|
-
- 0/10 * ? * MON-FRI *
|
262
|
-
Run every 10 minutes Monday through Friday
|
263
210
|
|
264
|
-
|
265
|
-
|
266
|
-
5:55 pm (UTC)
|
211
|
+
def catch_exceptions_method(cancel_on_failure=False):
|
212
|
+
"""Catch exception error from scheduler job."""
|
267
213
|
|
268
|
-
|
269
|
-
|
214
|
+
def catch_exceptions_decorator(func):
|
215
|
+
@wraps(func)
|
216
|
+
def wrapper(self, *args, **kwargs):
|
217
|
+
try:
|
218
|
+
return func(self, *args, **kwargs)
|
219
|
+
except Exception as err:
|
220
|
+
logger.exception(err)
|
221
|
+
if cancel_on_failure:
|
222
|
+
return CancelJob
|
270
223
|
|
271
|
-
|
272
|
-
Run at 10:15am UTC on the last Friday of each month during the
|
273
|
-
years 2002 to 2005
|
224
|
+
return wrapper
|
274
225
|
|
275
|
-
|
276
|
-
"""
|
277
|
-
interval_list: list[list[int]] = []
|
278
|
-
for _value in self.replace_alternative(value.upper()).split(","):
|
279
|
-
if _value == "?":
|
280
|
-
continue
|
281
|
-
elif _value.count("/") > 1:
|
282
|
-
raise ValueError(
|
283
|
-
f"Invalid value {_value!r} in cron part {value!r}"
|
284
|
-
)
|
226
|
+
return catch_exceptions_decorator
|
285
227
|
|
286
|
-
value_range, value_step = must_split(_value, "/", maxsplit=1)
|
287
|
-
value_range_list: list[int] = self.out_of_range(
|
288
|
-
self._parse_range(value_range)
|
289
|
-
)
|
290
|
-
|
291
|
-
if (value_step and not is_int(value_step)) or value_step == "":
|
292
|
-
raise ValueError(
|
293
|
-
f"Invalid interval step value {value_step!r} for "
|
294
|
-
f"{self.unit.name!r}"
|
295
|
-
)
|
296
228
|
|
297
|
-
|
298
|
-
|
229
|
+
@dataclass(frozen=True)
|
230
|
+
class PipelineTask:
|
231
|
+
"""Pipeline task dataclass that use to keep mapping data and objects for
|
232
|
+
passing in multithreading task.
|
233
|
+
"""
|
299
234
|
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
return value
|
235
|
+
pipeline: Pipeline
|
236
|
+
on: On
|
237
|
+
params: DictData
|
238
|
+
queue: list[datetime]
|
239
|
+
running: list[datetime]
|
306
240
|
|
307
|
-
|
308
|
-
|
241
|
+
@catch_exceptions_method(cancel_on_failure=True)
|
242
|
+
def release(self, log: Log | None = None) -> None:
|
243
|
+
"""Pipeline release, it will use with the same logic of
|
244
|
+
`pipeline.release` method.
|
309
245
|
|
310
|
-
:param
|
311
|
-
:rtype: list[int]
|
246
|
+
:param log: A log object.
|
312
247
|
"""
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
248
|
+
tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
|
249
|
+
log: Log = log or FileLog
|
250
|
+
pipeline: Pipeline = self.pipeline
|
251
|
+
on: On = self.on
|
317
252
|
|
318
|
-
|
319
|
-
|
253
|
+
gen: CronRunner = on.generate(
|
254
|
+
datetime.now(tz=tz).replace(second=0, microsecond=0)
|
255
|
+
)
|
256
|
+
cron_tz: ZoneInfo = gen.tz
|
320
257
|
|
321
|
-
|
322
|
-
:
|
258
|
+
# NOTE: get next schedule time that generate from now.
|
259
|
+
next_time: datetime = gen.next
|
323
260
|
|
324
|
-
:
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
f"Value {first!r} out of range for {self.unit.name!r}"
|
330
|
-
)
|
331
|
-
elif (last := values[-1]) > self.unit.max:
|
332
|
-
raise ValueError(
|
333
|
-
f"Value {last!r} out of range for {self.unit.name!r}"
|
334
|
-
)
|
335
|
-
return values
|
261
|
+
# NOTE: get next utils it does not running.
|
262
|
+
while log.is_pointed(
|
263
|
+
pipeline.name, next_time, queue=self.running[pipeline.name]
|
264
|
+
):
|
265
|
+
next_time: datetime = gen.next
|
336
266
|
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
raise ValueError(f"Invalid value {value}")
|
343
|
-
try:
|
344
|
-
sub_parts: list[int] = list(map(int, value.split("-")))
|
345
|
-
except ValueError as exc:
|
346
|
-
raise ValueError(f"Invalid value {value!r} --> {exc}") from exc
|
347
|
-
|
348
|
-
if len(sub_parts) == 2:
|
349
|
-
min_value, max_value = sub_parts
|
350
|
-
if max_value < min_value:
|
351
|
-
raise ValueError(f"Max range is less than min range in {value}")
|
352
|
-
sub_parts: list[int] = list(range(min_value, max_value + 1))
|
353
|
-
return self.replace_weekday(sub_parts)
|
354
|
-
|
355
|
-
def _interval(
|
356
|
-
self,
|
357
|
-
values: list[int],
|
358
|
-
step: int | None = None,
|
359
|
-
) -> list[int]:
|
360
|
-
"""Applies an interval step to a collection of values."""
|
361
|
-
if not step:
|
362
|
-
return values
|
363
|
-
elif (_step := int(step)) < 1:
|
364
|
-
raise ValueError(
|
365
|
-
f"Invalid interval step value {_step!r} for "
|
366
|
-
f"{self.unit.name!r}"
|
367
|
-
)
|
368
|
-
min_value: int = values[0]
|
369
|
-
return [
|
370
|
-
value
|
371
|
-
for value in values
|
372
|
-
if (value % _step == min_value % _step) or (value == min_value)
|
373
|
-
]
|
267
|
+
logger.debug(
|
268
|
+
f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
|
269
|
+
f"{next_time:%Y-%m-%d %H:%M:%S}"
|
270
|
+
)
|
271
|
+
heappush(self.running[pipeline.name], next_time)
|
374
272
|
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
return False
|
380
|
-
for idx, value in enumerate(self.values):
|
381
|
-
if idx == 0:
|
382
|
-
continue
|
383
|
-
elif (value - self.values[idx - 1]) != step:
|
384
|
-
return False
|
385
|
-
return True
|
386
|
-
|
387
|
-
@property
|
388
|
-
def is_full_interval(self) -> bool:
|
389
|
-
"""Returns true if the range contains all the interval values."""
|
390
|
-
if step := self.step:
|
391
|
-
return (
|
392
|
-
self.min == self.unit.min
|
393
|
-
and (self.max + step) > self.unit.max
|
394
|
-
and (
|
395
|
-
len(self.values)
|
396
|
-
== (round((self.max - self.min) / step) + 1)
|
397
|
-
)
|
273
|
+
if get_diff_sec(next_time, tz=cron_tz) > 55:
|
274
|
+
logger.debug(
|
275
|
+
f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} "
|
276
|
+
f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
|
398
277
|
)
|
399
|
-
return False
|
400
278
|
|
401
|
-
|
402
|
-
|
403
|
-
|
279
|
+
# NOTE: Add this next running datetime that not in period to queue
|
280
|
+
# and remove it to running.
|
281
|
+
self.running[pipeline.name].remove(next_time)
|
282
|
+
heappush(self.queue[pipeline.name], next_time)
|
404
283
|
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
next_value: int = self.values[idx + 1]
|
412
|
-
except IndexError:
|
413
|
-
next_value: int = -1
|
414
|
-
if value != (next_value - 1):
|
415
|
-
# NOTE: ``next_value`` is not the subsequent number
|
416
|
-
if start_number is None:
|
417
|
-
# NOTE:
|
418
|
-
# The last number of the list ``self.values`` is not in a
|
419
|
-
# range.
|
420
|
-
multi_dim_values.append(value)
|
421
|
-
else:
|
422
|
-
multi_dim_values.append([start_number, value])
|
423
|
-
start_number: Optional[int] = None
|
424
|
-
elif start_number is None:
|
425
|
-
start_number: Optional[int] = value
|
426
|
-
return multi_dim_values
|
427
|
-
|
428
|
-
def filler(self, value: int) -> int | str:
|
429
|
-
"""Formats weekday and month names as string when the relevant options
|
430
|
-
are set.
|
431
|
-
|
432
|
-
:param value: a int value
|
433
|
-
:type value: int
|
434
|
-
|
435
|
-
:rtype: int | str
|
436
|
-
"""
|
437
|
-
return (
|
438
|
-
self.unit.alt[value - self.unit.min]
|
439
|
-
if (
|
440
|
-
(
|
441
|
-
self.options.output_weekday_names
|
442
|
-
and self.unit.name == "weekday"
|
443
|
-
)
|
444
|
-
or (
|
445
|
-
self.options.output_month_names
|
446
|
-
and self.unit.name == "month"
|
447
|
-
)
|
448
|
-
)
|
449
|
-
else value
|
284
|
+
time.sleep(0.2)
|
285
|
+
return
|
286
|
+
|
287
|
+
logger.debug(
|
288
|
+
f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
|
289
|
+
f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
|
450
290
|
)
|
451
291
|
|
292
|
+
# NOTE: Release when the time is nearly to schedule time.
|
293
|
+
while (duration := get_diff_sec(next_time, tz=tz)) > (15 + 5):
|
294
|
+
logger.debug(
|
295
|
+
f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} "
|
296
|
+
f": Sleep until: {duration}"
|
297
|
+
)
|
298
|
+
time.sleep(15)
|
452
299
|
|
453
|
-
|
454
|
-
class CronJob:
|
455
|
-
"""The Cron Job Converter object that generate datetime dimension of cron
|
456
|
-
job schedule format,
|
300
|
+
time.sleep(0.5)
|
457
301
|
|
458
|
-
|
302
|
+
# NOTE: Release parameter that use to change if params has
|
303
|
+
# templating.
|
304
|
+
release_params: DictData = {
|
305
|
+
"release": {
|
306
|
+
"logical_date": next_time,
|
307
|
+
},
|
308
|
+
}
|
459
309
|
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
310
|
+
# WARNING: Re-create pipeline object that use new running pipeline
|
311
|
+
# ID.
|
312
|
+
runner: Pipeline = pipeline.get_running_id(run_id=pipeline.new_run_id)
|
313
|
+
rs: Result = runner.execute(
|
314
|
+
params=param2template(self.params, release_params),
|
315
|
+
)
|
316
|
+
logger.debug(
|
317
|
+
f"({runner.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
|
318
|
+
f"End release - {next_time:%Y-%m-%d %H:%M:%S}"
|
319
|
+
)
|
466
320
|
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
321
|
+
del runner
|
322
|
+
|
323
|
+
# NOTE: Set parent ID on this result.
|
324
|
+
rs.set_parent_run_id(pipeline.run_id)
|
325
|
+
|
326
|
+
# NOTE: Save result to log object saving.
|
327
|
+
rs_log: Log = log.model_validate(
|
328
|
+
{
|
329
|
+
"name": pipeline.name,
|
330
|
+
"on": str(on.cronjob),
|
331
|
+
"release": next_time,
|
332
|
+
"context": rs.context,
|
333
|
+
"parent_run_id": rs.run_id,
|
334
|
+
"run_id": rs.run_id,
|
335
|
+
}
|
336
|
+
)
|
337
|
+
rs_log.save(excluded=None)
|
471
338
|
|
472
|
-
|
473
|
-
|
474
|
-
- https://pypi.org/project/python-crontab/
|
475
|
-
"""
|
339
|
+
# NOTE: remove this release date from running
|
340
|
+
self.running[pipeline.name].remove(next_time)
|
476
341
|
|
477
|
-
|
478
|
-
|
342
|
+
# IMPORTANT:
|
343
|
+
# Add the next running datetime to pipeline queue
|
344
|
+
finish_time: datetime = datetime.now(tz=cron_tz).replace(
|
345
|
+
second=0, microsecond=0
|
346
|
+
)
|
347
|
+
future_running_time: datetime = gen.next
|
348
|
+
while (
|
349
|
+
future_running_time in self.running[pipeline.name]
|
350
|
+
or future_running_time in self.queue[pipeline.name]
|
351
|
+
or future_running_time < finish_time
|
352
|
+
):
|
353
|
+
future_running_time: datetime = gen.next
|
479
354
|
|
480
|
-
|
481
|
-
|
482
|
-
value: Union[list[list[int]], str],
|
483
|
-
*,
|
484
|
-
option: Optional[dict[str, bool]] = None,
|
485
|
-
) -> None:
|
486
|
-
if isinstance(value, str):
|
487
|
-
value: list[str] = value.strip().split()
|
488
|
-
elif not isinstance_check(value, list[list[int]]):
|
489
|
-
raise TypeError(
|
490
|
-
f"{self.__class__.__name__} cron value does not support "
|
491
|
-
f"type: {type(value)}."
|
492
|
-
)
|
355
|
+
heappush(self.queue[pipeline.name], future_running_time)
|
356
|
+
logger.debug(f"[CORE]: {'-' * 100}")
|
493
357
|
|
494
|
-
# NOTE: Validate length of crontab of this class.
|
495
|
-
if len(value) != self.cron_length:
|
496
|
-
raise ValueError(
|
497
|
-
f"Invalid cron value does not have length equal "
|
498
|
-
f"{self.cron_length}: {value}."
|
499
|
-
)
|
500
|
-
self.options: Options = Options(**(option or {}))
|
501
358
|
|
502
|
-
|
503
|
-
|
504
|
-
CronPart(unit, values=item, options=self.options)
|
505
|
-
for item, unit in zip(value, self.cron_units)
|
506
|
-
]
|
359
|
+
def queue2str(queue: list[datetime]) -> Iterator[str]:
|
360
|
+
return (f"{q:%Y-%m-%d %H:%M:%S}" for q in queue)
|
507
361
|
|
508
|
-
# NOTE: Validate values of `day` and `dow` from parts.
|
509
|
-
if self.day == self.dow == []:
|
510
|
-
raise ValueError(
|
511
|
-
"Invalid cron value when set the `?` on day of month and "
|
512
|
-
"day of week together"
|
513
|
-
)
|
514
362
|
|
515
|
-
|
516
|
-
|
517
|
-
|
363
|
+
@catch_exceptions(cancel_on_failure=True)
|
364
|
+
def workflow_task(
|
365
|
+
pipeline_tasks: list[PipelineTask],
|
366
|
+
stop: datetime,
|
367
|
+
threads: dict[str, Thread],
|
368
|
+
) -> CancelJob | None:
|
369
|
+
"""Workflow task generator that create release pair of pipeline and on to
|
370
|
+
the threading in background.
|
518
371
|
|
519
|
-
|
520
|
-
return (
|
521
|
-
f"{self.__class__.__name__}(value={self.__str__()!r}, "
|
522
|
-
f"option={self.options.__dict__})"
|
523
|
-
)
|
372
|
+
This workflow task will start every minute at :02 second.
|
524
373
|
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
374
|
+
:param pipeline_tasks:
|
375
|
+
:param stop:
|
376
|
+
:param threads:
|
377
|
+
:rtype: CancelJob | None
|
378
|
+
"""
|
379
|
+
tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
|
380
|
+
start_date: datetime = datetime.now(tz=tz)
|
381
|
+
start_date_minute: datetime = start_date.replace(second=0, microsecond=0)
|
382
|
+
|
383
|
+
if start_date > stop:
|
384
|
+
logger.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
|
385
|
+
while len(threads) > 0:
|
386
|
+
logger.warning(
|
387
|
+
"[WORKFLOW]: Waiting pipeline release thread that still "
|
388
|
+
"running in background."
|
389
|
+
)
|
390
|
+
time.sleep(15)
|
391
|
+
workflow_long_running_task(threads)
|
392
|
+
return CancelJob
|
393
|
+
|
394
|
+
# IMPORTANT:
|
395
|
+
# Filter pipeline & on that should to run with `pipeline_release`
|
396
|
+
# function. It will deplicate running with different schedule value
|
397
|
+
# because I use current time in this condition.
|
398
|
+
#
|
399
|
+
# For example, if a pipeline A queue has '00:02:00' time that
|
400
|
+
# should to run and its schedule has '*/2 * * * *' and '*/35 * * * *'.
|
401
|
+
# This condition will release with 2 threading job.
|
402
|
+
#
|
403
|
+
# '00:02:00' --> '*/2 * * * *' --> running
|
404
|
+
# --> '*/35 * * * *' --> skip
|
405
|
+
#
|
406
|
+
for task in pipeline_tasks:
|
407
|
+
|
408
|
+
# NOTE: Get incoming datetime queue.
|
409
|
+
logger.debug(
|
410
|
+
f"[WORKFLOW]: Current queue: {task.pipeline.name!r} : "
|
411
|
+
f"{list(queue2str(task.queue[task.pipeline.name]))}"
|
529
412
|
)
|
530
413
|
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
414
|
+
# NOTE: Create minute unit value for any scheduler datetime that
|
415
|
+
# checking a pipeline task should run in this datetime.
|
416
|
+
current_running_time: datetime = start_date_minute.astimezone(
|
417
|
+
tz=ZoneInfo(task.on.tz)
|
535
418
|
)
|
419
|
+
if (
|
420
|
+
len(task.queue[task.pipeline.name]) > 0
|
421
|
+
and current_running_time != task.queue[task.pipeline.name][0]
|
422
|
+
) or (
|
423
|
+
task.on.next(current_running_time)
|
424
|
+
!= task.queue[task.pipeline.name][0]
|
425
|
+
):
|
426
|
+
logger.debug(
|
427
|
+
f"[WORKFLOW]: Skip schedule "
|
428
|
+
f"{current_running_time:%Y-%m-%d %H:%M:%S} "
|
429
|
+
f"for : {task.pipeline.name!r} : {task.on.cronjob}"
|
430
|
+
)
|
431
|
+
continue
|
432
|
+
elif len(task.queue[task.pipeline.name]) == 0:
|
433
|
+
logger.warning(
|
434
|
+
f"[WORKFLOW]: Queue is empty for : {task.pipeline.name!r} : "
|
435
|
+
f"{task.on.cronjob}"
|
436
|
+
)
|
437
|
+
continue
|
536
438
|
|
537
|
-
|
538
|
-
|
539
|
-
return reversed(self.parts[:3] + [self.parts[4], self.parts[3]])
|
439
|
+
# NOTE: Remove this datetime from queue.
|
440
|
+
task.queue[task.pipeline.name].pop(0)
|
540
441
|
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
442
|
+
# NOTE: Create thread name that able to tracking with observe schedule
|
443
|
+
# job.
|
444
|
+
thread_name: str = (
|
445
|
+
f"{task.pipeline.name}|{str(task.on.cronjob)}|"
|
446
|
+
f"{current_running_time:%Y%m%d%H%M}"
|
447
|
+
)
|
448
|
+
pipe_thread: Thread = Thread(
|
449
|
+
target=task.release,
|
450
|
+
name=thread_name,
|
451
|
+
daemon=True,
|
452
|
+
)
|
545
453
|
|
546
|
-
|
547
|
-
def hour(self) -> CronPart:
|
548
|
-
"""Return part of hour."""
|
549
|
-
return self.parts[1]
|
454
|
+
threads[thread_name] = pipe_thread
|
550
455
|
|
551
|
-
|
552
|
-
def day(self) -> CronPart:
|
553
|
-
"""Return part of day."""
|
554
|
-
return self.parts[2]
|
456
|
+
pipe_thread.start()
|
555
457
|
|
556
|
-
|
557
|
-
def month(self) -> CronPart:
|
558
|
-
"""Return part of month."""
|
559
|
-
return self.parts[3]
|
458
|
+
delay()
|
560
459
|
|
561
|
-
|
562
|
-
def dow(self) -> CronPart:
|
563
|
-
"""Return part of day of month."""
|
564
|
-
return self.parts[4]
|
460
|
+
logger.debug(f"[WORKFLOW]: {'=' * 100}")
|
565
461
|
|
566
|
-
def to_list(self) -> list[list[int]]:
|
567
|
-
"""Returns the cron schedule as a 2-dimensional list of integers."""
|
568
|
-
return [part.values for part in self.parts]
|
569
462
|
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
*,
|
574
|
-
tz: str | None = None,
|
575
|
-
) -> CronRunner:
|
576
|
-
"""Returns the schedule datetime runner with this cronjob. It would run
|
577
|
-
``next``, ``prev``, or ``reset`` to generate running date that you want.
|
578
|
-
|
579
|
-
:param date: An initial date that want to mark as the start point.
|
580
|
-
:param tz: A string timezone that want to change on runner.
|
581
|
-
:rtype: CronRunner
|
582
|
-
"""
|
583
|
-
return CronRunner(self, date, tz=tz)
|
463
|
+
def workflow_long_running_task(threads: dict[str, Thread]) -> None:
|
464
|
+
"""Workflow schedule for monitoring long running thread from the schedule
|
465
|
+
control.
|
584
466
|
|
467
|
+
:param threads: A mapping of Thread object and its name.
|
468
|
+
:rtype: None
|
469
|
+
"""
|
470
|
+
logger.debug(
|
471
|
+
"[MONITOR]: Start checking long running pipeline release task."
|
472
|
+
)
|
473
|
+
snapshot_threads = list(threads.keys())
|
474
|
+
for t_name in snapshot_threads:
|
585
475
|
|
586
|
-
|
587
|
-
|
588
|
-
|
476
|
+
# NOTE: remove the thread that running success.
|
477
|
+
if not threads[t_name].is_alive():
|
478
|
+
threads.pop(t_name)
|
589
479
|
|
590
|
-
@property
|
591
|
-
def year(self) -> CronPart:
|
592
|
-
"""Return part of year."""
|
593
|
-
return self.parts[5]
|
594
480
|
|
481
|
+
def workflow_control(
    schedules: list[str],
    stop: datetime | None = None,
    externals: DictData | None = None,
) -> list[str]:
    """Run the scheduler control loop for a group of schedules.

    Every name is loaded with ``Schedule.from_loader`` and its tasks are
    collected. Two recurring jobs are then registered on a local scheduler:
    a ``control`` job (``workflow_task``, every minute at ``:02``) and a
    ``monitor`` job (``workflow_long_running_task``, every 5 minutes at
    ``:10``). The function blocks, polling once per second, until no job
    tagged ``control`` is left on the scheduler.

    :param schedules: A list of schedule names to load and run.
    :param stop: A datetime at which the control job should stop. When it is
        not given, the start time plus the delta from the
        ``WORKFLOW_APP_STOP_BOUNDARY_DELTA`` environment variable (a JSON
        object of ``timedelta`` keyword arguments, default 5 minutes and
        20 seconds) is used.
    :param externals: External parameters that are passed to the loader and
        to the schedule tasks.
    :rtype: list[str]
    :returns: The same ``schedules`` list that was passed in.
    """
    timezone: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
    scheduler: Scheduler = Scheduler()
    started_at: datetime = datetime.now(tz=timezone)

    # NOTE: Shared caches handed to every schedule task.
    #   ---
    #   {"pipeline-name": [<release-datetime>, <release-datetime>, ...]}
    #
    wf_queue: dict[str, list[datetime]] = {}
    wf_running: dict[str, list[datetime]] = {}
    thread_releases: dict[str, Thread] = {}

    # NOTE: The first waiting point is the start of the next minute.
    next_minute: datetime = started_at + timedelta(minutes=1)
    start_date_waiting: datetime = next_minute.replace(second=0, microsecond=0)

    # NOTE: Collect the tasks generated by each schedule model.
    pipeline_tasks: list[PipelineTask] = []
    for schedule_name in schedules:
        loaded: Schedule = Schedule.from_loader(
            schedule_name, externals=externals
        )
        generated = loaded.tasks(
            start_date_waiting, wf_queue, wf_running, externals
        )
        pipeline_tasks.extend(generated)

    # NOTE: Resolve the stop boundary; an explicit ``stop`` always wins over
    #   the environment default.
    if stop:
        stop_at = stop
    else:
        boundary: dict = json.loads(
            os.getenv("WORKFLOW_APP_STOP_BOUNDARY_DELTA")
            or '{"minutes": 5, "seconds": 20}'
        )
        stop_at = started_at + timedelta(**boundary)

    # NOTE: The control job is triggered every minute at :02 seconds.
    control_job = scheduler.every(1).minutes.at(":02").do(
        workflow_task,
        pipeline_tasks=pipeline_tasks,
        stop=stop_at,
        threads=thread_releases,
    )
    control_job.tag("control")

    # NOTE: The monitor job checks the release threads every 5 minutes.
    monitor_job = scheduler.every(5).minutes.at(":10").do(
        workflow_long_running_task,
        threads=thread_releases,
    )
    monitor_job.tag("monitor")

    # NOTE: Poll the scheduler until the control job is gone.
    #   NOTE(review): presumably ``workflow_task`` cancels itself once
    #   ``stop`` is reached -- confirm against its implementation.
    logger.info(f"[WORKFLOW]: Start schedule: {schedules}")
    while True:
        scheduler.run_pending()
        time.sleep(1)
        if scheduler.get_jobs("control"):
            continue

        scheduler.clear("monitor")
        logger.warning(
            f"[WORKFLOW]: Pipeline release thread: {thread_releases}"
        )
        logger.warning("[WORKFLOW]: Does not have any schedule jobs !!!")
        break

    # NOTE: Report what is left in the caches before returning.
    logger.warning(
        f"Queue: {[list(queue2str(dates)) for dates in wf_queue.values()]}"
    )
    logger.warning(
        f"Running: {[list(queue2str(dates)) for dates in wf_running.values()]}"
    )
    return schedules
|
560
|
+
|
561
|
+
|
562
|
+
def workflow(
    stop: datetime | None = None,
    externals: DictData | None = None,
    excluded: list[str] | None = None,
) -> list[str]:
    """Run the workflow application on a pool of scheduler processes.

    All Schedule configs found by ``Loader.finds`` are chunked with the
    ``WORKFLOW_APP_SCHEDULE_PER_PROCESS`` value (default 100). Each chunk is
    submitted as one ``workflow_control`` call to a process pool whose size
    comes from ``WORKFLOW_APP_PROCESS_WORKER`` (default 2).

    :param stop: A stop datetime that is forwarded to every
        ``workflow_control`` call.
    :param externals: External parameters that are forwarded to every
        ``workflow_control`` call; an empty dict is sent when not given.
    :param excluded: A list of names that is passed to ``Loader.finds`` as
        its ``excluded`` argument.
    :rtype: list[str]
    :returns: The concatenated results of all ``workflow_control`` calls in
        completion order.
    :raises WorkflowException: If any submitted process ends with an
        exception.

    The current workflow logic:
    ---
        PIPELINES ==> process 01 ==> schedule 1 minute --> thread of release
                                                           pipeline task 01 01
                                                       --> thread of release
                                                           pipeline task 01 02
                  ==> process 02 ==> schedule 1 minute --> thread of release
                                                           pipeline task 02 01
                                                       --> thread of release
                                                           pipeline task 02 02
                  ==> ...
    """
    skip_names: list[str] = excluded or []
    worker_count: int = int(os.getenv("WORKFLOW_APP_PROCESS_WORKER") or "2")

    with ProcessPoolExecutor(max_workers=worker_count) as executor:
        found = Loader.finds(Schedule, excluded=skip_names)
        chunk_size: int = int(
            os.getenv("WORKFLOW_APP_SCHEDULE_PER_PROCESS") or "100"
        )

        # NOTE: One process per chunk; only the first item of each found
        #   entry is passed on as the schedule name.
        futures: list[Future] = []
        for chunk in batch(found, n=chunk_size):
            names: list[str] = [item[0] for item in chunk]
            futures.append(
                executor.submit(
                    workflow_control,
                    schedules=names,
                    stop=stop,
                    externals=(externals or {}),
                )
            )

        results: list[str] = []
        for done in as_completed(futures):
            err = done.exception()
            if err:
                # NOTE: Fail fast on the first process error and keep the
                #   original exception as the cause.
                logger.error(str(err))
                raise WorkflowException(str(err)) from err
            results.extend(done.result(timeout=1))

    return results
|
707
616
|
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
"
|
712
|
-
"WEEKDAYS",
|
713
|
-
)
|
617
|
+
|
618
|
+
# NOTE: Script entry point: run the workflow application with its default
#   arguments and log the list that it returns.
if __name__ == "__main__":
    logger.info(f"Application run success: {workflow()}")
|