ddeutil-workflow 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__init__.py +3 -14
- ddeutil/workflow/api.py +44 -75
- ddeutil/workflow/cli.py +51 -0
- ddeutil/workflow/cron.py +713 -0
- ddeutil/workflow/loader.py +65 -13
- ddeutil/workflow/log.py +147 -49
- ddeutil/workflow/on.py +18 -15
- ddeutil/workflow/pipeline.py +389 -140
- ddeutil/workflow/repeat.py +9 -5
- ddeutil/workflow/route.py +30 -37
- ddeutil/workflow/scheduler.py +398 -659
- ddeutil/workflow/stage.py +145 -73
- ddeutil/workflow/utils.py +133 -42
- ddeutil_workflow-0.0.9.dist-info/METADATA +273 -0
- ddeutil_workflow-0.0.9.dist-info/RECORD +22 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.9.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.9.dist-info/entry_points.txt +2 -0
- ddeutil/workflow/app.py +0 -45
- ddeutil_workflow-0.0.8.dist-info/METADATA +0 -266
- ddeutil_workflow-0.0.8.dist-info/RECORD +0 -20
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.9.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.8.dist-info → ddeutil_workflow-0.0.9.dist-info}/top_level.txt +0 -0
ddeutil/workflow/scheduler.py
CHANGED
```diff
@@ -5,709 +5,448 @@
 # ------------------------------------------------------------------------------
 from __future__ import annotations

-import
+import logging
+import os
+import time
 from collections.abc import Iterator
-from
+from concurrent.futures import Future, ProcessPoolExecutor, as_completed
+from dataclasses import dataclass
 from datetime import datetime, timedelta
-from functools import
-from
-from
-
-
-
-
-
+from functools import wraps
+from heapq import heappush
+from threading import Thread
+from zoneinfo import ZoneInfo
+
+from ddeutil.workflow.__types import DictData
+from ddeutil.workflow.cron import CronRunner
+from ddeutil.workflow.exceptions import WorkflowException
+from ddeutil.workflow.log import FileLog, Log
+from ddeutil.workflow.on import On
+from ddeutil.workflow.pipeline import Pipeline
+from ddeutil.workflow.utils import (
+    Result,
+    batch,
+    delay,
+    get_diff_sec,
+    param2template,
 )
-from
-
-
+from dotenv import load_dotenv
+from schedule import CancelJob, Scheduler
+
+load_dotenv("../../../.env")
+logging.basicConfig(
+    level=logging.DEBUG,
+    format=(
+        "%(asctime)s.%(msecs)03d (%(name)-10s, %(process)-5d, %(thread)-5d) "
+        "[%(levelname)-7s] %(message)-120s (%(filename)s:%(lineno)s)"
+    ),
+    handlers=[logging.StreamHandler()],
+    datefmt="%Y-%m-%d %H:%M:%S",
 )
+logging.getLogger("schedule").setLevel(logging.INFO)
+
+tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))

-
-
-"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    alt: list[str] = field(default_factory=list)
-
-    def __repr__(self) -> str:
-        return (
-            f"{self.__class__}(name={self.name!r}, range={self.range},"
-            f"min={self.min}, max={self.max}"
-            f"{f', alt={self.alt}' if self.alt else ''})"
-        )
+
+def catch_exceptions(cancel_on_failure=False):
+    """Catch exception error from scheduler job."""
+
+    def catch_exceptions_decorator(job_func):
+        @wraps(job_func)
+        def wrapper(*args, **kwargs):
+            try:
+                return job_func(*args, **kwargs)
+            except Exception as err:
+                logging.exception(err)
+                if cancel_on_failure:
+                    return CancelJob
+
+        return wrapper
+
+    return catch_exceptions_decorator


 @dataclass
-class
-
-
-
-
-
-
-
-
-        range=partial(range, 0, 60),
-        min=0,
-        max=59,
-    ),
-    Unit(
-        name="hour",
-        range=partial(range, 0, 24),
-        min=0,
-        max=23,
-    ),
-    Unit(
-        name="day",
-        range=partial(range, 1, 32),
-        min=1,
-        max=31,
-    ),
-    Unit(
-        name="month",
-        range=partial(range, 1, 13),
-        min=1,
-        max=12,
-        alt=[
-            "JAN",
-            "FEB",
-            "MAR",
-            "APR",
-            "MAY",
-            "JUN",
-            "JUL",
-            "AUG",
-            "SEP",
-            "OCT",
-            "NOV",
-            "DEC",
-        ],
-    ),
-    Unit(
-        name="weekday",
-        range=partial(range, 0, 7),
-        min=0,
-        max=6,
-        alt=[
-            "SUN",
-            "MON",
-            "TUE",
-            "WED",
-            "THU",
-            "FRI",
-            "SAT",
-        ],
-    ),
-)
+class PipelineTask:
+    pipeline: Pipeline
+    on: On
+    queue: list[datetime]
+    running: list[datetime]
+
+
+def queue2str(queue: list[datetime]) -> Iterator[str]:
+    return (f"{q:%Y-%m-%d %H:%M:%S}" for q in queue)

-CRON_UNITS_YEAR: tuple[Unit, ...] = CRON_UNITS + (
-    Unit(
-        name="year",
-        range=partial(range, 1990, 2101),
-        min=1990,
-        max=2100,
-    ),
-)

+def pipeline_release(
+    task: PipelineTask,
+    *,
+    log: Log | None = None,
+) -> None:
+    """Pipeline release, it will use with the same logic of `pipeline.release`
+    method.

-
-
-    """
+    :param task: A PipelineTask dataclass.
+    :param log: A log object.
+    """
+    log: Log = log or FileLog
+    pipeline: Pipeline = task.pipeline
+    on: On = task.on
+
+    gen: CronRunner = on.generate(
+        datetime.now(tz=tz).replace(second=0, microsecond=0)
+    )
+    cron_tz: ZoneInfo = gen.tz
+
+    next_running_time: datetime = gen.next
+    while next_running_time in task.running[pipeline.name]:
+        next_running_time: datetime = gen.next
+
+    logging.debug(
+        f"[CORE]: {pipeline.name!r} : {on.cronjob} : "
+        f"{next_running_time:%Y-%m-%d %H:%M:%S}"
+    )
+    heappush(task.running[pipeline.name], next_running_time)
+
+    # TODO: event should set on this step for release next pipeline task?
+
+    if get_diff_sec(next_running_time, tz=cron_tz) > 55:
+        logging.debug(
+            f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
+            f"Does not closely >> {next_running_time:%Y-%m-%d %H:%M:%S}"
+        )

-
-
-
-
+        # NOTE: Add this next running datetime to queue
+        heappush(task.queue[pipeline.name], next_running_time)
+        task.running[pipeline.name].remove(next_running_time)
+        time.sleep(0.5)
+        return
+
+    logging.debug(
+        f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
+        f"Closely to run >> {next_running_time:%Y-%m-%d %H:%M:%S}"
     )

-
-
-
-
-
-    ) -> None:
-        self.unit: Unit = unit
-        self.options: Options = options
-
-        if isinstance(values, str):
-            values: list[int] = self.from_str(values) if values != "?" else []
-        elif isinstance_check(values, list[int]):
-            values: list[int] = self.replace_weekday(values)
-        else:
-            raise TypeError(f"Invalid type of value in cron part: {values}.")
-
-        self.values: list[int] = self.out_of_range(
-            sorted(dict.fromkeys(values))
+    # NOTE: Release when the time is nearly to schedule time.
+    while (duration := get_diff_sec(next_running_time, tz=tz)) > (15 + 5):
+        logging.debug(
+            f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
+            f"Sleep until: {duration}"
         )
+        time.sleep(15)
+
+    time.sleep(0.5)
+
+    # NOTE: Release parameter that use to change if params has
+    # templating.
+    release_params: DictData = {
+        "release": {
+            "logical_date": next_running_time,
+        },
+    }
+
+    # WARNING: Re-create pipeline object that use new running pipeline
+    # ID.
+    runner: Pipeline = pipeline.get_running_id(run_id=pipeline.new_run_id)
+    rs: Result = runner.execute(
+        # FIXME: replace fix parameters on this execution process.
+        params=param2template(
+            {"asat-dt": "${{ release.logical_date }}"}, release_params
+        ),
+    )
+    logging.debug(
+        f"({runner.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
+        f"End release"
+    )
+
+    del runner
+
+    # NOTE: remove this release date from running
+    task.running[pipeline.name].remove(next_running_time)
+
+    # IMPORTANT:
+    # Add the next running datetime to pipeline queue
+    finish_time: datetime = datetime.now(tz=cron_tz).replace(
+        second=0, microsecond=0
+    )
+    future_running_time: datetime = gen.next
+    while (
+        future_running_time in task.running[pipeline.name]
+        or future_running_time in task.queue[pipeline.name]
+        or future_running_time < finish_time
+    ):
+        future_running_time: datetime = gen.next
+
+    heappush(task.queue[pipeline.name], future_running_time)
+
+    # NOTE: Set parent ID on this result.
+    rs.set_parent_run_id(pipeline.run_id)
+
+    # NOTE: Save result to log object saving.
+    rs_log: Log = log.model_validate(
+        {
+            "name": pipeline.name,
+            "on": str(on.cronjob),
+            "release": next_running_time,
+            "context": rs.context,
+            "parent_run_id": rs.run_id,
+            "run_id": rs.run_id,
+        }
+    )
+    rs_log.save()

-
-        """Generate String value from part of cronjob."""
-        _hash: str = "H" if self.options.output_hashes else "*"
+    logging.debug(f"[CORE]: {rs}")

-        if self.is_full:
-            return _hash

-
-
-
-
-
-
-
+@catch_exceptions(cancel_on_failure=True)
+def workflow_task(
+    pipeline_tasks: list[PipelineTask],
+    stop: datetime,
+    threads: dict[str, Thread],
+) -> CancelJob | None:
+    """Workflow task generator that create release pair of pipeline and on to
+    the threading in background.
+
+    This workflow task will start every minute at :02 second.
+    """
+    start_date: datetime = datetime.now(tz=tz)
+    start_date_minute = start_date.replace(second=0, microsecond=0)
+
+    if start_date > stop:
+        logging.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
+        while len(threads) > 0:
+            logging.warning(
+                "[WORKFLOW]: Waiting pipeline release thread that still "
+                "running in background."
             )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            time.sleep(15)
+            workflow_long_running_task(threads)
+        return CancelJob
+
+    # IMPORTANT:
+    # Filter pipeline & on that should to run with `pipeline_release`
+    # function. It will deplicate running with different schedule value
+    # because I use current time in this condition.
+    #
+    # For example, if a pipeline A queue has '00:02:00' time that
+    # should to run and its schedule has '*/2 * * * *' and '*/35 * * * *'.
+    # This condition will release with 2 threading job.
+    #
+    # '00:02:00'  --> '*/2 * * * *'   --> running
+    #             --> '*/35 * * * *'  --> skip
+    #
+    for task in pipeline_tasks:
+
+        # NOTE: Get incoming datetime queue.
+        logging.debug(
+            f"[WORKFLOW]: Current queue: {task.pipeline.name!r} : "
+            f"{list(queue2str(task.queue[task.pipeline.name]))}"
         )

-
-
-
-
-
-
-    def __eq__(self, other) -> bool:
-        if isinstance(other, CronPart):
-            return self.values == other.values
-        elif isinstance(other, list):
-            return self.values == other
-
-    @property
-    def min(self) -> int:
-        """Returns the smallest value in the range."""
-        return self.values[0]
-
-    @property
-    def max(self) -> int:
-        """Returns the largest value in the range."""
-        return self.values[-1]
-
-    @property
-    def step(self) -> Optional[int]:
-        """Returns the difference between first and second elements in the
-        range.
-        """
+        # NOTE: Create minute unit value for any scheduler datetime that
+        # checking a pipeline task should run in this datetime.
+        current_running_time: datetime = start_date_minute.astimezone(
+            tz=ZoneInfo(task.on.tz)
+        )
         if (
-len(
-and
+            len(task.queue[task.pipeline.name]) > 0
+            and current_running_time != task.queue[task.pipeline.name][0]
+        ) or (
+            task.on.next(current_running_time)
+            != task.queue[task.pipeline.name][0]
         ):
-
-
-
-
-        """Returns true if range has all the values of the unit."""
-        return len(self.values) == (self.unit.max - self.unit.min + 1)
-
-    def from_str(self, value: str) -> tuple[int, ...]:
-        """Parses a string as a range of positive integers. The string should
-        include only `-` and `,` special strings.
-
-        :param value: A string value that want to parse
-        :type value: str
-
-        TODO: support for `L`, `W`, and `#`
-        TODO: if you didn't care what day of the week the 7th was, you
-            could enter ? in the Day-of-week field.
-        TODO: L : the Day-of-month or Day-of-week fields specifies the last day
-            of the month or week.
-            DEV: use -1 for represent with L
-        TODO: W : In the Day-of-month field, 3W specifies the weekday closest
-            to the third day of the month.
-        TODO: # : 3#2 would be the second Tuesday of the month,
-            the 3 refers to Tuesday because it is the third day of each week.
-
-        Noted:
-            - 0 10 * * ? *
-                Run at 10:00 am (UTC) every day
-
-            - 15 12 * * ? *
-                Run at 12:15 pm (UTC) every day
-
-            - 0 18 ? * MON-FRI *
-                Run at 6:00 pm (UTC) every Monday through Friday
-
-            - 0 8 1 * ? *
-                Run at 8:00 am (UTC) every 1st day of the month
-
-            - 0/15 * * * ? *
-                Run every 15 minutes
-
-            - 0/10 * ? * MON-FRI *
-                Run every 10 minutes Monday through Friday
-
-            - 0/5 8-17 ? * MON-FRI *
-                Run every 5 minutes Monday through Friday between 8:00 am and
-                5:55 pm (UTC)
-
-            - 5,35 14 * * ? *
-                Run every day, at 5 and 35 minutes past 2:00 pm (UTC)
-
-            - 15 10 ? * 6L 2002-2005
-                Run at 10:15am UTC on the last Friday of each month during the
-                years 2002 to 2005
-
-        :rtype: tuple[int, ...]
-        """
-        interval_list: list[list[int]] = []
-        for _value in self.replace_alternative(value.upper()).split(","):
-            if _value == "?":
-                continue
-            elif _value.count("/") > 1:
-                raise ValueError(
-                    f"Invalid value {_value!r} in cron part {value!r}"
-                )
-
-            value_range, value_step = must_split(_value, "/", maxsplit=1)
-            value_range_list: list[int] = self.out_of_range(
-                self._parse_range(value_range)
+            logging.debug(
+                f"[WORKFLOW]: Skip schedule "
+                f"{current_running_time:%Y-%m-%d %H:%M:%S} "
+                f"for : {task.pipeline.name!r} : {task.on.cronjob}"
             )
-
-
-
-
-
-
-
-            interval_list.append(self._interval(value_range_list, value_step))
-        return tuple(item for sublist in interval_list for item in sublist)
-
-    def replace_alternative(self, value: str) -> str:
-        """Replaces the alternative representations of numbers in a string."""
-        for i, alt in enumerate(self.unit.alt):
-            if alt in value:
-                value: str = value.replace(alt, str(self.unit.min + i))
-        return value
-
-    def replace_weekday(self, values: list[int] | Iterator[int]) -> list[int]:
-        """Replaces all 7 with 0 as Sunday can be represented by both.
-
-        :param values: list or iter of int that want to mode by 7
-        :rtype: list[int]
-        """
-        if self.unit.name == "weekday":
-            # NOTE: change weekday value in range 0-6 (div-mod by 7).
-            return [value % 7 for value in values]
-        return list(values)
-
-    def out_of_range(self, values: list[int]) -> list[int]:
-        """Return an integer is a value out of range was found, otherwise None.
-
-        :param values: A list of int value
-        :type values: list[int]
-
-        :rtype: list[int]
-        """
-        if values:
-            if (first := values[0]) < self.unit.min:
-                raise ValueError(
-                    f"Value {first!r} out of range for {self.unit.name!r}"
-                )
-            elif (last := values[-1]) > self.unit.max:
-                raise ValueError(
-                    f"Value {last!r} out of range for {self.unit.name!r}"
-                )
-        return values
-
-    def _parse_range(self, value: str) -> list[int]:
-        """Parses a range string."""
-        if value == "*":
-            return list(self.unit.range())
-        elif value.count("-") > 1:
-            raise ValueError(f"Invalid value {value}")
-        try:
-            sub_parts: list[int] = list(map(int, value.split("-")))
-        except ValueError as exc:
-            raise ValueError(f"Invalid value {value!r} --> {exc}") from exc
-
-        if len(sub_parts) == 2:
-            min_value, max_value = sub_parts
-            if max_value < min_value:
-                raise ValueError(f"Max range is less than min range in {value}")
-            sub_parts: list[int] = list(range(min_value, max_value + 1))
-        return self.replace_weekday(sub_parts)
-
-    def _interval(
-        self,
-        values: list[int],
-        step: int | None = None,
-    ) -> list[int]:
-        """Applies an interval step to a collection of values."""
-        if not step:
-            return values
-        elif (_step := int(step)) < 1:
-            raise ValueError(
-                f"Invalid interval step value {_step!r} for "
-                f"{self.unit.name!r}"
+            continue
+        elif len(task.queue[task.pipeline.name]) == 0:
+            # TODO: Should auto add new queue?
+            logging.warning(
+                f"[WORKFLOW]: Queue is empty for : {task.pipeline.name!r} : "
+                f"{task.on.cronjob}"
             )
-
-        return [
-            value
-            for value in values
-            if (value % _step == min_value % _step) or (value == min_value)
-        ]
+            continue

-
-
-        """Returns true if the range can be represented as an interval."""
-        if not (step := self.step):
-            return False
-        for idx, value in enumerate(self.values):
-            if idx == 0:
-                continue
-            elif (value - self.values[idx - 1]) != step:
-                return False
-        return True
-
-    @property
-    def is_full_interval(self) -> bool:
-        """Returns true if the range contains all the interval values."""
-        if step := self.step:
-            return (
-                self.min == self.unit.min
-                and (self.max + step) > self.unit.max
-                and (
-                    len(self.values)
-                    == (round((self.max - self.min) / step) + 1)
-                )
-            )
-        return False
-
-    def ranges(self) -> list[Union[int, list[int]]]:
-        """Returns the range as an array of ranges defined as arrays of
-        positive integers.
+        # NOTE: Remove this datetime from queue.
+        task.queue[task.pipeline.name].pop(0)

-:
-
-
-        start_number: Optional[int] = None
-        for idx, value in enumerate(self.values):
-            try:
-                next_value: int = self.values[idx + 1]
-            except IndexError:
-                next_value: int = -1
-            if value != (next_value - 1):
-                # NOTE: ``next_value`` is not the subsequent number
-                if start_number is None:
-                    # NOTE:
-                    # The last number of the list ``self.values`` is not in a
-                    # range.
-                    multi_dim_values.append(value)
-                else:
-                    multi_dim_values.append([start_number, value])
-                    start_number: Optional[int] = None
-            elif start_number is None:
-                start_number: Optional[int] = value
-        return multi_dim_values
-
-    def filler(self, value: int) -> int | str:
-        """Formats weekday and month names as string when the relevant options
-        are set.
-
-        :param value: a int value
-        :type value: int
-
-        :rtype: int | str
-        """
-        return (
-            self.unit.alt[value - self.unit.min]
-            if (
-                (
-                    self.options.output_weekday_names
-                    and self.unit.name == "weekday"
-                )
-                or (
-                    self.options.output_month_names
-                    and self.unit.name == "month"
-                )
-            )
-            else value
+        thread_name: str = (
+            f"{task.pipeline.name}|{str(task.on.cronjob)}|"
+            f"{current_running_time:%Y%m%d%H%M}"
         )
+        pipe_thread: Thread = Thread(
+            target=pipeline_release,
+            args=(task,),
+            name=thread_name,
+            daemon=True,
+        )
+
+        threads[thread_name] = pipe_thread

+        pipe_thread.start()

-
-class CronJob:
-    """The Cron Job Converter object that generate datetime dimension of cron
-    job schedule format,
+        delay()

-
+    logging.debug(f"[WORKFLOW]: {'=' * 100}")

-        (i) minute (0 - 59)
-        (ii) hour (0 - 23)
-        (iii) day of the month (1 - 31)
-        (iv) month (1 - 12)
-        (v) day of the week (0 - 6) (Sunday to Saturday; 7 is also Sunday
-            on some systems)

-
-
-
-    and day of week value).
+def workflow_long_running_task(threads: dict[str, Thread]) -> None:
+    """Workflow schedule for monitoring long running thread from the schedule
+    control.

-
-        - https://github.com/Sonic0/cron-converter
-        - https://pypi.org/project/python-crontab/
+    :param threads: A mapping of Thread object and its name.
     """
+    logging.debug("[MONITOR]: Start checking long running pipeline release.")
+    snapshot_threads = list(threads.keys())
+    for t_name in snapshot_threads:

-
-
-
-    def __init__(
-        self,
-        value: Union[list[list[int]], str],
-        *,
-        option: Optional[dict[str, bool]] = None,
-    ) -> None:
-        if isinstance(value, str):
-            value: list[str] = value.strip().split()
-        elif not isinstance_check(value, list[list[int]]):
-            raise TypeError(
-                f"{self.__class__.__name__} cron value does not support "
-                f"type: {type(value)}."
-            )
+        # NOTE: remove the thread that running success.
+        if not threads[t_name].is_alive():
+            threads.pop(t_name)

-        # NOTE: Validate length of crontab of this class.
-        if len(value) != self.cron_length:
-            raise ValueError(
-                f"Invalid cron value does not have length equal "
-                f"{self.cron_length}: {value}."
-            )
-        self.options: Options = Options(**(option or {}))

-
-
-
-
-
+def workflow_control(
+    pipelines: list[str],
+    until: datetime | None = None,
+    externals: DictData | None = None,
+) -> list[str]:
+    """Workflow scheduler control.

-
-
-
-
-
-
+    :param pipelines: A list of pipeline names that want to schedule running.
+    :param until:
+    :param externals: An external parameters that pass to Loader.
+    """
+    schedule: Scheduler = Scheduler()
+    start_date: datetime = datetime.now(tz=tz)
+
+    # NOTE: Design workflow queue caching.
+    # ---
+    # {"pipeline-name": [<release-datetime>, <release-datetime>, ...]}
+    #
+    wf_queue: dict[str, list[datetime]] = {}
+    wf_running: dict[str, list[datetime]] = {}
+    thread_releases: dict[str, Thread] = {}
+
+    start_date_waiting: datetime = (start_date + timedelta(minutes=1)).replace(
+        second=0, microsecond=0
+    )

-
-
-        return " ".join(str(part) for part in self.parts)
+    # NOTE: Create pair of pipeline and on.
+    pipeline_tasks: list[PipelineTask] = []

-
-
-            f"{self.__class__.__name__}(value={self.__str__()!r}, "
-            f"option={self.options.__dict__})"
-        )
+    for name in pipelines:
+        pipeline: Pipeline = Pipeline.from_loader(name, externals=externals)

-
-
-
-            for part, other_part in zip(self.parts_order, other.parts_order)
-        )
+        # NOTE: Create default list of release datetime.
+        wf_queue[name]: list[datetime] = []
+        wf_running[name]: list[datetime] = []

-
-        return all(
-            part == other_part
-            for part, other_part in zip(self.parts, other.parts)
-        )
+        for on in pipeline.on:

-
-
-
-
-    @property
-    def minute(self) -> CronPart:
-        """Return part of minute."""
-        return self.parts[0]
-
-    @property
-    def hour(self) -> CronPart:
-        """Return part of hour."""
-        return self.parts[1]
-
-    @property
-    def day(self) -> CronPart:
-        """Return part of day."""
-        return self.parts[2]
-
-    @property
-    def month(self) -> CronPart:
-        """Return part of month."""
-        return self.parts[3]
-
-    @property
-    def dow(self) -> CronPart:
-        """Return part of day of month."""
-        return self.parts[4]
-
-    def to_list(self) -> list[list[int]]:
-        """Returns the cron schedule as a 2-dimensional list of integers."""
-        return [part.values for part in self.parts]
-
-    def schedule(
-        self,
-        date: datetime | None = None,
-        *,
-        tz: str | None = None,
-    ) -> CronRunner:
-        """Returns the schedule datetime runner with this cronjob. It would run
-        ``next``, ``prev``, or ``reset`` to generate running date that you want.
-
-        :param date: An initial date that want to mark as the start point.
-        :param tz: A string timezone that want to change on runner.
-        :rtype: CronRunner
-        """
-        return CronRunner(self, date, tz=tz)
-
-
-class CronJobYear(CronJob):
-    cron_length = 6
-    cron_units = CRON_UNITS_YEAR
-
-    @property
-    def year(self) -> CronPart:
-        """Return part of year."""
-        return self.parts[5]
-
-
-class CronRunner:
-    """Create an instance of Date Runner object for datetime generate with
-    cron schedule object value.
-    """
+            on_gen = on.generate(start_date_waiting)
+            next_running_date = on_gen.next
+            while next_running_date in wf_queue[name]:
+                next_running_date = on_gen.next

-
-
-
-
-
-
-        )
+            heappush(wf_queue[name], next_running_date)
+            pipeline_tasks.append(
+                PipelineTask(
+                    pipeline=pipeline, on=on, queue=wf_queue, running=wf_running
+                ),
+            )

-
-
-
-
-
-
-        )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            self.date: datetime = date.astimezone(self.tz)
-        else:
-            self.date: datetime = datetime.now(tz=self.tz)
-
-        # NOTE: Add one minute if the second value more than 0.
-        if self.date.second > 0:
-            self.date: datetime = self.date + timedelta(minutes=1)
-
-        self.__start_date: datetime = self.date
-        self.cron: CronJob | CronJobYear = cron
-        self.reset_flag: bool = True
-
-    def reset(self) -> None:
-        """Resets the iterator to start time."""
-        self.date: datetime = self.__start_date
-        self.reset_flag: bool = True
-
-    @property
-    def next(self) -> datetime:
-        """Returns the next time of the schedule."""
-        self.date = (
-            self.date
-            if self.reset_flag
-            else (self.date + timedelta(minutes=+1))
-        )
-        return self.find_date(reverse=False)
-
-    @property
-    def prev(self) -> datetime:
-        """Returns the previous time of the schedule."""
-        self.date: datetime = self.date + timedelta(minutes=-1)
-        return self.find_date(reverse=True)
-
-    def find_date(self, reverse: bool = False) -> datetime:
-        """Returns the time the schedule would run by `next` or `prev`.
-
-        :param reverse: A reverse flag.
-        """
-        # NOTE: Set reset flag to false if start any action.
-        self.reset_flag: bool = False
-        for _ in range(25):
-            if all(
-                not self.__shift_date(mode, reverse)
-                for mode in ("month", "day", "hour", "minute")
-            ):
-                return copy.deepcopy(self.date.replace(second=0, microsecond=0))
-        raise RecursionError("Unable to find execution time for schedule")
-
-    def __shift_date(self, mode: str, reverse: bool = False) -> bool:
-        """Increments the mode value until matches with the schedule."""
-        switch: dict[str, str] = {
-            "month": "year",
-            "day": "month",
-            "hour": "day",
-            "minute": "hour",
-        }
-        current_value: int = getattr(self.date, switch[mode])
-        _addition_condition: Callable[[], bool] = (
-            (
-                lambda: WEEKDAYS.get(self.date.strftime("%a"))
-                not in self.cron.dow.values
+    # NOTE: This schedule job will start every minute at :02 seconds.
+    schedule.every(1).minutes.at(":02").do(
+        workflow_task,
+        pipeline_tasks=pipeline_tasks,
+        stop=until or (start_date + timedelta(minutes=5, seconds=20)),
+        threads=thread_releases,
+    ).tag("control")
+
+    # NOTE: Checking zombie task with schedule job will start every 5 minute.
+    schedule.every(5).minutes.at(":10").do(
+        workflow_long_running_task,
+        threads=thread_releases,
+    ).tag("monitor")
+
+    # NOTE: Start running schedule
+    logging.info(f"[WORKFLOW]: Start schedule: {pipelines}")
+    while True:
+        schedule.run_pending()
+        time.sleep(1)
+        if not schedule.get_jobs("control"):
+            schedule.clear("monitor")
+            logging.warning(
+                f"[WORKFLOW]: Pipeline release thread: {thread_releases}"
             )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            logging.warning("[WORKFLOW]: Does not have any schedule jobs !!!")
+            break
+
+    logging.warning(f"Queue: {[wf_queue[wf] for wf in wf_queue]}")
+    logging.warning(f"Running: {[wf_running[wf] for wf in wf_running]}")
+    return pipelines
+
+
+def workflow(
+    until: datetime | None = None,
+    externals: DictData | None = None,
+    excluded: list[str] | None = None,
+):
+    """Workflow application that running multiprocessing schedule with chunk of
+    pipelines that exists in config path.
+
+    :param until:
+    :param excluded:
+    :param externals:
+
+    This function will get all pipelines that include on value that was
+    created in config path and chuck it with WORKFLOW_APP_PIPELINE_PER_PROCESS
+    value to multiprocess executor pool.
+
+    The current workflow logic:
+    ---
+        PIPELINES ==> process 01 ==> schedule 1 minute --> thread of release
+                                                           pipeline task 01 01
+                                                       --> thread of release
+                                                           pipeline task 01 02
+                  ==> process 02 ==> schedule 1 minute --> thread of release
+                                                           pipeline task 02 01
+                                                       --> thread of release
+                                                           pipeline task 02 02
+                  ==> ...
+    """
+    excluded: list = excluded or []
+
+    with ProcessPoolExecutor(max_workers=2) as executor:
+        futures: list[Future] = [
+            executor.submit(
+                workflow_control,
+                pipelines=[load[0] for load in loader],
+                until=until,
+                externals=(externals or {}),
+            )
+            for loader in batch(
+                # Loader.find(Pipeline, include=["on"], excluded=excluded),
+                [
+                    ("pipe-scheduling", None),
+                    # ("pipe-scheduling-minute", None),
+                ],
+                n=1,
+            )
+        ]
+
+        results: list[str] = []
+        for future in as_completed(futures):
+            if err := future.exception():
+                logging.error(str(err))
+                raise WorkflowException(str(err)) from err
+            results.extend(future.result(timeout=1))
+        return results
+
+
+if __name__ == "__main__":
+    # TODO: Define input arguments that want to manage this application.
+    workflow_rs: list[str] = workflow()
+    logging.info(f"Application run success: {workflow_rs}")
```
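`pipeline_release` defers a release that is still more than about 55 seconds away and otherwise sleeps in 15-second steps until the target minute is within roughly 20 seconds. `get_diff_sec` lives in `ddeutil.workflow.utils` and is not shown in this diff, so the sketch below substitutes a local `seconds_until` helper as an assumption about what it computes:

```python
import time
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo

tz = ZoneInfo("UTC")


def seconds_until(moment: datetime) -> float:
    # Assumed stand-in for get_diff_sec: seconds from now until `moment`.
    return (moment - datetime.now(tz=tz)).total_seconds()


# Next minute boundary, similar to the CronRunner value used in pipeline_release.
next_running_time = (
    datetime.now(tz=tz).replace(second=0, microsecond=0) + timedelta(minutes=1)
)

if seconds_until(next_running_time) > 55:
    print("not close yet: push back onto the queue and return")
else:
    # Coarse 15-second sleeps until within (15 + 5) seconds of the release.
    while (duration := seconds_until(next_running_time)) > (15 + 5):
        print(f"sleep, about {duration:.0f}s remaining")
        time.sleep(15)
    time.sleep(0.5)
    print("close to the schedule time: execute the pipeline release")
```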
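The queue layout documented in `workflow_control` (`{"pipeline-name": [<release-datetime>, ...]}`) works because each per-pipeline list is only grown with `heappush`, so index 0 is always the earliest pending release, which is what `workflow_task` compares against the current minute before releasing it. A standalone sketch of that invariant; the pipeline name is just the sample value that appears in the diff, and the sketch pops the head with `heappop` (the heap-safe equivalent of the `pop(0)` call in the diff):

```python
from datetime import datetime, timedelta
from heapq import heappop, heappush
from zoneinfo import ZoneInfo

tz = ZoneInfo("UTC")

# {"pipeline-name": [<release-datetime>, <release-datetime>, ...]}
wf_queue: dict[str, list[datetime]] = {"pipe-scheduling": []}

now = datetime.now(tz=tz).replace(second=0, microsecond=0)
for minutes in (6, 2, 4):
    # heappush keeps the list heap-ordered, so the smallest datetime
    # (the earliest release) always sits at index 0.
    heappush(wf_queue["pipe-scheduling"], now + timedelta(minutes=minutes))

assert wf_queue["pipe-scheduling"][0] == now + timedelta(minutes=2)

next_release = heappop(wf_queue["pipe-scheduling"])
print(f"release at {next_release:%Y-%m-%d %H:%M:%S}")
```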
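The rewritten scheduler leans on three behaviours of the third-party `schedule` package imported above: jobs can be tagged (`.tag("control")`, `.tag("monitor")`), looked up and cleared by tag (`get_jobs`, `clear`), and a job that returns `CancelJob` is dropped from the scheduler, which is how `workflow_task` (and the `catch_exceptions(cancel_on_failure=True)` wrapper) stops itself. A minimal, self-contained sketch of that control pattern, using the module-level scheduler and made-up `control_job`/`monitor_job` names rather than anything from this package:

```python
import time
from datetime import datetime, timedelta

import schedule
from schedule import CancelJob

stop = datetime.now() + timedelta(seconds=5)


def control_job():
    # Returning CancelJob tells `schedule` to unschedule this job.
    if datetime.now() > stop:
        return CancelJob
    print("control tick")


def monitor_job():
    print("monitor tick")


schedule.every(1).seconds.do(control_job).tag("control")
schedule.every(2).seconds.do(monitor_job).tag("monitor")

while True:
    schedule.run_pending()
    time.sleep(0.2)
    # Same exit condition as workflow_control: once the tagged control job
    # has cancelled itself, clear the monitor job and stop looping.
    if not schedule.get_jobs("control"):
        schedule.clear("monitor")
        break
```

The diff itself builds a dedicated `Scheduler()` instance instead of using the module-level default, which keeps each `workflow_control` call isolated when several run in separate processes.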
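`workflow()` fans the schedule out over a `ProcessPoolExecutor`, submitting one `workflow_control` call per chunk of pipeline names produced by the `batch` utility and collecting results with `as_completed`. The sketch below reproduces only that fan-out shape; `chunk` and `run_group` are hypothetical local stand-ins, not the package's `batch` or `workflow_control`:

```python
import logging
from concurrent.futures import Future, ProcessPoolExecutor, as_completed
from itertools import islice
from typing import Iterable, Iterator


def chunk(names: Iterable[str], n: int) -> Iterator[list[str]]:
    # Hypothetical stand-in for a batching utility: yield lists of size n.
    it = iter(names)
    while piece := list(islice(it, n)):
        yield piece


def run_group(pipelines: list[str]) -> list[str]:
    # Placeholder for workflow_control: pretend every pipeline was scheduled.
    return pipelines


if __name__ == "__main__":
    names = ["pipe-a", "pipe-b", "pipe-c"]
    with ProcessPoolExecutor(max_workers=2) as executor:
        futures: list[Future] = [
            executor.submit(run_group, group) for group in chunk(names, n=2)
        ]
        results: list[str] = []
        for future in as_completed(futures):
            if err := future.exception():
                logging.error(str(err))
                raise err
            results.extend(future.result())
    print(results)
```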