ddeutil-workflow 0.0.18__py3-none-any.whl → 0.0.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddeutil/workflow/__about__.py +1 -1
- ddeutil/workflow/__cron.py +1 -0
- ddeutil/workflow/conf.py +18 -15
- ddeutil/workflow/job.py +12 -0
- ddeutil/workflow/on.py +13 -0
- ddeutil/workflow/scheduler.py +157 -84
- ddeutil/workflow/utils.py +32 -19
- {ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.19.dist-info}/METADATA +5 -5
- ddeutil_workflow-0.0.19.dist-info/RECORD +21 -0
- {ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.19.dist-info}/WHEEL +1 -1
- ddeutil_workflow-0.0.18.dist-info/RECORD +0 -21
- {ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.19.dist-info}/LICENSE +0 -0
- {ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.19.dist-info}/entry_points.txt +0 -0
- {ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.19.dist-info}/top_level.txt +0 -0
ddeutil/workflow/__about__.py
CHANGED
@@ -1 +1 @@
-__version__: str = "0.0.18"
+__version__: str = "0.0.19"
ddeutil/workflow/__cron.py
CHANGED
ddeutil/workflow/conf.py
CHANGED
@@ -195,6 +195,7 @@ class SimLoad:
         :param conf: A config object.
         :param include:
         :param exclude:
+
         :rtype: Iterator[tuple[str, DictData]]
         """
         exclude: list[str] = exclude or []
@@ -247,12 +248,14 @@ class Loader(SimLoad):
         include: list[str] | None = None,
         exclude: list[str] | None = None,
         **kwargs,
-    ) -> DictData:
+    ) -> Iterator[tuple[str, DictData]]:
         """Override the find class method from the Simple Loader object.

         :param obj: A object that want to validate matching before return.
         :param include:
         :param exclude:
+
+        :rtype: Iterator[tuple[str, DictData]]
         """
         return super().finds(
             obj=obj, conf=Config(), include=include, exclude=exclude
@@ -268,6 +271,7 @@ def get_type(t: str, params: Config) -> AnyModelType:
     :param t: A importable type string.
     :param params: A config parameters that use registry to search this
         type.
+
     :rtype: AnyModelType
     """
     try:
@@ -366,6 +370,8 @@ class FileLog(BaseLog):
             workflow name.

         :param name: A workflow name that want to search release logging data.
+
+        :rtype: Iterator[Self]
         """
         pointer: Path = config.root_path / f"./logs/workflow={name}"
         if not pointer.exists():
@@ -387,6 +393,9 @@ class FileLog(BaseLog):
         workflow name and release values. If a release does not pass to an input
         argument, it will return the latest release from the current log path.

+        :param name:
+        :param release:
+
         :raise FileNotFoundError:
         :raise NotImplementedError:

@@ -411,21 +420,17 @@ class FileLog(BaseLog):
             return cls.model_validate(obj=json.load(f))

     @classmethod
-    def is_pointed(
-        cls,
-        name: str,
-        release: datetime,
-        *,
-        queue: list[datetime] | None = None,
-    ) -> bool:
-        """Check this log already point in the destination.
+    def is_pointed(cls, name: str, release: datetime) -> bool:
+        """Check the release log already pointed or created at the destination
+        log path.

         :param name: A workflow name.
         :param release: A release datetime.
-
-
+
+        :rtype: bool
+        :return: Return False if the release log was not pointed or created.
         """
-        # NOTE:
+        # NOTE: Return False if enable writing log flag does not set.
         if not config.enable_write_log:
             return False

@@ -434,9 +439,7 @@ class FileLog(BaseLog):
             name=name, release=release
         )

-
-        return pointer.exists()
-        return pointer.exists() or (release in queue)
+        return pointer.exists()

     def pointer(self) -> Path:
         """Return release directory path that was generated from model data.
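The upshot of the `is_pointed` rework: the log object now answers only whether a release file already exists on disk, and de-duplication against the in-memory queue moves to the callers in scheduler.py as an explicit `or (next_time in queue)` check. A minimal sketch of the caller side under that split, where `log` stands in for the `FileLog` class and `runner` for a `CronRunner` (both names assumed from the diff, not a verbatim excerpt):

    from datetime import datetime

    def next_free_release(log, name: str, runner, queue: list[datetime]) -> datetime:
        """Advance the cron runner until a slot is neither logged on disk
        nor already claimed by another thread in the shared queue."""
        next_time: datetime = runner.next
        while log.is_pointed(name, next_time) or (next_time in queue):
            next_time = runner.next
        return next_time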
ddeutil/workflow/job.py
CHANGED
@@ -19,6 +19,7 @@ from concurrent.futures import (
    as_completed,
    wait,
 )
+from enum import Enum
 from functools import lru_cache
 from textwrap import dedent
 from threading import Event
@@ -198,6 +199,11 @@ class Strategy(BaseModel):
         return make(self.matrix, self.include, self.exclude)


+class TriggerRules(str, Enum):
+    all_success: str = "all_success"
+    all_failed: str = "all_failed"
+
+
 class Job(BaseModel):
     """Job Pydantic model object (group of stages).

@@ -245,6 +251,11 @@ class Job(BaseModel):
         default_factory=list,
         description="A list of Stage of this job.",
     )
+    trigger_rule: TriggerRules = Field(
+        default=TriggerRules.all_success,
+        description="A trigger rule of tracking needed jobs.",
+        serialization_alias="trigger-rule",
+    )
     needs: list[str] = Field(
         default_factory=list,
         description="A list of the job ID that want to run before this job.",
@@ -269,6 +280,7 @@ class Job(BaseModel):
         :rtype: DictData
         """
         dash2underscore("runs-on", values)
+        dash2underscore("trigger-rule", values)
         return values

     @field_validator("desc", mode="after")
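The new field follows the same dash-to-underscore convention as `runs-on`: the `model_validator(mode="before")` hook rewrites the dashed YAML key onto the underscored field, and `serialization_alias` writes it back out dashed. A minimal standalone sketch of that round trip (`MiniJob` and `_prepare_keys` are illustrative stand-ins, not the package's actual `Job` model):

    from enum import Enum

    from pydantic import BaseModel, Field, model_validator


    class TriggerRules(str, Enum):
        all_success = "all_success"
        all_failed = "all_failed"


    class MiniJob(BaseModel):
        trigger_rule: TriggerRules = Field(
            default=TriggerRules.all_success,
            serialization_alias="trigger-rule",
        )

        @model_validator(mode="before")
        def _prepare_keys(cls, values: dict) -> dict:
            # Mirror dash2underscore("trigger-rule", values): move the dashed
            # key from the raw YAML data onto the underscored field name.
            if "trigger-rule" in values:
                values["trigger_rule"] = values.pop("trigger-rule")
            return values


    job = MiniJob.model_validate({"trigger-rule": "all_failed"})
    assert job.trigger_rule == TriggerRules.all_failed
    # Dumping by alias restores the dashed key for YAML round-tripping.
    assert job.model_dump(by_alias=True) == {"trigger-rule": "all_failed"}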
ddeutil/workflow/on.py
CHANGED
@@ -190,6 +190,19 @@ class On(BaseModel):
         """
         return self.generate(start=start).next

+    # def pop(self, queue: list[datetime]) -> datetime:
+    #     """Pop the matching datetime value from list of datetime alias queue."""
+    #     for dt in queue:
+    #         if self.next(dt) == dt:
+    #             return dt
+    #
+    #     # NOTE: Add 1 second value to the current datetime for forcing crontab
+    #     #   runner generate the next datetime instead if current datetime be
+    #     #   valid because I already replaced second to zero before passing.
+    #     return datetime.now(tz=config.tz).replace(
+    #         second=0, microsecond=0
+    #     ) + timedelta(seconds=1)
+

 class YearOn(On):
     """Implement On Year Schedule Model for limit year matrix that use by some
ddeutil/workflow/scheduler.py
CHANGED
@@ -218,6 +218,24 @@ class Workflow(BaseModel):
         """
         return dedent(value)

+    @field_validator("on", mode="after")
+    def __on_no_dup__(cls, value: list[On]) -> list[On]:
+        """Validate the on fields should not contain duplicate values and if it
+        contain every minute value, it should has only one on value."""
+        set_ons: set[str] = {str(on.cronjob) for on in value}
+        if len(set_ons) != len(value):
+            raise ValueError(
+                "The on fields should not contain duplicate on value."
+            )
+
+        # WARNING:
+        # if '* * * * *' in set_ons and len(set_ons) > 1:
+        #     raise ValueError(
+        #         "If it has every minute cronjob on value, it should has only "
+        #         "one value in the on field."
+        #     )
+        return value
+
     @model_validator(mode="after")
     def __validate_jobs_need_and_prepare_running_id(self) -> Self:
         """Validate each need job in any jobs should exists.
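Both this validator and the copy added to `ScheduleWorkflow` further down reduce the check to set arithmetic over the rendered cron strings. Restated standalone with plain strings in place of `On` objects:

    def on_no_dup(crons: list[str]) -> list[str]:
        """Reject duplicate cron strings the same way __on_no_dup__ does."""
        if len(set(crons)) != len(crons):
            raise ValueError("The on fields should not contain duplicate on value.")
        return crons

    on_no_dup(["*/5 * * * *", "0 1 * * *"])      # fine: two distinct schedules
    # on_no_dup(["*/5 * * * *", "*/5 * * * *"])  # raises ValueError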
@@ -265,7 +283,7 @@
         :type name: str

         :rtype: Job
-        :
+        :return: A job model that exists on this workflow by input name.
         """
         if name not in self.jobs:
             raise ValueError(
@@ -287,7 +305,13 @@

         :param params: A parameter mapping that receive from workflow execution.
         :type params: DictData
+
+        :raise WorkflowException: If parameter value that want to validate does
+            not include the necessary parameter that had required flag.
+
         :rtype: DictData
+        :return: The parameter value that validate with its parameter fields and
+            adding jobs key to this parameter.
         """
         # VALIDATE: Incoming params should have keys that set on this workflow.
         if check_key := tuple(
@@ -315,7 +339,7 @@

     def release(
         self,
-        on: On,
+        runner: CronRunner,
         params: DictData,
         queue: list[datetime],
         *,
@@ -330,41 +354,42 @@
         This method allow workflow use log object to save the execution
         result to log destination like file log to local `/logs` directory.

-
+        I will add sleep with 0.15 seconds on every step that interact with
+        the queue object.
+
+        :param runner: A CronRunner instance.
         :param params: A workflow parameter that pass to execute method.
         :param queue: A list of release time that already running.
         :param waiting_sec: A second period value that allow workflow execute.
         :param sleep_interval: A second value that want to waiting until time
             to execute.
         :param log: A log object that want to save execution result.
+
         :rtype: Result
         """
         logger.debug(
-            f"({self.run_id}) [CORE]: {self.name!r}: {on.cronjob} : run with "
+            f"({self.run_id}) [CORE]: {self.name!r}: {runner.cron} : run with "
             f"queue id: {id(queue)}"
         )
         log: Log = log or FileLog
-        gen: CronRunner = on.generate(
-            datetime.now(tz=config.tz).replace(second=0, microsecond=0)
-            + timedelta(seconds=1)
-        )
-        cron_tz: ZoneInfo = gen.tz
+        cron_tz: ZoneInfo = runner.tz

         # NOTE: get next schedule time that generate from now.
-        next_time: datetime = gen.next
+        next_time: datetime = runner.next

         # NOTE: While-loop to getting next until it does not logger.
-        while log.is_pointed(self.name, next_time, queue=queue):
-            next_time: datetime = gen.next
+        while log.is_pointed(self.name, next_time) or (next_time in queue):
+            next_time: datetime = runner.next

         # NOTE: Heap-push this next running time to log queue list.
         heappush(queue, next_time)
+        time.sleep(0.15)

         # VALIDATE: Check the different time between the next schedule time and
         # now that less than waiting period (second unit).
         if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
             logger.debug(
-                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"({self.run_id}) [CORE]: {self.name!r} : {runner.cron} : "
                 f"Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
             )

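Note that `queue` stays an ordinary `list[datetime]` but is ordered through `heappush`, so the earliest pending release is always `queue[0]` while plain membership tests and `queue.remove(...)` keep working; the new `time.sleep(0.15)` after the push matches the docstring's promise to pause around every queue interaction so concurrent pokers can observe each other's pushes. The heap behavior in isolation:

    import heapq
    from datetime import datetime, timedelta

    queue: list[datetime] = []
    base = datetime(2024, 1, 1)
    for minute in (5, 1, 3):
        heapq.heappush(queue, base + timedelta(minutes=minute))

    # Push order was 5, 1, 3 but the heap keeps the earliest release first.
    assert queue[0] == base + timedelta(minutes=1)
    assert base + timedelta(minutes=3) in queue  # plain membership still works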
@@ -376,12 +401,15 @@
                 status=0,
                 context={
                     "params": params,
-                    "release": {"status": "skipped", "cron": [str(on.cronjob)]},
+                    "release": {
+                        "status": "skipped",
+                        "cron": [str(runner.cron)],
+                    },
                 },
             )

         logger.debug(
-            f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+            f"({self.run_id}) [CORE]: {self.name!r} : {runner.cron} : "
             f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
         )

@@ -390,40 +418,34 @@
             sleep_interval + 5
         ):  # pragma: no cov
             logger.debug(
-                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"({self.run_id}) [CORE]: {self.name!r} : {runner.cron} : "
                 f"Sleep until: {duration}"
             )
             time.sleep(sleep_interval)

-        time.sleep(0.
+        time.sleep(0.15)

-        # NOTE: Release parameter that use to change if params has
-
-        release_params: DictData = {
-            "release": {
-                "logical_date": next_time,
-            },
-        }
+        # NOTE: Release parameter that use to change if params has templating.
+        release_params: DictData = {"release": {"logical_date": next_time}}

-        # WARNING: Re-create workflow object that use new running workflow
-
-
-        rs: Result = runner.execute(
+        # WARNING: Re-create workflow object that use new running workflow ID.
+        workflow: Self = self.get_running_id(run_id=self.new_run_id)
+        rs: Result = workflow.execute(
             params=param2template(params, release_params),
         )
         logger.debug(
-            f"({
+            f"({workflow.run_id}) [CORE]: {self.name!r} : {runner.cron} : "
             f"End release {next_time:%Y-%m-%d %H:%M:%S}"
         )

         # NOTE: Delete a copied workflow instance for saving memory.
-        del runner
+        del workflow

         rs.set_parent_run_id(self.run_id)
         rs_log: Log = log.model_validate(
             {
                 "name": self.name,
-                "on": str(on.cronjob),
+                "on": str(runner.cron),
                 "release": next_time,
                 "context": rs.context,
                 "parent_run_id": rs.run_id,
@@ -434,27 +456,31 @@
         rs_log.save(excluded=None)

         queue.remove(next_time)
-        time.sleep(0.
+        time.sleep(0.15)
         return Result(
             status=0,
             context={
                 "params": params,
-                "release": {"status": "run", "cron": [str(on.cronjob)]},
+                "release": {"status": "run", "cron": [str(runner.cron)]},
             },
         )

     def poke(
         self,
+        start_date: datetime | None = None,
         params: DictData | None = None,
         *,
         log: Log | None = None,
     ) -> list[Result]:
-        """Poke workflow with threading executor pool for
-        schedules that was set on the `on` value.
-        schedule that nearing to run with the
+        """Poke workflow with the ``on`` field with threading executor pool for
+        executing with all its schedules that was set on the `on` value.
+        This method will observe its schedule that nearing to run with the
+        ``self.release()`` method.

+        :param start_date: A start datetime object.
         :param params: A parameters that want to pass to the release method.
         :param log: A log object that want to use on this poking process.
+
         :rtype: list[Result]
         """
         logger.info(
@@ -470,21 +496,32 @@
         queue: list[datetime] = []
         results: list[Result] = []

+        start_date: datetime = start_date or datetime.now(tz=config.tz).replace(
+            second=0, microsecond=0
+        ) + timedelta(seconds=1)
+
         with ThreadPoolExecutor(
             max_workers=config.max_poking_pool_worker,
             thread_name_prefix="wf_poking_",
         ) as executor:
+
             futures: list[Future] = []
+
+            # NOTE: For-loop the on values that exists in this workflow object.
             for on in self.on:
                 futures.append(
                     executor.submit(
                         self.release,
-                        on,
+                        on.generate(start_date),
                         params=params,
                         log=log,
                         queue=queue,
                     )
                 )
+
+                # NOTE: Delay release date because it run so fast and making
+                # queue object can not handle release date that will duplicate
+                # by the cron runner object.
                 delay(second=0.15)

         # WARNING: This poking method does not allow to use fail-fast logic
@@ -806,7 +843,7 @@ class ScheduleWorkflow(BaseModel):
     )
     params: DictData = Field(
         default_factory=dict,
-        description="A parameters that want to use
+        description="A parameters that want to use in workflow execution.",
     )

     @model_validator(mode="before")
@@ -826,7 +863,10 @@ class ScheduleWorkflow(BaseModel):
         data: DictData,
         externals: DictData | None = None,
     ) -> DictData:
-        """Bypass the on data to loaded config data.
+        """Bypass and prepare the on data to loaded config data.
+
+        :param data:
+        :param externals:

         :rtype: DictData
         """
@@ -850,6 +890,24 @@ class ScheduleWorkflow(BaseModel):
         ]
         return data

+    @field_validator("on", mode="after")
+    def __on_no_dup__(cls, value: list[On]) -> list[On]:
+        """Validate the on fields should not contain duplicate values and if it
+        contain every minute value, it should has only one on value."""
+        set_ons: set[str] = {str(on.cronjob) for on in value}
+        if len(set_ons) != len(value):
+            raise ValueError(
+                "The on fields should not contain duplicate on value."
+            )
+
+        # WARNING:
+        # if '* * * * *' in set_ons and len(set_ons) > 1:
+        #     raise ValueError(
+        #         "If it has every minute cronjob on value, it should has only "
+        #         "one value in the on field."
+        #     )
+        return value
+

 class Schedule(BaseModel):
     """Schedule Pydantic Model that use to run with scheduler package. It does
@@ -868,6 +926,15 @@ class Schedule(BaseModel):
         description="A list of ScheduleWorkflow models.",
     )

+    @field_validator("desc", mode="after")
+    def __dedent_desc__(cls, value: str) -> str:
+        """Prepare description string that was created on a template.
+
+        :param value: A description string value that want to dedent.
+        :rtype: str
+        """
+        return dedent(value)
+
     @classmethod
     def from_loader(
         cls,
@@ -913,40 +980,42 @@ class Schedule(BaseModel):
         :param externals: An external parameters that pass to the Loader object.

         :rtype: list[WorkflowTaskData]
+        :return: Return the list of WorkflowTaskData object from the specific
+            input datetime that mapping with the on field.
         """

         # NOTE: Create pair of workflow and on.
         workflow_tasks: list[WorkflowTaskData] = []
-
+        extras: DictData = externals or {}

-        for
-            wf: Workflow = Workflow.from_loader(
+        for sch_wf in self.workflows:
+            wf: Workflow = Workflow.from_loader(sch_wf.name, externals=extras)

             # NOTE: Create default list of release datetime.
-            queue[
-            running[
+            queue[sch_wf.name]: list[datetime] = []
+            running[sch_wf.name]: list[datetime] = []

-            #
-            # Schedule object.
-
+            # IMPORTANT: Create the default 'on' value if it does not passing
+            # the on field to the Schedule object.
+            ons: list[On] = wf.on.copy() if len(sch_wf.on) == 0 else sch_wf.on

-            for on in
-
-                next_running_date =
+            for on in ons:
+                gen: CronRunner = on.generate(start_date)
+                next_running_date = gen.next

-                while next_running_date in queue[
-                    next_running_date =
+                while next_running_date in queue[sch_wf.name]:
+                    next_running_date = gen.next

                 # NOTE: Push the next running date to queue list.
-                heappush(queue[
+                heappush(queue[sch_wf.name], next_running_date)

                 workflow_tasks.append(
                     WorkflowTaskData(
                         workflow=wf,
                         on=on,
-                        params=
-                        queue=queue,
-                        running=running,
+                        params=sch_wf.params,
+                        queue=queue[sch_wf.name],
+                        running=running[sch_wf.name],
                     ),
                 )

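`tasks` now keys the shared `queue` and `running` dicts by workflow name and hands each `WorkflowTaskData` only its own two lists, which is why the dataclass fields in the next hunk become plain `list[datetime]`. The bookkeeping in isolation (names assumed for illustration):

    from datetime import datetime

    queue: dict[str, list[datetime]] = {}
    running: dict[str, list[datetime]] = {}

    for name in ("wf-first", "wf-second"):  # stand-ins for each sch_wf.name
        queue[name] = []
        running[name] = []

    # Each WorkflowTaskData receives queue[name] / running[name], so tasks
    # for different workflows can never see or mutate each other's pending
    # release datetimes, while the scheduler keeps one dict for inspection.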
@@ -997,13 +1066,15 @@ def catch_exceptions(cancel_on_failure: bool = False) -> DecoratorCancelJob:
 class WorkflowTaskData:
     """Workflow task dataclass that use to keep mapping data and objects for
     passing in multithreading task.
+
+    This dataclass will be 1-1 mapping with workflow and on objects.
     """

     workflow: Workflow
     on: On
     params: DictData = field(compare=False, hash=False)
-    queue:
-    running:
+    queue: list[datetime] = field(compare=False, hash=False)
+    running: list[datetime] = field(compare=False, hash=False)

     @catch_exceptions(cancel_on_failure=True)
     def release(
@@ -1013,8 +1084,8 @@ class WorkflowTaskData:
         waiting_sec: int = 60,
         sleep_interval: int = 15,
     ) -> None:  # pragma: no cov
-        """Workflow release
-
+        """Workflow task release that use the same logic of `workflow.release`
+        method.

         :param log: A log object for saving result logging from workflow
             execution process.
@@ -1035,14 +1106,14 @@ class WorkflowTaskData:
         next_time: datetime = gen.next

         # NOTE: get next utils it does not running.
-        while log.is_pointed(wf.name, next_time
+        while log.is_pointed(wf.name, next_time) or (next_time in self.running):
             next_time: datetime = gen.next

         logger.debug(
             f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
             f"{next_time:%Y-%m-%d %H:%M:%S}"
         )
-        heappush(self.running
+        heappush(self.running, next_time)

         if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
             logger.debug(
@@ -1052,8 +1123,8 @@ class WorkflowTaskData:

             # NOTE: Add this next running datetime that not in period to queue
             #   and remove it to running.
-            self.running
-            heappush(self.queue
+            self.running.remove(next_time)
+            heappush(self.queue, next_time)

             time.sleep(0.2)
             return
@@ -1114,7 +1185,7 @@ class WorkflowTaskData:
         rs_log.save(excluded=None)

         # NOTE: remove this release date from running
-        self.running
+        self.running.remove(next_time)

         # IMPORTANT:
         #   Add the next running datetime to workflow queue
@@ -1123,13 +1194,13 @@ class WorkflowTaskData:
         )
         future_running_time: datetime = gen.next
         while (
-            future_running_time in self.running
-            or future_running_time in self.queue
+            future_running_time in self.running
+            or future_running_time in self.queue
             or future_running_time < finish_time
         ):  # pragma: no cov
             future_running_time: datetime = gen.next

-        heappush(self.queue
+        heappush(self.queue, future_running_time)
         logger.debug(f"[CORE]: {'-' * 100}")

     def __eq__(self, other) -> bool:
@@ -1277,7 +1348,7 @@ def workflow_control(
         "Should install schedule package before use this module."
     ) from None

-
+    scheduler: Scheduler = Scheduler()
    start_date: datetime = datetime.now(tz=config.tz)

    # NOTE: Design workflow queue caching.
@@ -1295,9 +1366,11 @@ def workflow_control(
    # NOTE: Create pair of workflow and on from schedule model.
    workflow_tasks: list[WorkflowTaskData] = []
    for name in schedules:
-
+        schedule: Schedule = Schedule.from_loader(name, externals=externals)
+
+        # NOTE: Create a workflow task data instance from schedule object.
        workflow_tasks.extend(
-
+            schedule.tasks(
                start_date_waiting,
                queue=wf_queue,
                running=wf_running,
@@ -1307,7 +1380,7 @@ def workflow_control(

    # NOTE: This schedule job will start every minute at :02 seconds.
    (
-
+        scheduler.every(1)
        .minutes.at(":02")
        .do(
            workflow_task,
@@ -1319,7 +1392,7 @@ def workflow_control(
    )

    # NOTE: Checking zombie task with schedule job will start every 5 minute.
-
+    scheduler.every(5).minutes.at(":10").do(
        workflow_monitor,
        threads=thread_releases,
    ).tag("monitor")
@@ -1327,10 +1400,10 @@ def workflow_control(
    # NOTE: Start running schedule
    logger.info(f"[WORKFLOW]: Start schedule: {schedules}")
    while True:
-
+        scheduler.run_pending()
        time.sleep(1)
-        if not
-
+        if not scheduler.get_jobs("control"):
+            scheduler.clear("monitor")
            logger.warning(
                f"[WORKFLOW]: Workflow release thread: {thread_releases}"
            )
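Replacing the module-level `schedule.every(...)` calls with an explicit `Scheduler` instance keeps the control and monitor jobs in one private registry, so the tag-based queries only ever see this loop's own jobs. A minimal sketch of the same pattern with the `schedule` package, where `control_task`/`monitor_task` are placeholders for `workflow_task` and `workflow_monitor`:

    import time

    from schedule import Scheduler


    def control_task() -> None: ...  # placeholder for workflow_task
    def monitor_task() -> None: ...  # placeholder for workflow_monitor


    scheduler = Scheduler()
    scheduler.every(1).minutes.at(":02").do(control_task).tag("control")
    scheduler.every(5).minutes.at(":10").do(monitor_task).tag("monitor")

    while True:
        scheduler.run_pending()
        time.sleep(1)
        # Once every "control" job has cancelled itself, drop the monitor too.
        if not scheduler.get_jobs("control"):
            scheduler.clear("monitor")
            break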
@@ -1367,14 +1440,14 @@ def workflow_runner(

     The current workflow logic that split to process will be below diagram:

-        PIPELINES ==> process 01 ==> schedule
-
-
-
-                  ==> process 02 ==> schedule
-
-
-
+        PIPELINES ==> process 01 ==> schedule --> thread of release
+                                                  workflow task 01 01
+                                              --> thread of release
+                                                  workflow task 01 02
+                  ==> process 02 ==> schedule --> thread of release
+                                                  workflow task 02 01
+                                              --> thread of release
+                                                  workflow task 02 02
                   ==> ...
     """
     excluded: list[str] = excluded or []
ddeutil/workflow/utils.py
CHANGED
@@ -445,6 +445,7 @@ FILTERS: dict[str, callable] = {  # pragma: no cov
     "abs": abs,
     "str": str,
     "int": int,
+    "title": lambda x: x.title(),
     "upper": lambda x: x.upper(),
     "lower": lambda x: x.lower(),
     "rstr": [str, repr],
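Entries in `FILTERS` are plain callables, or a list of callables for chained filters such as `rstr`, so the new `title` entry is just `str.title` behind a template name:

    FILTERS = {
        "title": lambda x: x.title(),
        "upper": lambda x: x.upper(),
        "rstr": [str, repr],  # a chain: applied in order, no arguments allowed
    }

    assert FILTERS["title"]("hello workflow") == "Hello Workflow"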
@@ -549,6 +550,30 @@ def get_args_const(
     return name.id, args, keywords


+def get_args_from_filter(
+    ft: str,
+    filters: dict[str, FilterRegistry],
+) -> tuple[str, FilterRegistry, list[Any], dict[Any, Any]]:  # pragma: no cov
+    """Get arguments and keyword-arguments from filter function calling string.
+    and validate it with the filter functions mapping dict.
+    """
+    func_name, _args, _kwargs = get_args_const(ft)
+    args: list[Any] = [arg.value for arg in _args]
+    kwargs: dict[Any, Any] = {k: v.value for k, v in _kwargs.items()}
+
+    if func_name not in filters:
+        raise UtilException(
+            f"The post-filter: {func_name} does not support yet."
+        )
+
+    if isinstance((f_func := filters[func_name]), list) and (args or kwargs):
+        raise UtilException(
+            "Chain filter function does not support for passing arguments."
+        )
+
+    return func_name, f_func, args, kwargs
+
+
 @custom_filter("fmt")  # pragma: no cov
 def datetime_format(value: datetime, fmt: str = "%Y-%m-%d %H:%M:%S") -> str:
     """Format datetime object to string with the format."""
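The helper centralizes the parse-and-validate step that `map_post_filter` used to inline: split a filter-call string into name, callable, positional args, and keyword args, then reject unknown names and any arguments aimed at a chain filter. A self-contained approximation of the same contract, using `ast` directly and `ValueError` in place of the package's `get_args_const`/`UtilException`:

    import ast
    from typing import Any, Callable, Union

    FilterRegistry = Union[Callable, list[Callable]]


    def args_from_filter(
        ft: str, filters: dict[str, FilterRegistry]
    ) -> tuple[str, FilterRegistry, list[Any], dict[str, Any]]:
        """Parse 'name' or 'name(arg, key=value)' and validate the name."""
        node = ast.parse(ft, mode="eval").body
        if isinstance(node, ast.Call):
            name = node.func.id
            args = [ast.literal_eval(a) for a in node.args]
            kwargs = {kw.arg: ast.literal_eval(kw.value) for kw in node.keywords}
        else:
            name, args, kwargs = node.id, [], {}
        if name not in filters:
            raise ValueError(f"The post-filter: {name} does not support yet.")
        if isinstance((f_func := filters[name]), list) and (args or kwargs):
            raise ValueError("Chain filter function does not support for passing arguments.")
        return name, f_func, args, kwargs


    filters = {"upper": lambda x: x.upper(), "rstr": [str, repr]}
    name, func, args, kwargs = args_from_filter("upper", filters)
    assert (name, args, kwargs) == ("upper", [], {})
    # args_from_filter('rstr("x")', filters) would raise: chain + arguments.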
@@ -573,28 +598,16 @@ def map_post_filter(

     :rtype: T
     """
-    for
-        func_name,
-        args: list = [arg.value for arg in _args]
-        kwargs: dict = {k: v.value for k, v in _kwargs.items()}
-
-        if func_name not in filters:
-            raise UtilException(
-                f"The post-filter: {func_name} does not support yet."
-            )
-
+    for ft in post_filter:
+        func_name, f_func, args, kwargs = get_args_from_filter(ft, filters)
         try:
-            if isinstance(
-                if args or kwargs:
-                    raise UtilException(
-                        "Chain filter function does not support for passing "
-                        "arguments."
-                    )
+            if isinstance(f_func, list):
                 for func in f_func:
-                    value:
+                    value: T = func(value)
             else:
-                value:
-        except UtilException:
+                value: T = f_func(value, *args, **kwargs)
+        except UtilException as err:
+            logger.warning(str(err))
             raise
         except Exception as err:
             logger.warning(str(err))
{ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.19.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddeutil-workflow
-Version: 0.0.18
+Version: 0.0.19
 Summary: Lightweight workflow orchestration with less dependencies
 Author-email: ddeutils <korawich.anu@gmail.com>
 License: MIT
@@ -46,7 +46,7 @@ for easy to make a simple metadata driven for data workflow orchestration.
 It can to use for data operator by a `.yaml` template.

 > [!WARNING]
-> This package provide only orchestration workload
+> This package provide only orchestration workload. That mean you should not
 > use the workflow stage to process any large volume data which use lot of compute
 > resource. :cold_sweat:

@@ -58,10 +58,10 @@ configuration. It called **Metadata Driven Data Workflow**.

 **:pushpin: <u>Rules of This Workflow engine</u>**:

-1. Minimum frequency unit of scheduling is **1 minute** :warning:
+1. The Minimum frequency unit of scheduling is **1 minute** :warning:
 2. Can not re-run only failed stage and its pending downstream :rotating_light:
 3. All parallel tasks inside workflow engine use Multi-Threading
-   (
+   (Python 3.13 unlock GIL :unlock:)

 > [!NOTE]
 > _Disclaimer_: I inspire the dynamic statement from the [**GitHub Action**](https://github.com/features/actions)
@@ -183,7 +183,7 @@ application. If any configuration values do not set yet, it will use default value
 and do not raise any error to you.

 | Environment | Component | Default | Description | Remark |
-|
+|:----------------------------------------|:----------|:---------------------------------|--------------------------------------------------------------------------------------------------------------------|--------|
 | `WORKFLOW_ROOT_PATH` | Core | . | The root path of the workflow application. | |
 | `WORKFLOW_CORE_REGISTRY` | Core | src.ddeutil.workflow,tests.utils | List of importable string for the hook stage. | |
 | `WORKFLOW_CORE_REGISTRY_FILTER` | Core | ddeutil.workflow.utils | List of importable string for the filter template. | |
|
@@ -0,0 +1,21 @@
|
|
1
|
+
ddeutil/workflow/__about__.py,sha256=GGHkQYD3vWi0C2DiWlU64oocfEx-Bn3LsXBorKFxtlM,28
|
2
|
+
ddeutil/workflow/__cron.py,sha256=KUCSdx30juyX6IE6Dal8T_qSudOiaD02r1SRHFJp7IM,25778
|
3
|
+
ddeutil/workflow/__init__.py,sha256=HA0tjGBXJItNPsAqvhnFUXU0fP0K6iMMfMtJ37tRwcw,1385
|
4
|
+
ddeutil/workflow/__types.py,sha256=yizLXzjQpBt_WPaof2pIyncitJvYeksw4Q1zYJeuCLA,3707
|
5
|
+
ddeutil/workflow/api.py,sha256=vUT2RVS9sF3hvY-IrzAEnahxwq4ZFYP0G3xfctHbNsw,4701
|
6
|
+
ddeutil/workflow/cli.py,sha256=baHhvtI8snbHYHeThoX401Cd6SMB2boyyCbCtTrIl3E,3278
|
7
|
+
ddeutil/workflow/conf.py,sha256=3xJPHIQcY4Q7rJoe0V8CUVHiEt_kww_bmr1f6MhcyCM,15420
|
8
|
+
ddeutil/workflow/exceptions.py,sha256=Uf1-Tn8rAzj0aiVHSqo4fBqO80W0za7UFZgKv24E-tg,706
|
9
|
+
ddeutil/workflow/job.py,sha256=Ww1zjviDCfTVUC_q7e3HHJwk3KXEFZxzGROQXoi_JS8,24349
|
10
|
+
ddeutil/workflow/on.py,sha256=slaNJr2RWBEmAmEUcW0S99qD45ENUUgAGka5XoZ6Yag,7937
|
11
|
+
ddeutil/workflow/repeat.py,sha256=s0azh-f5JQeow7kpxM8GKlqgAmKL7oU6St3L4Ggx4cY,4925
|
12
|
+
ddeutil/workflow/route.py,sha256=JALwOH6xKu5rnII7DgA1Lbp_E5ehCoBbOW_eKqB_Olk,6753
|
13
|
+
ddeutil/workflow/scheduler.py,sha256=0xE3bjIMe4eguo24rotOt6JfTy78tgst_qe7csSlt4k,50477
|
14
|
+
ddeutil/workflow/stage.py,sha256=6Ng3RiCSrnQ-FUsRRcuG2ClMD6ifiQlgyBFi6tohfxI,25455
|
15
|
+
ddeutil/workflow/utils.py,sha256=ETzixrfrXhacAm06agnvI1E8UZKjKCkKempJnW9KKes,25581
|
16
|
+
ddeutil_workflow-0.0.19.dist-info/LICENSE,sha256=nGFZ1QEhhhWeMHf9n99_fdt4vQaXS29xWKxt-OcLywk,1085
|
17
|
+
ddeutil_workflow-0.0.19.dist-info/METADATA,sha256=zTUO4MZi08V0upHMA8xmH8q7ms5M7Eot0lflQyGeaXw,13597
|
18
|
+
ddeutil_workflow-0.0.19.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
19
|
+
ddeutil_workflow-0.0.19.dist-info/entry_points.txt,sha256=0BVOgO3LdUdXVZ-CiHHDKxzEk2c8J30jEwHeKn2YCWI,62
|
20
|
+
ddeutil_workflow-0.0.19.dist-info/top_level.txt,sha256=m9M6XeSWDwt_yMsmH6gcOjHZVK5O0-vgtNBuncHjzW4,8
|
21
|
+
ddeutil_workflow-0.0.19.dist-info/RECORD,,
|
ddeutil_workflow-0.0.18.dist-info/RECORD
REMOVED
@@ -1,21 +0,0 @@
-ddeutil/workflow/__about__.py,sha256=b5h9QJ6GhQ-EDPZTcMYoeJZy8blWgeG9xjpFBHrVLPg,28
-ddeutil/workflow/__cron.py,sha256=ZiuV4ASkXvAyFJYxEb9PKiAFNYnUt4AJozu_kH3pI4U,25777
-ddeutil/workflow/__init__.py,sha256=HA0tjGBXJItNPsAqvhnFUXU0fP0K6iMMfMtJ37tRwcw,1385
-ddeutil/workflow/__types.py,sha256=yizLXzjQpBt_WPaof2pIyncitJvYeksw4Q1zYJeuCLA,3707
-ddeutil/workflow/api.py,sha256=vUT2RVS9sF3hvY-IrzAEnahxwq4ZFYP0G3xfctHbNsw,4701
-ddeutil/workflow/cli.py,sha256=baHhvtI8snbHYHeThoX401Cd6SMB2boyyCbCtTrIl3E,3278
-ddeutil/workflow/conf.py,sha256=4j7m2blvCPlz_me4SBHf_exViUK3ZLLBCwldPztHJKo,15390
-ddeutil/workflow/exceptions.py,sha256=Uf1-Tn8rAzj0aiVHSqo4fBqO80W0za7UFZgKv24E-tg,706
-ddeutil/workflow/job.py,sha256=kSllDDiSnDpyFnIT9-Sum6OHQ16Pn5h2t5_-XljHbgk,23979
-ddeutil/workflow/on.py,sha256=rneZB5HyFWTBWriGef999bovA3glQIK6LTgC996q9Gc,7334
-ddeutil/workflow/repeat.py,sha256=s0azh-f5JQeow7kpxM8GKlqgAmKL7oU6St3L4Ggx4cY,4925
-ddeutil/workflow/route.py,sha256=JALwOH6xKu5rnII7DgA1Lbp_E5ehCoBbOW_eKqB_Olk,6753
-ddeutil/workflow/scheduler.py,sha256=baCYbv5f8HiQgV36fUvkkUpSiIRhrznuwKefsgKjHv4,47546
-ddeutil/workflow/stage.py,sha256=6Ng3RiCSrnQ-FUsRRcuG2ClMD6ifiQlgyBFi6tohfxI,25455
-ddeutil/workflow/utils.py,sha256=ouuQ3mqjKVzuchCcvVelo8Hh8c6UJ4_lHPqejcxNDRA,25147
-ddeutil_workflow-0.0.18.dist-info/LICENSE,sha256=nGFZ1QEhhhWeMHf9n99_fdt4vQaXS29xWKxt-OcLywk,1085
-ddeutil_workflow-0.0.18.dist-info/METADATA,sha256=VLZchB_AWG5kMf7RYFTyKI3zkxWdj7k942f_XZpBxyQ,13606
-ddeutil_workflow-0.0.18.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
-ddeutil_workflow-0.0.18.dist-info/entry_points.txt,sha256=0BVOgO3LdUdXVZ-CiHHDKxzEk2c8J30jEwHeKn2YCWI,62
-ddeutil_workflow-0.0.18.dist-info/top_level.txt,sha256=m9M6XeSWDwt_yMsmH6gcOjHZVK5O0-vgtNBuncHjzW4,8
-ddeutil_workflow-0.0.18.dist-info/RECORD,,
{ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.19.dist-info}/LICENSE
File without changes
{ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.19.dist-info}/entry_points.txt
File without changes
{ddeutil_workflow-0.0.18.dist-info → ddeutil_workflow-0.0.19.dist-info}/top_level.txt
File without changes