ddeutil-workflow 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,7 +33,7 @@ from functools import wraps
33
33
  from heapq import heappop, heappush
34
34
  from textwrap import dedent
35
35
  from threading import Thread
36
- from typing import Callable, Optional, TypedDict
36
+ from typing import Callable, Optional, TypedDict, Union
37
37
 
38
38
  from pydantic import BaseModel, Field
39
39
  from pydantic.functional_validators import field_validator, model_validator
@@ -41,7 +41,7 @@ from typing_extensions import Self
41
41
 
42
42
  try:
43
43
  from typing import ParamSpec
44
- except ImportError:
44
+ except ImportError: # pragma: no cov
45
45
  from typing_extensions import ParamSpec
46
46
 
47
47
  try:
@@ -53,11 +53,9 @@ from .__cron import CronRunner
53
53
  from .__types import DictData, TupleStr
54
54
  from .conf import Loader, Log, config, get_log, get_logger
55
55
  from .cron import On
56
- from .exceptions import WorkflowException
57
- from .utils import (
58
- batch,
59
- delay,
60
- )
56
+ from .exceptions import ScheduleException, WorkflowException
57
+ from .result import Result
58
+ from .utils import batch, delay
61
59
  from .workflow import Release, ReleaseQueue, Workflow, WorkflowTask
62
60
 
63
61
  P = ParamSpec("P")
@@ -69,7 +67,7 @@ logging.getLogger("schedule").setLevel(logging.INFO)
69
67
 
70
68
  __all__: TupleStr = (
71
69
  "Schedule",
72
- "WorkflowSchedule",
70
+ "ScheduleWorkflow",
73
71
  "schedule_task",
74
72
  "monitor",
75
73
  "schedule_control",
@@ -79,8 +77,8 @@ __all__: TupleStr = (
79
77
  )
80
78
 
81
79
 
82
- class WorkflowSchedule(BaseModel):
83
- """Workflow Schedule Pydantic model that use to keep workflow model for
80
+ class ScheduleWorkflow(BaseModel):
81
+ """Schedule Workflow Pydantic model that use to keep workflow model for
84
82
  the Schedule model. it should not use Workflow model directly because on the
85
83
  schedule config it can adjust crontab value that different from the Workflow
86
84
  model.
@@ -233,9 +231,9 @@ class Schedule(BaseModel):
233
231
  "A schedule description that can be string of markdown content."
234
232
  ),
235
233
  )
236
- workflows: list[WorkflowSchedule] = Field(
234
+ workflows: list[ScheduleWorkflow] = Field(
237
235
  default_factory=list,
238
- description="A list of WorkflowSchedule models.",
236
+ description="A list of ScheduleWorkflow models.",
239
237
  )
240
238
 
241
239
  @field_validator("desc", mode="after")
@@ -258,7 +256,7 @@ class Schedule(BaseModel):
258
256
  an input schedule name. The loader object will use this schedule name to
259
257
  searching configuration data of this schedule model in conf path.
260
258
 
261
- :param name: A schedule name that want to pass to Loader object.
259
+ :param name: (str) A schedule name that want to pass to Loader object.
262
260
  :param externals: An external parameters that want to pass to Loader
263
261
  object.
264
262
 
@@ -310,8 +308,102 @@ class Schedule(BaseModel):
310
308
 
311
309
  return workflow_tasks
312
310
 
311
+ def pending(
312
+ self,
313
+ *,
314
+ stop: datetime | None = None,
315
+ externals: DictData | None = None,
316
+ log: type[Log] | None = None,
317
+ ) -> None: # pragma: no cov
318
+ """Pending this schedule tasks with the schedule package.
319
+
320
+ :param stop: A datetime value that use to stop running schedule.
321
+ :param externals: An external parameters that pass to Loader.
322
+ :param log: A log class that use on the workflow task release for
323
+ writing its release log context.
324
+ """
325
+ try:
326
+ from schedule import Scheduler
327
+ except ImportError:
328
+ raise ImportError(
329
+ "Should install schedule package before use this method."
330
+ ) from None
331
+
332
+ # NOTE: Get default logging.
333
+ log: type[Log] = log or get_log()
334
+ scheduler: Scheduler = Scheduler()
335
+
336
+ # NOTE: Create the start and stop datetime.
337
+ start_date: datetime = datetime.now(tz=config.tz)
338
+ stop_date: datetime = stop or (start_date + config.stop_boundary_delta)
339
+
340
+ # IMPORTANT: Create main mapping of queue and thread object.
341
+ queue: dict[str, ReleaseQueue] = {}
342
+ threads: ReleaseThreads = {}
313
343
 
314
- ReturnCancelJob = Callable[P, Optional[CancelJob]]
344
+ start_date_waiting: datetime = start_date.replace(
345
+ second=0, microsecond=0
346
+ ) + timedelta(minutes=1)
347
+
348
+ # NOTE: This schedule job will start every minute at :02 seconds.
349
+ (
350
+ scheduler.every(1)
351
+ .minutes.at(":02")
352
+ .do(
353
+ schedule_task,
354
+ tasks=self.tasks(
355
+ start_date_waiting, queue=queue, externals=externals
356
+ ),
357
+ stop=stop_date,
358
+ queue=queue,
359
+ threads=threads,
360
+ log=log,
361
+ )
362
+ .tag("control")
363
+ )
364
+
365
+ # NOTE: Checking zombie task with schedule job will start every 5 minute at
366
+ # :10 seconds.
367
+ (
368
+ scheduler.every(5)
369
+ .minutes.at(":10")
370
+ .do(
371
+ monitor,
372
+ threads=threads,
373
+ )
374
+ .tag("monitor")
375
+ )
376
+
377
+ # NOTE: Start running schedule
378
+ logger.info(
379
+ f"[SCHEDULE]: Schedule with stopper: {stop_date:%Y-%m-%d %H:%M:%S}"
380
+ )
381
+
382
+ while True:
383
+ scheduler.run_pending()
384
+ time.sleep(1)
385
+
386
+ # NOTE: Break the scheduler when the control job does not exist.
387
+ if not scheduler.get_jobs("control"):
388
+ scheduler.clear("monitor")
389
+
390
+ while len(threads) > 0:
391
+ logger.warning(
392
+ "[SCHEDULE]: Waiting schedule release thread that still "
393
+ "running in background."
394
+ )
395
+ delay(10)
396
+ monitor(threads)
397
+
398
+ break
399
+
400
+ logger.warning(
401
+ f"[SCHEDULE]: Queue: {[list(queue[wf].queue) for wf in queue]}"
402
+ )
403
+
404
+
405
+ ResultOrCancelJob = Union[type[CancelJob], Result]
406
+ ReturnCancelJob = Callable[P, ResultOrCancelJob]
315
407
  DecoratorCancelJob = Callable[[ReturnCancelJob], ReturnCancelJob]
316
408
 
317
409
 
@@ -326,24 +418,25 @@ def catch_exceptions(cancel_on_failure: bool = False) -> DecoratorCancelJob:
326
418
  """
327
419
 
328
420
  def decorator(func: ReturnCancelJob) -> ReturnCancelJob: # pragma: no cov
329
- try:
330
421
 
331
- @wraps(func)
332
- def wrapper(*args, **kwargs):
422
+ @wraps(func)
423
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> ResultOrCancelJob:
424
+ try:
333
425
  return func(*args, **kwargs)
426
+ except Exception as err:
427
+ logger.exception(err)
428
+ if cancel_on_failure:
429
+ return CancelJob
430
+ raise err
334
431
 
335
- return wrapper
336
-
337
- except Exception as err:
338
- logger.exception(err)
339
- if cancel_on_failure:
340
- return CancelJob
341
- raise err
432
+ return wrapper
342
433
 
343
434
  return decorator
344
435
 
345
436
 
346
437
  class ReleaseThread(TypedDict):
438
+ """TypedDict for the release thread."""
439
+
347
440
  thread: Thread
348
441
  start_date: datetime
349
442
 
@@ -358,11 +451,13 @@ def schedule_task(
358
451
  queue: dict[str, ReleaseQueue],
359
452
  threads: ReleaseThreads,
360
453
  log: type[Log],
361
- ) -> CancelJob | None:
362
- """Workflow task generator that create release pair of workflow and on to
363
- the threading in background.
454
+ ) -> type[CancelJob] | None:
455
+ """Schedule task function that generate thread of workflow task release
456
+ method in background. This function do the same logic as the workflow poke
457
+ method, but it runs with map of schedules and the on values.
364
458
 
365
- This workflow task will start every minute at ':02' second.
459
+ This schedule task start runs every minute at ':02' second, and it does
460
+ not allow you to run with offset time.
366
461
 
367
462
  :param tasks: A list of WorkflowTask object.
368
463
  :param stop: A stop datetime object that force stop running scheduler.
@@ -370,7 +465,7 @@ def schedule_task(
370
465
  :param threads: A mapping of alias name and Thread object.
371
466
  :param log: A log class that want to make log object.
372
467
 
373
- :rtype: CancelJob | None
468
+ :rtype: type[CancelJob] | None
374
469
  """
375
470
  current_date: datetime = datetime.now(tz=config.tz)
376
471
  if current_date > stop.replace(tzinfo=config.tz):
@@ -381,15 +476,16 @@ def schedule_task(
381
476
  # function. It will duplicate running with different schedule value
382
477
  # because I use current time in this condition.
383
478
  #
384
- # For example, if a workflow A queue has '00:02:00' time that
385
- # should to run and its schedule has '*/2 * * * *' and '*/35 * * * *'.
386
- # This condition will release with 2 threading job.
479
+ # For example, if a queue has a time release be '00:02:00' that should
480
+ # to run and its schedule has '*/2 * * * *' and '*/35 * * * *'.
481
+ # This condition make this function create 2 threading tasks.
387
482
  #
388
- # '00:02:00' --> '*/2 * * * *' --> running
389
- # --> '*/35 * * * *' --> skip
483
+ # '00:02:00' --> '*/2 * * * *' --> run
484
+ # --> '*/35 * * * *' --> skip
390
485
  #
391
486
  for task in tasks:
392
487
 
488
+ # NOTE: Get the ReleaseQueue with an alias of the WorkflowTask.
393
489
  q: ReleaseQueue = queue[task.alias]
394
490
 
395
491
  # NOTE: Start adding queue and move the runner date in the WorkflowTask.
@@ -410,12 +506,17 @@ def schedule_task(
410
506
  current_release: datetime = current_date.replace(
411
507
  second=0, microsecond=0
412
508
  )
413
- if (first_date := q.first_queue.date) != current_release:
509
+ if (first_date := q.first_queue.date) > current_release:
414
510
  logger.debug(
415
511
  f"[WORKFLOW]: Skip schedule "
416
512
  f"{first_date:%Y-%m-%d %H:%M:%S} for : {task.alias!r}"
417
513
  )
418
514
  continue
515
+ elif first_date < current_release: # pragma: no cov
516
+ raise ScheduleException(
517
+ "The first release date from queue should not less than current"
518
+ "release date."
519
+ )
419
520
 
420
521
  # NOTE: Pop the latest release and push it to running.
421
522
  release: Release = heappop(q.queue)
@@ -445,7 +546,7 @@ def schedule_task(
445
546
 
446
547
  delay()
447
548
 
448
- logger.debug(f"[SCHEDULE]: End schedule release {'=' * 80}")
549
+ logger.debug(f"[SCHEDULE]: End schedule task {'=' * 80}")
449
550
 
450
551
 
451
552
  def monitor(threads: ReleaseThreads) -> None: # pragma: no cov
@@ -455,9 +556,7 @@ def monitor(threads: ReleaseThreads) -> None: # pragma: no cov
455
556
  :param threads: A mapping of Thread object and its name.
456
557
  :type threads: ReleaseThreads
457
558
  """
458
- logger.debug(
459
- "[MONITOR]: Start checking long running workflow release task."
460
- )
559
+ logger.debug("[MONITOR]: Start checking long running schedule task.")
461
560
 
462
561
  snapshot_threads: list[str] = list(threads.keys())
463
562
  for t_name in snapshot_threads:
@@ -476,12 +575,15 @@ def schedule_control(
476
575
  *,
477
576
  log: type[Log] | None = None,
478
577
  ) -> list[str]: # pragma: no cov
479
- """Scheduler control function that running every minute.
578
+ """Scheduler control function that runs the chunk of schedules every minute
579
+ and this function release monitoring thread for tracking undead thread in
580
+ the background.
480
581
 
481
582
  :param schedules: A list of workflow names that want to schedule running.
482
583
  :param stop: A datetime value that use to stop running schedule.
483
584
  :param externals: An external parameters that pass to Loader.
484
- :param log:
585
+ :param log: A log class that use on the workflow task release for writing
586
+ its release log context.
485
587
 
486
588
  :rtype: list[str]
487
589
  """
@@ -493,8 +595,11 @@ def schedule_control(
493
595
  "Should install schedule package before use this module."
494
596
  ) from None
495
597
 
598
+ # NOTE: Get default logging.
496
599
  log: type[Log] = log or get_log()
497
600
  scheduler: Scheduler = Scheduler()
601
+
602
+ # NOTE: Create the start and stop datetime.
498
603
  start_date: datetime = datetime.now(tz=config.tz)
499
604
  stop_date: datetime = stop or (start_date + config.stop_boundary_delta)
500
605
 
@@ -506,7 +611,6 @@ def schedule_control(
506
611
  second=0, microsecond=0
507
612
  ) + timedelta(minutes=1)
508
613
 
509
- # NOTE: Start create workflow tasks from list of schedule name.
510
614
  tasks: list[WorkflowTask] = []
511
615
  for name in schedules:
512
616
  schedule: Schedule = Schedule.from_loader(name, externals=externals)
@@ -533,7 +637,8 @@ def schedule_control(
533
637
  .tag("control")
534
638
  )
535
639
 
536
- # NOTE: Checking zombie task with schedule job will start every 5 minute.
640
+ # NOTE: Checking zombie task with schedule job will start every 5 minute at
641
+ # :10 seconds.
537
642
  (
538
643
  scheduler.every(5)
539
644
  .minutes.at(":10")
@@ -563,7 +668,7 @@ def schedule_control(
563
668
  "[SCHEDULE]: Waiting schedule release thread that still "
564
669
  "running in background."
565
670
  )
566
- delay(15)
671
+ delay(10)
567
672
  monitor(threads)
568
673
 
569
674
  break
@@ -579,16 +684,15 @@ def schedule_runner(
579
684
  externals: DictData | None = None,
580
685
  excluded: list[str] | None = None,
581
686
  ) -> list[str]: # pragma: no cov
582
- """Schedule runner function for start submit the ``schedule_control`` func
583
- in multiprocessing pool with chunk of schedule config that exists in config
584
- path by ``WORKFLOW_APP_MAX_SCHEDULE_PER_PROCESS``.
687
+ """Schedule runner function is the multiprocess controller function for
688
+ split the setting schedule to the `schedule_control` function on the
689
+ process pool. It chunks schedule configs that exists in config
690
+ path by `WORKFLOW_APP_MAX_SCHEDULE_PER_PROCESS` value.
585
691
 
586
692
  :param stop: A stop datetime object that force stop running scheduler.
587
693
  :param externals:
588
694
  :param excluded: A list of schedule name that want to exclude from finding.
589
695
 
590
- :rtype: list[str]
591
-
592
696
  This function will get all workflows that include on value that was
593
697
  created in config path and chunk it with application config variable
594
698
  ``WORKFLOW_APP_MAX_SCHEDULE_PER_PROCESS`` env var to multiprocess executor
@@ -600,7 +704,9 @@ def schedule_runner(
600
704
  --> thread of release task 01 02
601
705
  ==> schedule --> thread of release task 02 01
602
706
  --> thread of release task 02 02
603
- ==> process 02
707
+ ==> process 02 ==> ...
708
+
709
+ :rtype: list[str]
604
710
  """
605
711
  results: list[str] = []
606
712
 
ddeutil/workflow/stage.py CHANGED
@@ -328,7 +328,7 @@ class BashStage(BaseStage):
328
328
  If your current OS is Windows, it will run on the bash in the WSL.
329
329
 
330
330
  I get some limitation when I run shell statement with the built-in
331
- supprocess package. It does not good enough to use multiline statement.
331
+ subprocess package. It is not good enough for multiline statements.
332
332
  Thus, I add writing ``.sh`` file before execution process for fix this
333
333
  issue.
334
334
 
@@ -665,3 +665,15 @@ Stage = Union[
665
665
  TriggerStage,
666
666
  EmptyStage,
667
667
  ]
668
+
669
+
670
+ # TODO: This stage is not implemented yet
671
+ class ParallelStage(BaseModel): # pragma: no cov
672
+ parallel: list[Stage]
673
+ max_parallel_core: int = Field(default=2)
674
+
675
+
676
+ # TODO: This stage is not implemented yet
677
+ class ForEachStage(BaseModel): # pragma: no cov
678
+ foreach: list[str]
679
+ stages: list[Stage]
@@ -79,7 +79,7 @@ def custom_filter(name: str) -> Callable[P, FilterFunc]:
79
79
  def make_filter_registry() -> dict[str, FilterRegistry]:
80
80
  """Return registries of all functions that able to called with task.
81
81
 
82
- :rtype: dict[str, Registry]
82
+ :rtype: dict[str, FilterRegistry]
83
83
  """
84
84
  rs: dict[str, FilterRegistry] = {}
85
85
  for module in config.regis_filter:
@@ -108,6 +108,8 @@ def get_args_const(
108
108
  ) -> tuple[str, list[Constant], dict[str, Constant]]:
109
109
  """Get arguments and keyword-arguments from function calling string.
110
110
 
111
+ :param expr: An expr string value.
112
+
111
113
  :rtype: tuple[str, list[Constant], dict[str, Constant]]
112
114
  """
113
115
  try:
@@ -150,6 +152,11 @@ def get_args_from_filter(
150
152
  ) -> tuple[str, FilterRegistry, list[Any], dict[Any, Any]]: # pragma: no cov
151
153
  """Get arguments and keyword-arguments from filter function calling string.
152
154
  and validate it with the filter functions mapping dict.
155
+
156
+ :param ft:
157
+ :param filters:
158
+
159
+ :rtype: tuple[str, FilterRegistry, list[Any], dict[Any, Any]]
153
160
  """
154
161
  func_name, _args, _kwargs = get_args_const(ft)
155
162
  args: list[Any] = [arg.value for arg in _args]
@@ -243,7 +250,7 @@ def str2template(
243
250
  params: DictData,
244
251
  *,
245
252
  filters: dict[str, FilterRegistry] | None = None,
246
- ) -> Any:
253
+ ) -> str:
247
254
  """(Sub-function) Pass param to template string that can search by
248
255
  ``RE_CALLER`` regular expression.
249
256
 
@@ -255,6 +262,8 @@ def str2template(
255
262
  :param params: A parameter value that getting with matched regular
256
263
  expression.
257
264
  :param filters:
265
+
266
+ :rtype: str
258
267
  """
259
268
  filters: dict[str, FilterRegistry] = filters or make_filter_registry()
260
269
 
@@ -295,7 +304,7 @@ def str2template(
295
304
  return search_env_replace(value)
296
305
 
297
306
 
298
- def param2template(value: Any, params: DictData) -> Any:
307
+ def param2template(value: T, params: DictData) -> T:
299
308
  """Pass param to template string that can search by ``RE_CALLER`` regular
300
309
  expression.
301
310
 
@@ -303,7 +312,7 @@ def param2template(value: Any, params: DictData) -> Any:
303
312
  :param params: A parameter value that getting with matched regular
304
313
  expression.
305
314
 
306
- :rtype: Any
315
+ :rtype: T
307
316
  :returns: An any getter value from the params input.
308
317
  """
309
318
  filters: dict[str, FilterRegistry] = make_filter_registry()
ddeutil/workflow/utils.py CHANGED
@@ -21,10 +21,9 @@ from zoneinfo import ZoneInfo
21
21
  from ddeutil.core import hash_str
22
22
 
23
23
  from .__types import DictData, Matrix
24
- from .conf import config
25
24
 
26
25
  T = TypeVar("T")
27
-
26
+ UTC = ZoneInfo("UTC")
28
27
  logger = logging.getLogger("ddeutil.workflow")
29
28
 
30
29
 
@@ -37,7 +36,7 @@ def get_dt_now(
37
36
  :param offset:
38
37
  :return: The current datetime object that use an input timezone or UTC.
39
38
  """
40
- return datetime.now(tz=(tz or ZoneInfo("UTC"))) - timedelta(seconds=offset)
39
+ return datetime.now(tz=(tz or UTC)) - timedelta(seconds=offset)
41
40
 
42
41
 
43
42
  def get_diff_sec(
@@ -52,17 +51,42 @@ def get_diff_sec(
52
51
  """
53
52
  return round(
54
53
  (
55
- dt
56
- - datetime.now(tz=(tz or ZoneInfo("UTC")))
57
- - timedelta(seconds=offset)
54
+ dt - datetime.now(tz=(tz or UTC)) - timedelta(seconds=offset)
58
55
  ).total_seconds()
59
56
  )
60
57
 
61
58
 
62
- def wait_a_minute(now: datetime, second: float = 2) -> None: # pragma: no cov
59
+ def reach_next_minute(
60
+ dt: datetime, tz: ZoneInfo | None = None, offset: float = 0.0
61
+ ) -> bool:
62
+ """Check whether this datetime object falls in a later minute than the
63
+ current datetime, comparing both at minute precision.
64
+ """
65
+ diff: float = (
66
+ dt.replace(second=0, microsecond=0)
67
+ - (
68
+ get_dt_now(tz=(tz or UTC), offset=offset).replace(
69
+ second=0, microsecond=0
70
+ )
71
+ )
72
+ ).total_seconds()
73
+ if diff >= 60:
74
+ return True
75
+ elif diff >= 0:
76
+ return False
77
+
78
+ raise ValueError(
79
+ "Check reach the next minute function should check a datetime that not "
80
+ "less than the current date"
81
+ )
82
+
83
+
84
+ def wait_to_next_minute(
85
+ dt: datetime, second: float = 0
86
+ ) -> None: # pragma: no cov
63
87
  """Wait with sleep to the next minute with an offset second value."""
64
- future = now.replace(second=0, microsecond=0) + timedelta(minutes=1)
65
- time.sleep((future - now).total_seconds() + second)
88
+ future = dt.replace(second=0, microsecond=0) + timedelta(minutes=1)
89
+ time.sleep((future - dt).total_seconds() + second)
66
90
 
67
91
 
68
92
  def delay(second: float = 0) -> None: # pragma: no cov
@@ -92,6 +116,8 @@ def gen_id(
92
116
 
93
117
  :rtype: str
94
118
  """
119
+ from .conf import config
120
+
95
121
  if not isinstance(value, str):
96
122
  value: str = str(value)
97
123
 
@@ -177,7 +203,7 @@ def batch(iterable: Iterator[Any], n: int) -> Iterator[Any]:
177
203
  """Batch data into iterators of length n. The last batch may be shorter.
178
204
 
179
205
  Example:
180
- >>> for b in batch('ABCDEFG', 3):
206
+ >>> for b in batch(iter('ABCDEFG'), 3):
181
207
  ... print(list(b))
182
208
  ['A', 'B', 'C']
183
209
  ['D', 'E', 'F']