ddeutil-workflow: ddeutil_workflow-0.0.18-py3-none-any.whl → ddeutil_workflow-0.0.20-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
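In short, this release moves the Workflow model and the WorkflowTaskData dataclass out of this module (they are now imported from .workflow), adds an alias field to ScheduleWorkflow, renames workflow_task to workflow_task_release, and drops the separate running-queue map from workflow_control.

A wheel is a plain zip archive, so a diff like the one below can be reproduced locally. The following is a minimal sketch, not the tool that generated this page; the wheel filenames and the ddeutil/workflow/scheduler.py member path are assumptions, and the wheels must be downloaded first (e.g. pip download ddeutil-workflow==0.0.18 --no-deps).

    # Sketch: diff one module between two wheel files (wheels are zip archives).
    import difflib
    import zipfile

    OLD = "ddeutil_workflow-0.0.18-py3-none-any.whl"  # assumed local filename
    NEW = "ddeutil_workflow-0.0.20-py3-none-any.whl"  # assumed local filename
    MEMBER = "ddeutil/workflow/scheduler.py"  # assumed path inside the wheel

    def read_member(wheel: str, member: str) -> list[str]:
        """Read one file out of a wheel and split it into lines for difflib."""
        with zipfile.ZipFile(wheel) as zf:
            return zf.read(member).decode("utf-8").splitlines(keepends=True)

    print(
        "".join(
            difflib.unified_diff(
                read_member(OLD, MEMBER),
                read_member(NEW, MEMBER),
                fromfile=f"{OLD}/{MEMBER}",
                tofile=f"{NEW}/{MEMBER}",
            )
        )
    )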
@@ -27,21 +27,15 @@ import time
 from concurrent.futures import (
     Future,
     ProcessPoolExecutor,
-    ThreadPoolExecutor,
     as_completed,
 )
-from dataclasses import field
 from datetime import datetime, timedelta
 from functools import wraps
-from heapq import heappush
-from queue import Queue
 from textwrap import dedent
 from threading import Thread
 from typing import Callable, Optional
-from zoneinfo import ZoneInfo

 from pydantic import BaseModel, Field
-from pydantic.dataclasses import dataclass
 from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self

@@ -57,21 +51,15 @@ except ImportError: # pragma: no cov

 from .__cron import CronRunner
 from .__types import DictData, TupleStr
-from .conf import FileLog, Loader, Log, config, get_logger
-from .exceptions import JobException, WorkflowException
-from .job import Job
+from .conf import Loader, config, get_logger
+from .exceptions import WorkflowException
 from .on import On
 from .utils import (
-    Param,
-    Result,
     batch,
     delay,
-    gen_id,
-    get_diff_sec,
-    has_template,
-    param2template,
     queue2str,
 )
+from .workflow import Workflow, WorkflowTaskData

 P = ParamSpec("P")
 logger = get_logger("ddeutil.workflow")
@@ -81,717 +69,15 @@ logging.getLogger("schedule").setLevel(logging.INFO)


 __all__: TupleStr = (
-    "Workflow",
-    "WorkflowTaskData",
     "Schedule",
     "ScheduleWorkflow",
-    "workflow_task",
+    "workflow_task_release",
     "workflow_monitor",
     "workflow_control",
     "workflow_runner",
 )


-class Workflow(BaseModel):
-    """Workflow Pydantic Model this is the main future of this project because
-    it use to be workflow data for running everywhere that you want or using it
-    to scheduler task in background. It use lightweight coding line from
-    Pydantic Model and enhance execute method on it.
-    """
-
-    name: str = Field(description="A workflow name.")
-    desc: Optional[str] = Field(
-        default=None,
-        description=(
-            "A workflow description that can be string of markdown content."
-        ),
-    )
-    params: dict[str, Param] = Field(
-        default_factory=dict,
-        description="A parameters that need to use on this workflow.",
-    )
-    on: list[On] = Field(
-        default_factory=list,
-        description="A list of On instance for this workflow schedule.",
-    )
-    jobs: dict[str, Job] = Field(
-        default_factory=dict,
-        description="A mapping of job ID and job model that already loaded.",
-    )
-    run_id: Optional[str] = Field(
-        default=None,
-        description=(
-            "A running workflow ID that is able to change after initialize."
-        ),
-        repr=False,
-        exclude=True,
-    )
-
-    @property
-    def new_run_id(self) -> str:
-        """Running ID of this workflow that always generate new unique value.
-
-        :rtype: str
-        """
-        return gen_id(self.name, unique=True)
-
-    @classmethod
-    def from_loader(
-        cls,
-        name: str,
-        externals: DictData | None = None,
-    ) -> Self:
-        """Create Workflow instance from the Loader object that only receive
-        an input workflow name. The loader object will use this workflow name to
-        searching configuration data of this workflow model in conf path.
-
-        :param name: A workflow name that want to pass to Loader object.
-        :param externals: An external parameters that want to pass to Loader
-            object.
-        :rtype: Self
-        """
-        loader: Loader = Loader(name, externals=(externals or {}))
-
-        # NOTE: Validate the config type match with current connection model
-        if loader.type != cls:
-            raise ValueError(f"Type {loader.type} does not match with {cls}")
-
-        loader_data: DictData = copy.deepcopy(loader.data)
-
-        # NOTE: Add name to loader data
-        loader_data["name"] = name.replace(" ", "_")
-
-        # NOTE: Prepare `on` data
-        cls.__bypass_on(loader_data)
-        return cls.model_validate(obj=loader_data)
-
-    @classmethod
-    def __bypass_on(
-        cls,
-        data: DictData,
-        externals: DictData | None = None,
-    ) -> DictData:
-        """Bypass the on data to loaded config data.
-
-        :param data:
-        :param externals:
-        :rtype: DictData
-        """
-        if on := data.pop("on", []):
-            if isinstance(on, str):
-                on = [on]
-            if any(not isinstance(i, (dict, str)) for i in on):
-                raise TypeError("The ``on`` key should be list of str or dict")
-
-            # NOTE: Pass on value to Loader and keep on model object to on field
-            data["on"] = [
-                (
-                    Loader(n, externals=(externals or {})).data
-                    if isinstance(n, str)
-                    else n
-                )
-                for n in on
-            ]
-        return data
-
-    @model_validator(mode="before")
-    def __prepare_model_before__(cls, values: DictData) -> DictData:
-        """Prepare the params key."""
-        # NOTE: Prepare params type if it passing with only type value.
-        if params := values.pop("params", {}):
-            values["params"] = {
-                p: (
-                    {"type": params[p]}
-                    if isinstance(params[p], str)
-                    else params[p]
-                )
-                for p in params
-            }
-        return values
-
-    @field_validator("desc", mode="after")
-    def __dedent_desc__(cls, value: str) -> str:
-        """Prepare description string that was created on a template.
-
-        :param value: A description string value that want to dedent.
-        :rtype: str
-        """
-        return dedent(value)
-
-    @model_validator(mode="after")
-    def __validate_jobs_need_and_prepare_running_id(self) -> Self:
-        """Validate each need job in any jobs should exists.
-
-        :rtype: Self
-        """
-        for job in self.jobs:
-            if not_exist := [
-                need for need in self.jobs[job].needs if need not in self.jobs
-            ]:
-                raise WorkflowException(
-                    f"The needed jobs: {not_exist} do not found in "
-                    f"{self.name!r}."
-                )
-
-            # NOTE: update a job id with its job id from workflow template
-            self.jobs[job].id = job
-
-        if self.run_id is None:
-            self.run_id = self.new_run_id
-
-        # VALIDATE: Validate workflow name should not dynamic with params
-        #   template.
-        if has_template(self.name):
-            raise ValueError(
-                f"Workflow name should not has any template, please check, "
-                f"{self.name!r}."
-            )
-
-        return self
-
-    def get_running_id(self, run_id: str) -> Self:
-        """Return Workflow model object that changing workflow running ID with
-        an input running ID.
-
-        :param run_id: A replace workflow running ID.
-        :rtype: Self
-        """
-        return self.model_copy(update={"run_id": run_id})
-
-    def job(self, name: str) -> Job:
-        """Return this workflow's job that already created on this job field.
-
-        :param name: A job name that want to get from a mapping of job models.
-        :type name: str
-
-        :rtype: Job
-        :returns: A job model that exists on this workflow by input name.
-        """
-        if name not in self.jobs:
-            raise ValueError(
-                f"A Job {name!r} does not exists in this workflow, "
-                f"{self.name!r}"
-            )
-        return self.jobs[name]
-
-    def parameterize(self, params: DictData) -> DictData:
-        """Prepare a passing parameters before use it in execution process.
-        This method will validate keys of an incoming params with this object
-        necessary params field and then create a jobs key to result mapping
-        that will keep any execution result from its job.
-
-            ... {
-            ...     "params": <an-incoming-params>,
-            ...     "jobs": {}
-            ... }
-
-        :param params: A parameter mapping that receive from workflow execution.
-        :type params: DictData
-        :rtype: DictData
-        """
-        # VALIDATE: Incoming params should have keys that set on this workflow.
-        if check_key := tuple(
-            f"{k!r}"
-            for k in self.params
-            if (k not in params and self.params[k].required)
-        ):
-            raise WorkflowException(
-                f"Required Param on this workflow setting does not set: "
-                f"{', '.join(check_key)}."
-            )
-
-        # NOTE: Mapping type of param before adding it to the ``params`` key.
-        return {
-            "params": (
-                params
-                | {
-                    k: self.params[k].receive(params[k])
-                    for k in params
-                    if k in self.params
-                }
-            ),
-            "jobs": {},
-        }
-
-    def release(
-        self,
-        on: On,
-        params: DictData,
-        queue: list[datetime],
-        *,
-        waiting_sec: int = 60,
-        sleep_interval: int = 15,
-        log: Log = None,
-    ) -> Result:
-        """Start running workflow with the on schedule in period of 30 minutes.
-        That mean it will still running at background 30 minutes until the
-        schedule matching with its time.
-
-        This method allow workflow use log object to save the execution
-        result to log destination like file log to local `/logs` directory.
-
-        :param on: An on schedule value.
-        :param params: A workflow parameter that pass to execute method.
-        :param queue: A list of release time that already running.
-        :param waiting_sec: A second period value that allow workflow execute.
-        :param sleep_interval: A second value that want to waiting until time
-            to execute.
-        :param log: A log object that want to save execution result.
-        :rtype: Result
-        """
-        logger.debug(
-            f"({self.run_id}) [CORE]: {self.name!r}: {on.cronjob} : run with "
-            f"queue id: {id(queue)}"
-        )
-        log: Log = log or FileLog
-        gen: CronRunner = on.generate(
-            datetime.now(tz=config.tz).replace(second=0, microsecond=0)
-            + timedelta(seconds=1)
-        )
-        cron_tz: ZoneInfo = gen.tz
-
-        # NOTE: get next schedule time that generate from now.
-        next_time: datetime = gen.next
-
-        # NOTE: While-loop to getting next until it does not logger.
-        while log.is_pointed(self.name, next_time, queue=queue):
-            next_time: datetime = gen.next
-
-        # NOTE: Heap-push this next running time to log queue list.
-        heappush(queue, next_time)
-
-        # VALIDATE: Check the different time between the next schedule time and
-        #   now that less than waiting period (second unit).
-        if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
-            logger.debug(
-                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
-                f"Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
-            )
-
-            # NOTE: Remove next datetime from queue.
-            queue.remove(next_time)
-
-            time.sleep(0.15)
-            return Result(
-                status=0,
-                context={
-                    "params": params,
-                    "release": {"status": "skipped", "cron": [str(on.cronjob)]},
-                },
-            )
-
-        logger.debug(
-            f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
-            f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
-        )
-
-        # NOTE: Release when the time is nearly to schedule time.
-        while (duration := get_diff_sec(next_time, tz=cron_tz)) > (
-            sleep_interval + 5
-        ):  # pragma: no cov
-            logger.debug(
-                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
-                f"Sleep until: {duration}"
-            )
-            time.sleep(sleep_interval)
-
-        time.sleep(0.5)
-
-        # NOTE: Release parameter that use to change if params has
-        #   templating.
-        release_params: DictData = {
-            "release": {
-                "logical_date": next_time,
-            },
-        }
-
-        # WARNING: Re-create workflow object that use new running workflow
-        #   ID.
-        runner: Self = self.get_running_id(run_id=self.new_run_id)
-        rs: Result = runner.execute(
-            params=param2template(params, release_params),
-        )
-        logger.debug(
-            f"({runner.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
-            f"End release {next_time:%Y-%m-%d %H:%M:%S}"
-        )
-
-        # NOTE: Delete a copied workflow instance for saving memory.
-        del runner
-
-        rs.set_parent_run_id(self.run_id)
-        rs_log: Log = log.model_validate(
-            {
-                "name": self.name,
-                "on": str(on.cronjob),
-                "release": next_time,
-                "context": rs.context,
-                "parent_run_id": rs.run_id,
-                "run_id": rs.run_id,
-            }
-        )
-        # NOTE: Saving execution result to destination of the input log object.
-        rs_log.save(excluded=None)
-
-        queue.remove(next_time)
-        time.sleep(0.05)
-        return Result(
-            status=0,
-            context={
-                "params": params,
-                "release": {"status": "run", "cron": [str(on.cronjob)]},
-            },
-        )
-
-    def poke(
-        self,
-        params: DictData | None = None,
-        *,
-        log: Log | None = None,
-    ) -> list[Result]:
-        """Poke workflow with threading executor pool for executing with all its
-        schedules that was set on the `on` value. This method will observe its
-        schedule that nearing to run with the ``self.release()`` method.
-
-        :param params: A parameters that want to pass to the release method.
-        :param log: A log object that want to use on this poking process.
-        :rtype: list[Result]
-        """
-        logger.info(
-            f"({self.run_id}) [POKING]: Start Poking: {self.name!r} ..."
-        )
-
-        # NOTE: If this workflow does not set the on schedule, it will return
-        #   empty result.
-        if len(self.on) == 0:
-            return []
-
-        params: DictData = params or {}
-        queue: list[datetime] = []
-        results: list[Result] = []
-
-        with ThreadPoolExecutor(
-            max_workers=config.max_poking_pool_worker,
-            thread_name_prefix="wf_poking_",
-        ) as executor:
-            futures: list[Future] = []
-            for on in self.on:
-                futures.append(
-                    executor.submit(
-                        self.release,
-                        on,
-                        params=params,
-                        log=log,
-                        queue=queue,
-                    )
-                )
-                delay(second=0.15)
-
-            # WARNING: This poking method does not allow to use fail-fast logic
-            #   to catching parallel execution result.
-            for future in as_completed(futures):
-                results.append(future.result(timeout=60))
-
-        if len(queue) > 0:  # pragma: no cov
-            logger.error(
-                f"({self.run_id}) [POKING]: Log Queue does empty when poking "
-                f"process was finishing."
-            )
-
-        return results
-
-    def execute_job(
-        self,
-        job_id: str,
-        params: DictData,
-        *,
-        raise_error: bool = True,
-    ) -> Result:
-        """Workflow Job execution with passing dynamic parameters from the
-        workflow execution to the target job.
-
-        This execution is the minimum level of execution of this workflow
-        model. It different with ``self.execute`` because this method run only
-        one job and return with context of this job data.
-
-        :param job_id: A job ID that want to execute.
-        :param params: A params that was parameterized from workflow execution.
-        :param raise_error: A flag that raise error instead catching to result
-            if it get exception from job execution.
-        :rtype: Result
-        """
-        # VALIDATE: check a job ID that exists in this workflow or not.
-        if job_id not in self.jobs:
-            raise WorkflowException(
-                f"The job ID: {job_id} does not exists in {self.name!r} "
-                f"workflow."
-            )
-
-        logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job_id!r}")
-
-        # IMPORTANT:
-        #   Change any job running IDs to this workflow running ID.
-        #
-        try:
-            job: Job = self.jobs[job_id].get_running_id(self.run_id)
-            job.set_outputs(
-                job.execute(params=params).context,
-                to=params,
-            )
-        except JobException as err:
-            logger.error(
-                f"({self.run_id}) [WORKFLOW]: {err.__class__.__name__}: {err}"
-            )
-            if raise_error:
-                raise WorkflowException(
-                    f"Get job execution error {job_id}: JobException: {err}"
-                ) from None
-            else:
-                raise NotImplementedError() from None
-
-        return Result(status=0, context=params)
-
-    def execute(
-        self,
-        params: DictData | None = None,
-        *,
-        timeout: int = 60,
-    ) -> Result:
-        """Execute workflow with passing a dynamic parameters to all jobs that
-        included in this workflow model with ``jobs`` field.
-
-        The result of execution process for each jobs and stages on this
-        workflow will keeping in dict which able to catch out with all jobs and
-        stages by dot annotation.
-
-            For example, when I want to use the output from previous stage, I
-        can access it with syntax:
-
-            ... ${job-name}.stages.${stage-id}.outputs.${key}
-
-        :param params: An input parameters that use on workflow execution that
-            will parameterize before using it. Default is None.
-        :type params: DictData | None
-        :param timeout: A workflow execution time out in second unit that use
-            for limit time of execution and waiting job dependency. Default is
-            60 seconds.
-        :type timeout: int
-        :rtype: Result
-        """
-        logger.info(f"({self.run_id}) [CORE]: Start Execute: {self.name!r} ...")
-
-        # NOTE: I use this condition because this method allow passing empty
-        #   params and I do not want to create new dict object.
-        params: DictData = {} if params is None else params
-        ts: float = time.monotonic()
-        rs: Result = Result()
-
-        # NOTE: It should not do anything if it does not have job.
-        if not self.jobs:
-            logger.warning(
-                f"({self.run_id}) [WORKFLOW]: This workflow: {self.name!r} "
-                f"does not have any jobs"
-            )
-            return rs.catch(status=0, context=params)
-
-        # NOTE: Create a job queue that keep the job that want to running after
-        #   it dependency condition.
-        jq: Queue = Queue()
-        for job_id in self.jobs:
-            jq.put(job_id)
-
-        # NOTE: Create data context that will pass to any job executions
-        #   on this workflow.
-        #
-        #   {
-        #       'params': <input-params>,
-        #       'jobs': {},
-        #   }
-        #
-        context: DictData = self.parameterize(params)
-        status: int = 0
-        try:
-            if config.max_job_parallel == 1:
-                self.__exec_non_threading(
-                    context=context,
-                    ts=ts,
-                    job_queue=jq,
-                    timeout=timeout,
-                )
-            else:
-                self.__exec_threading(
-                    context=context,
-                    ts=ts,
-                    job_queue=jq,
-                    worker=config.max_job_parallel,
-                    timeout=timeout,
-                )
-        except WorkflowException as err:
-            context.update(
-                {
-                    "error": err,
-                    "error_message": f"{err.__class__.__name__}: {err}",
-                },
-            )
-            status = 1
-        return rs.catch(status=status, context=context)
-
-    def __exec_threading(
-        self,
-        context: DictData,
-        ts: float,
-        job_queue: Queue,
-        *,
-        worker: int = 2,
-        timeout: int = 600,
-    ) -> DictData:
-        """Workflow execution by threading strategy.
-
-            If a job need dependency, it will check dependency job ID from
-        context data before allow it run.
-
-        :param context: A context workflow data that want to downstream passing.
-        :param ts: A start timestamp that use for checking execute time should
-            timeout.
-        :param job_queue: A job queue object.
-        :param timeout: A second value unit that bounding running time.
-        :param worker: A number of threading executor pool size.
-        :rtype: DictData
-        """
-        not_time_out_flag: bool = True
-        logger.debug(
-            f"({self.run_id}): [CORE]: Run {self.name} with threading job "
-            f"executor"
-        )
-
-        # IMPORTANT: The job execution can run parallel and waiting by
-        #   needed.
-        with ThreadPoolExecutor(max_workers=worker) as executor:
-            futures: list[Future] = []
-
-            while not job_queue.empty() and (
-                not_time_out_flag := ((time.monotonic() - ts) < timeout)
-            ):
-                job_id: str = job_queue.get()
-                job: Job = self.jobs[job_id]
-
-                if any(need not in context["jobs"] for need in job.needs):
-                    job_queue.task_done()
-                    job_queue.put(job_id)
-                    time.sleep(0.25)
-                    continue
-
-                # NOTE: Start workflow job execution with deep copy context data
-                #   before release.
-                #
-                #   {
-                #       'params': <input-params>,
-                #       'jobs': {},
-                #   }
-                futures.append(
-                    executor.submit(
-                        self.execute_job,
-                        job_id,
-                        params=context,
-                    ),
-                )
-
-                # NOTE: Mark this job queue done.
-                job_queue.task_done()
-
-            # NOTE: Wait for all items to finish processing
-            job_queue.join()
-
-            for future in as_completed(futures, timeout=1800):
-                if err := future.exception():
-                    logger.error(f"({self.run_id}) [CORE]: {err}")
-                    raise WorkflowException(f"{err}")
-                try:
-                    future.result(timeout=60)
-                except TimeoutError as err:  # pragma: no cove
-                    raise WorkflowException(
-                        "Timeout when getting result from future"
-                    ) from err
-
-        if not_time_out_flag:
-            return context
-
-        # NOTE: Raise timeout error.
-        logger.warning(  # pragma: no cov
-            f"({self.run_id}) [WORKFLOW]: Execution of workflow, {self.name!r} "
-            f", was timeout"
-        )
-        raise WorkflowException(  # pragma: no cov
-            f"Execution of workflow: {self.name} was timeout"
-        )
-
-    def __exec_non_threading(
-        self,
-        context: DictData,
-        ts: float,
-        job_queue: Queue,
-        *,
-        timeout: int = 600,
-    ) -> DictData:
-        """Workflow execution with non-threading strategy that use sequential
-        job running and waiting previous job was run successful.
-
-            If a job need dependency, it will check dependency job ID from
-        context data before allow it run.
-
-        :param context: A context workflow data that want to downstream passing.
-        :param ts: A start timestamp that use for checking execute time should
-            timeout.
-        :param timeout: A second value unit that bounding running time.
-        :rtype: DictData
-        """
-        not_time_out_flag: bool = True
-        logger.debug(
-            f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
-            f"executor"
-        )
-
-        while not job_queue.empty() and (
-            not_time_out_flag := ((time.monotonic() - ts) < timeout)
-        ):
-            job_id: str = job_queue.get()
-            job: Job = self.jobs[job_id]
-
-            # NOTE: Waiting dependency job run successful before release.
-            if any(need not in context["jobs"] for need in job.needs):
-                job_queue.task_done()
-                job_queue.put(job_id)
-                time.sleep(0.05)
-                continue
-
-            # NOTE: Start workflow job execution with deep copy context data
-            #   before release. This job execution process will running until
-            #   done before checking all execution timeout or not.
-            #
-            #   {
-            #       'params': <input-params>,
-            #       'jobs': {},
-            #   }
-            self.execute_job(job_id=job_id, params=context)
-
-            # NOTE: Mark this job queue done.
-            job_queue.task_done()
-
-        # NOTE: Wait for all items to finish processing
-        job_queue.join()
-
-        if not_time_out_flag:
-            return context
-
-        # NOTE: Raise timeout error.
-        logger.warning(  # pragma: no cov
-            f"({self.run_id}) [WORKFLOW]: Execution of workflow was timeout"
-        )
-        raise WorkflowException(  # pragma: no cov
-            f"Execution of workflow: {self.name} was timeout"
-        )
-
-
 class ScheduleWorkflow(BaseModel):
     """Schedule Workflow Pydantic model that use to keep workflow model for the
     Schedule model. it should not use Workflow model directly because on the
@@ -799,6 +85,10 @@ class ScheduleWorkflow(BaseModel):
     model.
     """

+    alias: Optional[str] = Field(
+        default=None,
+        description="An alias name of workflow.",
+    )
     name: str = Field(description="A workflow name.")
     on: list[On] = Field(
         default_factory=list,
@@ -806,7 +96,7 @@ class ScheduleWorkflow(BaseModel):
     )
     params: DictData = Field(
         default_factory=dict,
-        description="A parameters that want to use to workflow execution.",
+        description="A parameters that want to use in workflow execution.",
     )

     @model_validator(mode="before")
@@ -817,16 +107,17 @@ class ScheduleWorkflow(BaseModel):
         """
         values["name"] = values["name"].replace(" ", "_")

+        if not values.get("alias"):
+            values["alias"] = values["name"]
+
         cls.__bypass_on(values)
         return values

     @classmethod
-    def __bypass_on(
-        cls,
-        data: DictData,
-        externals: DictData | None = None,
-    ) -> DictData:
-        """Bypass the on data to loaded config data.
+    def __bypass_on(cls, data: DictData) -> DictData:
+        """Bypass and prepare the on data to loaded config data.
+
+        :param data: A data that want to validate for model initialization.

         :rtype: DictData
         """
@@ -841,15 +132,32 @@ class ScheduleWorkflow(BaseModel):
             # NOTE: Pass on value to Loader and keep on model object to on
             #   field.
             data["on"] = [
-                (
-                    Loader(n, externals=(externals or {})).data
-                    if isinstance(n, str)
-                    else n
-                )
+                Loader(n, externals={}).data if isinstance(n, str) else n
                 for n in on
             ]
         return data

+    @field_validator("on", mode="after")
+    def __on_no_dup__(cls, value: list[On]) -> list[On]:
+        """Validate the on fields should not contain duplicate values and if it
+        contain every minute value, it should has only one on value.
+
+        :rtype: list[On]
+        """
+        set_ons: set[str] = {str(on.cronjob) for on in value}
+        if len(set_ons) != len(value):
+            raise ValueError(
+                "The on fields should not contain duplicate on value."
+            )
+
+        # WARNING:
+        # if '* * * * *' in set_ons and len(set_ons) > 1:
+        #     raise ValueError(
+        #         "If it has every minute cronjob on value, it should has only "
+        #         "one value in the on field."
+        #     )
+        return value
+

 class Schedule(BaseModel):
     """Schedule Pydantic Model that use to run with scheduler package. It does
@@ -868,6 +176,15 @@ class Schedule(BaseModel):
         description="A list of ScheduleWorkflow models.",
     )

+    @field_validator("desc", mode="after")
+    def __dedent_desc__(cls, value: str) -> str:
+        """Prepare description string that was created on a template.
+
+        :param value: A description string value that want to dedent.
+        :rtype: str
+        """
+        return dedent(value)
+
     @classmethod
     def from_loader(
         cls,
@@ -881,6 +198,7 @@ class Schedule(BaseModel):
         :param name: A schedule name that want to pass to Loader object.
         :param externals: An external parameters that want to pass to Loader
             object.
+
         :rtype: Self
         """
         loader: Loader = Loader(name, externals=(externals or {}))
@@ -900,7 +218,6 @@ class Schedule(BaseModel):
         self,
         start_date: datetime,
         queue: dict[str, list[datetime]],
-        running: dict[str, list[datetime]],
         *,
         externals: DictData | None = None,
     ) -> list[WorkflowTaskData]:
@@ -909,44 +226,44 @@ class Schedule(BaseModel):

         :param start_date: A start date that get from the workflow schedule.
         :param queue: A mapping of name and list of datetime for queue.
-        :param running: A mapping of name and list of datetime for running.
         :param externals: An external parameters that pass to the Loader object.

         :rtype: list[WorkflowTaskData]
+        :return: Return the list of WorkflowTaskData object from the specific
+            input datetime that mapping with the on field.
         """

         # NOTE: Create pair of workflow and on.
         workflow_tasks: list[WorkflowTaskData] = []
-        externals: DictData = externals or {}
+        extras: DictData = externals or {}
+
+        for sch_wf in self.workflows:

-        for wfs in self.workflows:
-            wf: Workflow = Workflow.from_loader(wfs.name, externals=externals)
+            wf: Workflow = Workflow.from_loader(sch_wf.name, externals=extras)

             # NOTE: Create default list of release datetime.
-            queue[wfs.name]: list[datetime] = []
-            running[wfs.name]: list[datetime] = []
+            if sch_wf.alias not in queue:
+                queue[sch_wf.alias]: list[datetime] = []

-            # NOTE: Create the default on value if it does not passing on the
-            #   Schedule object.
-            _ons: list[On] = wf.on.copy() if len(wfs.on) == 0 else wfs.on
+            # IMPORTANT: Create the default 'on' value if it does not passing
+            #   the on field to the Schedule object.
+            ons: list[On] = wf.on.copy() if len(sch_wf.on) == 0 else sch_wf.on

-            for on in _ons:
-                on_gen: CronRunner = on.generate(start_date)
-                next_running_date = on_gen.next
+            for on in ons:

-                while next_running_date in queue[wfs.name]:
-                    next_running_date = on_gen.next
+                # NOTE: Create CronRunner instance from the start_date param.
+                runner: CronRunner = on.generate(start_date)
+                next_running_date = runner.next

-                # NOTE: Push the next running date to queue list.
-                heappush(queue[wfs.name], next_running_date)
+                while next_running_date in queue[sch_wf.alias]:
+                    next_running_date = runner.next

                 workflow_tasks.append(
                     WorkflowTaskData(
+                        alias=sch_wf.alias,
                         workflow=wf,
-                        on=on,
-                        params=wfs.params,
-                        queue=queue,
-                        running=running,
+                        runner=runner,
+                        params=sch_wf.params,
                     ),
                 )

@@ -967,10 +284,10 @@ def catch_exceptions(cancel_on_failure: bool = False) -> DecoratorCancelJob:
     :rtype: DecoratorCancelJob
     """

-    def decorator(func: ReturnCancelJob) -> ReturnCancelJob:
+    def decorator(func: ReturnCancelJob) -> ReturnCancelJob:  # pragma: no cov
        try:
             # NOTE: Check the function that want to handle is method or not.
-            if inspect.ismethod(func):  # pragma: no cov
+            if inspect.ismethod(func):

                 @wraps(func)
                 def wrapper(self, *args, **kwargs):
@@ -984,7 +301,7 @@ def catch_exceptions(cancel_on_failure: bool = False) -> DecoratorCancelJob:

                 return wrapper

-        except Exception as err:  # pragma: no cov
+        except Exception as err:
             logger.exception(err)
             if cancel_on_failure:
                 return CancelJob
@@ -993,158 +310,12 @@ def catch_exceptions(cancel_on_failure: bool = False) -> DecoratorCancelJob:
     return decorator


-@dataclass(frozen=True)
-class WorkflowTaskData:
-    """Workflow task dataclass that use to keep mapping data and objects for
-    passing in multithreading task.
-    """
-
-    workflow: Workflow
-    on: On
-    params: DictData = field(compare=False, hash=False)
-    queue: dict[str, list[datetime]] = field(compare=False, hash=False)
-    running: dict[str, list[datetime]] = field(compare=False, hash=False)
-
-    @catch_exceptions(cancel_on_failure=True)
-    def release(
-        self,
-        log: Log | None = None,
-        *,
-        waiting_sec: int = 60,
-        sleep_interval: int = 15,
-    ) -> None:  # pragma: no cov
-        """Workflow release, it will use with the same logic of
-        `workflow.release` method.
-
-        :param log: A log object for saving result logging from workflow
-            execution process.
-        :param waiting_sec: A second period value that allow workflow execute.
-        :param sleep_interval: A second value that want to waiting until time
-            to execute.
-        """
-        log: Log = log or FileLog
-        wf: Workflow = self.workflow
-        on: On = self.on
-
-        gen: CronRunner = on.generate(
-            datetime.now(tz=config.tz).replace(second=0, microsecond=0)
-        )
-        cron_tz: ZoneInfo = gen.tz
-
-        # NOTE: get next schedule time that generate from now.
-        next_time: datetime = gen.next
-
-        # NOTE: get next utils it does not running.
-        while log.is_pointed(wf.name, next_time, queue=self.running[wf.name]):
-            next_time: datetime = gen.next
-
-        logger.debug(
-            f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
-            f"{next_time:%Y-%m-%d %H:%M:%S}"
-        )
-        heappush(self.running[wf.name], next_time)
-
-        if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
-            logger.debug(
-                f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
-                f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
-            )
-
-            # NOTE: Add this next running datetime that not in period to queue
-            #   and remove it to running.
-            self.running[wf.name].remove(next_time)
-            heappush(self.queue[wf.name], next_time)
-
-            time.sleep(0.2)
-            return
-
-        logger.debug(
-            f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
-            f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
-        )
-
-        # NOTE: Release when the time is nearly to schedule time.
-        while (duration := get_diff_sec(next_time, tz=config.tz)) > (
-            sleep_interval + 5
-        ):
-            logger.debug(
-                f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
-                f": Sleep until: {duration}"
-            )
-            time.sleep(15)
-
-        time.sleep(0.5)
-
-        # NOTE: Release parameter that use to change if params has
-        #   templating.
-        release_params: DictData = {
-            "release": {
-                "logical_date": next_time,
-            },
-        }
-
-        # WARNING:
-        #   Re-create workflow object that use new running workflow ID.
-        #
-        runner: Workflow = wf.get_running_id(run_id=wf.new_run_id)
-        rs: Result = runner.execute(
-            params=param2template(self.params, release_params),
-        )
-        logger.debug(
-            f"({runner.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
-            f"End release - {next_time:%Y-%m-%d %H:%M:%S}"
-        )
-
-        del runner
-
-        # NOTE: Set parent ID on this result.
-        rs.set_parent_run_id(wf.run_id)
-
-        # NOTE: Save result to log object saving.
-        rs_log: Log = log.model_validate(
-            {
-                "name": wf.name,
-                "on": str(on.cronjob),
-                "release": next_time,
-                "context": rs.context,
-                "parent_run_id": rs.run_id,
-                "run_id": rs.run_id,
-            }
-        )
-        rs_log.save(excluded=None)
-
-        # NOTE: remove this release date from running
-        self.running[wf.name].remove(next_time)
-
-        # IMPORTANT:
-        #   Add the next running datetime to workflow queue
-        finish_time: datetime = datetime.now(tz=cron_tz).replace(
-            second=0, microsecond=0
-        )
-        future_running_time: datetime = gen.next
-        while (
-            future_running_time in self.running[wf.name]
-            or future_running_time in self.queue[wf.name]
-            or future_running_time < finish_time
-        ):  # pragma: no cov
-            future_running_time: datetime = gen.next
-
-        heappush(self.queue[wf.name], future_running_time)
-        logger.debug(f"[CORE]: {'-' * 100}")
-
-    def __eq__(self, other) -> bool:
-        if isinstance(other, WorkflowTaskData):
-            return (
-                self.workflow.name == other.workflow.name
-                and self.on.cronjob == other.on.cronjob
-            )
-        return NotImplemented
-
-
 @catch_exceptions(cancel_on_failure=True)  # pragma: no cov
-def workflow_task(
+def workflow_task_release(
     workflow_tasks: list[WorkflowTaskData],
     stop: datetime,
+    queue,
+    running,
     threads: dict[str, Thread],
 ) -> CancelJob | None:
     """Workflow task generator that create release pair of workflow and on to
@@ -1154,13 +325,14 @@ def workflow_task(

     :param workflow_tasks:
     :param stop: A stop datetime object that force stop running scheduler.
+    :param queue:
+    :param running:
     :param threads:
     :rtype: CancelJob | None
     """
-    start_date: datetime = datetime.now(tz=config.tz)
-    start_date_minute: datetime = start_date.replace(second=0, microsecond=0)
+    current_date: datetime = datetime.now(tz=config.tz)

-    if start_date > stop.replace(tzinfo=config.tz):
+    if current_date > stop.replace(tzinfo=config.tz):
         logger.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
         while len(threads) > 0:
             logger.warning(
@@ -1188,45 +360,43 @@ def workflow_task(
         # NOTE: Get incoming datetime queue.
         logger.debug(
             f"[WORKFLOW]: Current queue: {task.workflow.name!r} : "
-            f"{list(queue2str(task.queue[task.workflow.name]))}"
+            f"{list(queue2str(queue[task.alias]))}"
         )

-        # NOTE: Create minute unit value for any scheduler datetime that
-        #   checking a workflow task should run in this datetime.
-        current_running_time: datetime = start_date_minute.astimezone(
-            tz=ZoneInfo(task.on.tz)
-        )
         if (
-            len(task.queue[task.workflow.name]) > 0
-            and current_running_time != task.queue[task.workflow.name][0]
-        ) or (
-            task.on.next(current_running_time)
-            != task.queue[task.workflow.name][0]
+            len(queue[task.alias]) > 0
+            and task.runner.date != queue[task.alias][0]
         ):
             logger.debug(
                 f"[WORKFLOW]: Skip schedule "
-                f"{current_running_time:%Y-%m-%d %H:%M:%S} "
-                f"for : {task.workflow.name!r} : {task.on.cronjob}"
+                f"{task.runner.date:%Y-%m-%d %H:%M:%S} "
+                f"for : {task.workflow.name!r} : {task.runner.cron}"
             )
             continue
-        elif len(task.queue[task.workflow.name]) == 0:
+
+        elif len(queue[task.alias]) == 0:
             logger.warning(
                 f"[WORKFLOW]: Queue is empty for : {task.workflow.name!r} : "
-                f"{task.on.cronjob}"
+                f"{task.runner.cron}"
             )
             continue

         # NOTE: Remove this datetime from queue.
-        task.queue[task.workflow.name].pop(0)
+        queue[task.alias].pop(0)

         # NOTE: Create thread name that able to tracking with observe schedule
         #   job.
         thread_name: str = (
-            f"{task.workflow.name}|{str(task.on.cronjob)}|"
-            f"{current_running_time:%Y%m%d%H%M}"
+            f"{task.workflow.name}|{str(task.runner.cron)}|"
+            f"{task.runner.date:%Y%m%d%H%M}"
         )
+
         wf_thread: Thread = Thread(
-            target=task.release,
+            target=catch_exceptions(cancel_on_failure=True)(task.release),
+            kwargs={
+                "queue": queue,
+                "running": running,
+            },
             name=thread_name,
             daemon=True,
         )
@@ -1277,7 +447,7 @@ def workflow_control(
         "Should install schedule package before use this module."
     ) from None

-    schedule: Scheduler = Scheduler()
+    scheduler: Scheduler = Scheduler()
     start_date: datetime = datetime.now(tz=config.tz)

     # NOTE: Design workflow queue caching.
@@ -1285,7 +455,6 @@ def workflow_control(
     #   {"workflow-name": [<release-datetime>, <release-datetime>, ...]}
     #
     wf_queue: dict[str, list[datetime]] = {}
-    wf_running: dict[str, list[datetime]] = {}
     thread_releases: dict[str, Thread] = {}

     start_date_waiting: datetime = (start_date + timedelta(minutes=1)).replace(
@@ -1295,31 +464,33 @@ def workflow_control(
     # NOTE: Create pair of workflow and on from schedule model.
     workflow_tasks: list[WorkflowTaskData] = []
     for name in schedules:
-        sch: Schedule = Schedule.from_loader(name, externals=externals)
+        schedule: Schedule = Schedule.from_loader(name, externals=externals)
+
+        # NOTE: Create a workflow task data instance from schedule object.
         workflow_tasks.extend(
-            sch.tasks(
+            schedule.tasks(
                 start_date_waiting,
                 queue=wf_queue,
-                running=wf_running,
                 externals=externals,
             ),
         )

     # NOTE: This schedule job will start every minute at :02 seconds.
     (
-        schedule.every(1)
+        scheduler.every(1)
         .minutes.at(":02")
         .do(
-            workflow_task,
+            workflow_task_release,
            workflow_tasks=workflow_tasks,
             stop=(stop or (start_date + config.stop_boundary_delta)),
+            queue=wf_queue,
             threads=thread_releases,
         )
         .tag("control")
     )

     # NOTE: Checking zombie task with schedule job will start every 5 minute.
-    schedule.every(5).minutes.at(":10").do(
+    scheduler.every(5).minutes.at(":10").do(
         workflow_monitor,
         threads=thread_releases,
     ).tag("monitor")
@@ -1327,10 +498,12 @@ def workflow_control(
     # NOTE: Start running schedule
     logger.info(f"[WORKFLOW]: Start schedule: {schedules}")
     while True:
-        schedule.run_pending()
+        scheduler.run_pending()
         time.sleep(1)
-        if not schedule.get_jobs("control"):
-            schedule.clear("monitor")
+
+        # NOTE: Break the scheduler when the control job does not exists.
+        if not scheduler.get_jobs("control"):
+            scheduler.clear("monitor")
             logger.warning(
                 f"[WORKFLOW]: Workflow release thread: {thread_releases}"
             )
@@ -1340,9 +513,6 @@ def workflow_control(
     logger.warning(
         f"Queue: {[list(queue2str(wf_queue[wf])) for wf in wf_queue]}"
     )
-    logger.warning(
-        f"Running: {[list(queue2str(wf_running[wf])) for wf in wf_running]}"
-    )
     return schedules


@@ -1367,14 +537,14 @@ def workflow_runner(

     The current workflow logic that split to process will be below diagram:

-        PIPELINES ==> process 01 ==> schedule 1 minute --> thread of release
-                                                           workflow task 01 01
-                                                       --> thread of release
-                                                           workflow task 01 02
-                  ==> process 02 ==> schedule 1 minute --> thread of release
-                                                           workflow task 02 01
-                                                       --> thread of release
-                                                           workflow task 02 02
+        PIPELINES ==> process 01 ==> schedule --> thread of release
+                                                  workflow task 01 01
+                                              --> thread of release
+                                                  workflow task 01 02
+                  ==> process 02 ==> schedule --> thread of release
+                                                  workflow task 02 01
+                                              --> thread of release
+                                                  workflow task 02 02
                   ==> ...
     """
     excluded: list[str] = excluded or []