ddeutil-workflow 0.0.19__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those released versions.
@@ -27,21 +27,15 @@ import time
 from concurrent.futures import (
     Future,
     ProcessPoolExecutor,
-    ThreadPoolExecutor,
     as_completed,
 )
-from dataclasses import field
 from datetime import datetime, timedelta
 from functools import wraps
-from heapq import heappush
-from queue import Queue
 from textwrap import dedent
 from threading import Thread
 from typing import Callable, Optional
-from zoneinfo import ZoneInfo
 
 from pydantic import BaseModel, Field
-from pydantic.dataclasses import dataclass
 from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
@@ -57,21 +51,15 @@ except ImportError: # pragma: no cov
 
 from .__cron import CronRunner
 from .__types import DictData, TupleStr
-from .conf import FileLog, Loader, Log, config, get_logger
-from .exceptions import JobException, WorkflowException
-from .job import Job
+from .conf import Loader, config, get_logger
+from .exceptions import WorkflowException
 from .on import On
 from .utils import (
-    Param,
-    Result,
     batch,
     delay,
-    gen_id,
-    get_diff_sec,
-    has_template,
-    param2template,
     queue2str,
 )
+from .workflow import Workflow, WorkflowTaskData
 
 P = ParamSpec("P")
 logger = get_logger("ddeutil.workflow")
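
Note: the import changes in the two hunks above track a single refactor: the Workflow model and the WorkflowTaskData container have moved out of this module into a sibling `workflow` module, which is why `ThreadPoolExecutor`, `heappush`, `Queue`, `ZoneInfo`, and the pydantic dataclass helpers are no longer needed here. A minimal sketch of the new import path, assuming the standard package layout (the workflow name below is illustrative):

    from ddeutil.workflow.workflow import Workflow, WorkflowTaskData

    # Loading by config name still goes through the Loader-backed
    # constructor, assuming from_loader moved along with the class.
    wf = Workflow.from_loader("wf-example")
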
@@ -81,754 +69,15 @@ logging.getLogger("schedule").setLevel(logging.INFO)
 
 
 __all__: TupleStr = (
-    "Workflow",
-    "WorkflowTaskData",
     "Schedule",
     "ScheduleWorkflow",
-    "workflow_task",
+    "workflow_task_release",
     "workflow_monitor",
     "workflow_control",
     "workflow_runner",
 )
 
 
-class Workflow(BaseModel):
-    """Workflow Pydantic Model this is the main future of this project because
-    it use to be workflow data for running everywhere that you want or using it
-    to scheduler task in background. It use lightweight coding line from
-    Pydantic Model and enhance execute method on it.
-    """
-
-    name: str = Field(description="A workflow name.")
-    desc: Optional[str] = Field(
-        default=None,
-        description=(
-            "A workflow description that can be string of markdown content."
-        ),
-    )
-    params: dict[str, Param] = Field(
-        default_factory=dict,
-        description="A parameters that need to use on this workflow.",
-    )
-    on: list[On] = Field(
-        default_factory=list,
-        description="A list of On instance for this workflow schedule.",
-    )
-    jobs: dict[str, Job] = Field(
-        default_factory=dict,
-        description="A mapping of job ID and job model that already loaded.",
-    )
-    run_id: Optional[str] = Field(
-        default=None,
-        description=(
-            "A running workflow ID that is able to change after initialize."
-        ),
-        repr=False,
-        exclude=True,
-    )
-
-    @property
-    def new_run_id(self) -> str:
-        """Running ID of this workflow that always generate new unique value.
-
-        :rtype: str
-        """
-        return gen_id(self.name, unique=True)
-
-    @classmethod
-    def from_loader(
-        cls,
-        name: str,
-        externals: DictData | None = None,
-    ) -> Self:
-        """Create Workflow instance from the Loader object that only receive
-        an input workflow name. The loader object will use this workflow name to
-        searching configuration data of this workflow model in conf path.
-
-        :param name: A workflow name that want to pass to Loader object.
-        :param externals: An external parameters that want to pass to Loader
-            object.
-        :rtype: Self
-        """
-        loader: Loader = Loader(name, externals=(externals or {}))
-
-        # NOTE: Validate the config type match with current connection model
-        if loader.type != cls:
-            raise ValueError(f"Type {loader.type} does not match with {cls}")
-
-        loader_data: DictData = copy.deepcopy(loader.data)
-
-        # NOTE: Add name to loader data
-        loader_data["name"] = name.replace(" ", "_")
-
-        # NOTE: Prepare `on` data
-        cls.__bypass_on(loader_data)
-        return cls.model_validate(obj=loader_data)
-
-    @classmethod
-    def __bypass_on(
-        cls,
-        data: DictData,
-        externals: DictData | None = None,
-    ) -> DictData:
-        """Bypass the on data to loaded config data.
-
-        :param data:
-        :param externals:
-        :rtype: DictData
-        """
-        if on := data.pop("on", []):
-            if isinstance(on, str):
-                on = [on]
-            if any(not isinstance(i, (dict, str)) for i in on):
-                raise TypeError("The ``on`` key should be list of str or dict")
-
-            # NOTE: Pass on value to Loader and keep on model object to on field
-            data["on"] = [
-                (
-                    Loader(n, externals=(externals or {})).data
-                    if isinstance(n, str)
-                    else n
-                )
-                for n in on
-            ]
-        return data
-
-    @model_validator(mode="before")
-    def __prepare_model_before__(cls, values: DictData) -> DictData:
-        """Prepare the params key."""
-        # NOTE: Prepare params type if it passing with only type value.
-        if params := values.pop("params", {}):
-            values["params"] = {
-                p: (
-                    {"type": params[p]}
-                    if isinstance(params[p], str)
-                    else params[p]
-                )
-                for p in params
-            }
-        return values
-
-    @field_validator("desc", mode="after")
-    def __dedent_desc__(cls, value: str) -> str:
-        """Prepare description string that was created on a template.
-
-        :param value: A description string value that want to dedent.
-        :rtype: str
-        """
-        return dedent(value)
-
-    @field_validator("on", mode="after")
-    def __on_no_dup__(cls, value: list[On]) -> list[On]:
-        """Validate the on fields should not contain duplicate values and if it
-        contain every minute value, it should has only one on value."""
-        set_ons: set[str] = {str(on.cronjob) for on in value}
-        if len(set_ons) != len(value):
-            raise ValueError(
-                "The on fields should not contain duplicate on value."
-            )
-
-        # WARNING:
-        # if '* * * * *' in set_ons and len(set_ons) > 1:
-        #     raise ValueError(
-        #         "If it has every minute cronjob on value, it should has only "
-        #         "one value in the on field."
-        #     )
-        return value
-
-    @model_validator(mode="after")
-    def __validate_jobs_need_and_prepare_running_id(self) -> Self:
-        """Validate each need job in any jobs should exists.
-
-        :rtype: Self
-        """
-        for job in self.jobs:
-            if not_exist := [
-                need for need in self.jobs[job].needs if need not in self.jobs
-            ]:
-                raise WorkflowException(
-                    f"The needed jobs: {not_exist} do not found in "
-                    f"{self.name!r}."
-                )
-
-            # NOTE: update a job id with its job id from workflow template
-            self.jobs[job].id = job
-
-        if self.run_id is None:
-            self.run_id = self.new_run_id
-
-        # VALIDATE: Validate workflow name should not dynamic with params
-        #   template.
-        if has_template(self.name):
-            raise ValueError(
-                f"Workflow name should not has any template, please check, "
-                f"{self.name!r}."
-            )
-
-        return self
-
-    def get_running_id(self, run_id: str) -> Self:
-        """Return Workflow model object that changing workflow running ID with
-        an input running ID.
-
-        :param run_id: A replace workflow running ID.
-        :rtype: Self
-        """
-        return self.model_copy(update={"run_id": run_id})
-
-    def job(self, name: str) -> Job:
-        """Return this workflow's job that already created on this job field.
-
-        :param name: A job name that want to get from a mapping of job models.
-        :type name: str
-
-        :rtype: Job
-        :return: A job model that exists on this workflow by input name.
-        """
-        if name not in self.jobs:
-            raise ValueError(
-                f"A Job {name!r} does not exists in this workflow, "
-                f"{self.name!r}"
-            )
-        return self.jobs[name]
-
-    def parameterize(self, params: DictData) -> DictData:
-        """Prepare a passing parameters before use it in execution process.
-        This method will validate keys of an incoming params with this object
-        necessary params field and then create a jobs key to result mapping
-        that will keep any execution result from its job.
-
-            ... {
-            ...     "params": <an-incoming-params>,
-            ...     "jobs": {}
-            ... }
-
-        :param params: A parameter mapping that receive from workflow execution.
-        :type params: DictData
-
-        :raise WorkflowException: If parameter value that want to validate does
-            not include the necessary parameter that had required flag.
-
-        :rtype: DictData
-        :return: The parameter value that validate with its parameter fields and
-            adding jobs key to this parameter.
-        """
-        # VALIDATE: Incoming params should have keys that set on this workflow.
-        if check_key := tuple(
-            f"{k!r}"
-            for k in self.params
-            if (k not in params and self.params[k].required)
-        ):
-            raise WorkflowException(
-                f"Required Param on this workflow setting does not set: "
-                f"{', '.join(check_key)}."
-            )
-
-        # NOTE: Mapping type of param before adding it to the ``params`` key.
-        return {
-            "params": (
-                params
-                | {
-                    k: self.params[k].receive(params[k])
-                    for k in params
-                    if k in self.params
-                }
-            ),
-            "jobs": {},
-        }
-
-    def release(
-        self,
-        runner: CronRunner,
-        params: DictData,
-        queue: list[datetime],
-        *,
-        waiting_sec: int = 60,
-        sleep_interval: int = 15,
-        log: Log = None,
-    ) -> Result:
-        """Start running workflow with the on schedule in period of 30 minutes.
-        That mean it will still running at background 30 minutes until the
-        schedule matching with its time.
-
-        This method allow workflow use log object to save the execution
-        result to log destination like file log to local `/logs` directory.
-
-        I will add sleep with 0.15 seconds on every step that interact with
-        the queue object.
-
-        :param runner: A CronRunner instance.
-        :param params: A workflow parameter that pass to execute method.
-        :param queue: A list of release time that already running.
-        :param waiting_sec: A second period value that allow workflow execute.
-        :param sleep_interval: A second value that want to waiting until time
-            to execute.
-        :param log: A log object that want to save execution result.
-
-        :rtype: Result
-        """
-        logger.debug(
-            f"({self.run_id}) [CORE]: {self.name!r}: {runner.cron} : run with "
-            f"queue id: {id(queue)}"
-        )
-        log: Log = log or FileLog
-        cron_tz: ZoneInfo = runner.tz
-
-        # NOTE: get next schedule time that generate from now.
-        next_time: datetime = runner.next
-
-        # NOTE: While-loop to getting next until it does not logger.
-        while log.is_pointed(self.name, next_time) or (next_time in queue):
-            next_time: datetime = runner.next
-
-        # NOTE: Heap-push this next running time to log queue list.
-        heappush(queue, next_time)
-        time.sleep(0.15)
-
-        # VALIDATE: Check the different time between the next schedule time and
-        #   now that less than waiting period (second unit).
-        if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
-            logger.debug(
-                f"({self.run_id}) [CORE]: {self.name!r} : {runner.cron} : "
-                f"Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
-            )
-
-            # NOTE: Remove next datetime from queue.
-            queue.remove(next_time)
-
-            time.sleep(0.15)
-            return Result(
-                status=0,
-                context={
-                    "params": params,
-                    "release": {
-                        "status": "skipped",
-                        "cron": [str(runner.cron)],
-                    },
-                },
-            )
-
-        logger.debug(
-            f"({self.run_id}) [CORE]: {self.name!r} : {runner.cron} : "
-            f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
-        )
-
-        # NOTE: Release when the time is nearly to schedule time.
-        while (duration := get_diff_sec(next_time, tz=cron_tz)) > (
-            sleep_interval + 5
-        ):  # pragma: no cov
-            logger.debug(
-                f"({self.run_id}) [CORE]: {self.name!r} : {runner.cron} : "
-                f"Sleep until: {duration}"
-            )
-            time.sleep(sleep_interval)
-
-        time.sleep(0.15)
-
-        # NOTE: Release parameter that use to change if params has templating.
-        release_params: DictData = {"release": {"logical_date": next_time}}
-
-        # WARNING: Re-create workflow object that use new running workflow ID.
-        workflow: Self = self.get_running_id(run_id=self.new_run_id)
-        rs: Result = workflow.execute(
-            params=param2template(params, release_params),
-        )
-        logger.debug(
-            f"({workflow.run_id}) [CORE]: {self.name!r} : {runner.cron} : "
-            f"End release {next_time:%Y-%m-%d %H:%M:%S}"
-        )
-
-        # NOTE: Delete a copied workflow instance for saving memory.
-        del workflow
-
-        rs.set_parent_run_id(self.run_id)
-        rs_log: Log = log.model_validate(
-            {
-                "name": self.name,
-                "on": str(runner.cron),
-                "release": next_time,
-                "context": rs.context,
-                "parent_run_id": rs.run_id,
-                "run_id": rs.run_id,
-            }
-        )
-        # NOTE: Saving execution result to destination of the input log object.
-        rs_log.save(excluded=None)
-
-        queue.remove(next_time)
-        time.sleep(0.15)
-        return Result(
-            status=0,
-            context={
-                "params": params,
-                "release": {"status": "run", "cron": [str(runner.cron)]},
-            },
-        )
-
-    def poke(
-        self,
-        start_date: datetime | None = None,
-        params: DictData | None = None,
-        *,
-        log: Log | None = None,
-    ) -> list[Result]:
-        """Poke workflow with the ``on`` field with threading executor pool for
-        executing with all its schedules that was set on the `on` value.
-        This method will observe its schedule that nearing to run with the
-        ``self.release()`` method.
-
-        :param start_date: A start datetime object.
-        :param params: A parameters that want to pass to the release method.
-        :param log: A log object that want to use on this poking process.
-
-        :rtype: list[Result]
-        """
-        logger.info(
-            f"({self.run_id}) [POKING]: Start Poking: {self.name!r} ..."
-        )
-
-        # NOTE: If this workflow does not set the on schedule, it will return
-        #   empty result.
-        if len(self.on) == 0:
-            return []
-
-        params: DictData = params or {}
-        queue: list[datetime] = []
-        results: list[Result] = []
-
-        start_date: datetime = start_date or datetime.now(tz=config.tz).replace(
-            second=0, microsecond=0
-        ) + timedelta(seconds=1)
-
-        with ThreadPoolExecutor(
-            max_workers=config.max_poking_pool_worker,
-            thread_name_prefix="wf_poking_",
-        ) as executor:
-
-            futures: list[Future] = []
-
-            # NOTE: For-loop the on values that exists in this workflow object.
-            for on in self.on:
-                futures.append(
-                    executor.submit(
-                        self.release,
-                        on.generate(start_date),
-                        params=params,
-                        log=log,
-                        queue=queue,
-                    )
-                )
-
-                # NOTE: Delay release date because it run so fast and making
-                #   queue object can not handle release date that will duplicate
-                #   by the cron runner object.
-                delay(second=0.15)
-
-            # WARNING: This poking method does not allow to use fail-fast logic
-            #   to catching parallel execution result.
-            for future in as_completed(futures):
-                results.append(future.result(timeout=60))
-
-        if len(queue) > 0:  # pragma: no cov
-            logger.error(
-                f"({self.run_id}) [POKING]: Log Queue does empty when poking "
-                f"process was finishing."
-            )
-
-        return results
-
-    def execute_job(
-        self,
-        job_id: str,
-        params: DictData,
-        *,
-        raise_error: bool = True,
-    ) -> Result:
-        """Workflow Job execution with passing dynamic parameters from the
-        workflow execution to the target job.
-
-        This execution is the minimum level of execution of this workflow
-        model. It different with ``self.execute`` because this method run only
-        one job and return with context of this job data.
-
-        :param job_id: A job ID that want to execute.
-        :param params: A params that was parameterized from workflow execution.
-        :param raise_error: A flag that raise error instead catching to result
-            if it get exception from job execution.
-        :rtype: Result
-        """
-        # VALIDATE: check a job ID that exists in this workflow or not.
-        if job_id not in self.jobs:
-            raise WorkflowException(
-                f"The job ID: {job_id} does not exists in {self.name!r} "
-                f"workflow."
-            )
-
-        logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job_id!r}")
-
-        # IMPORTANT:
-        #   Change any job running IDs to this workflow running ID.
-        #
-        try:
-            job: Job = self.jobs[job_id].get_running_id(self.run_id)
-            job.set_outputs(
-                job.execute(params=params).context,
-                to=params,
-            )
-        except JobException as err:
-            logger.error(
-                f"({self.run_id}) [WORKFLOW]: {err.__class__.__name__}: {err}"
-            )
-            if raise_error:
-                raise WorkflowException(
-                    f"Get job execution error {job_id}: JobException: {err}"
-                ) from None
-            else:
-                raise NotImplementedError() from None
-
-        return Result(status=0, context=params)
-
-    def execute(
-        self,
-        params: DictData | None = None,
-        *,
-        timeout: int = 60,
-    ) -> Result:
-        """Execute workflow with passing a dynamic parameters to all jobs that
-        included in this workflow model with ``jobs`` field.
-
-        The result of execution process for each jobs and stages on this
-        workflow will keeping in dict which able to catch out with all jobs and
-        stages by dot annotation.
-
-            For example, when I want to use the output from previous stage, I
-        can access it with syntax:
-
-            ... ${job-name}.stages.${stage-id}.outputs.${key}
-
-        :param params: An input parameters that use on workflow execution that
-            will parameterize before using it. Default is None.
-        :type params: DictData | None
-        :param timeout: A workflow execution time out in second unit that use
-            for limit time of execution and waiting job dependency. Default is
-            60 seconds.
-        :type timeout: int
-        :rtype: Result
-        """
-        logger.info(f"({self.run_id}) [CORE]: Start Execute: {self.name!r} ...")
-
-        # NOTE: I use this condition because this method allow passing empty
-        #   params and I do not want to create new dict object.
-        params: DictData = {} if params is None else params
-        ts: float = time.monotonic()
-        rs: Result = Result()
-
-        # NOTE: It should not do anything if it does not have job.
-        if not self.jobs:
-            logger.warning(
-                f"({self.run_id}) [WORKFLOW]: This workflow: {self.name!r} "
-                f"does not have any jobs"
-            )
-            return rs.catch(status=0, context=params)
-
-        # NOTE: Create a job queue that keep the job that want to running after
-        #   it dependency condition.
-        jq: Queue = Queue()
-        for job_id in self.jobs:
-            jq.put(job_id)
-
-        # NOTE: Create data context that will pass to any job executions
-        #   on this workflow.
-        #
-        #   {
-        #       'params': <input-params>,
-        #       'jobs': {},
-        #   }
-        #
-        context: DictData = self.parameterize(params)
-        status: int = 0
-        try:
-            if config.max_job_parallel == 1:
-                self.__exec_non_threading(
-                    context=context,
-                    ts=ts,
-                    job_queue=jq,
-                    timeout=timeout,
-                )
-            else:
-                self.__exec_threading(
-                    context=context,
-                    ts=ts,
-                    job_queue=jq,
-                    worker=config.max_job_parallel,
-                    timeout=timeout,
-                )
-        except WorkflowException as err:
-            context.update(
-                {
-                    "error": err,
-                    "error_message": f"{err.__class__.__name__}: {err}",
-                },
-            )
-            status = 1
-        return rs.catch(status=status, context=context)
-
-    def __exec_threading(
-        self,
-        context: DictData,
-        ts: float,
-        job_queue: Queue,
-        *,
-        worker: int = 2,
-        timeout: int = 600,
-    ) -> DictData:
-        """Workflow execution by threading strategy.
-
-            If a job need dependency, it will check dependency job ID from
-        context data before allow it run.
-
-        :param context: A context workflow data that want to downstream passing.
-        :param ts: A start timestamp that use for checking execute time should
-            timeout.
-        :param job_queue: A job queue object.
-        :param timeout: A second value unit that bounding running time.
-        :param worker: A number of threading executor pool size.
-        :rtype: DictData
-        """
-        not_time_out_flag: bool = True
-        logger.debug(
-            f"({self.run_id}): [CORE]: Run {self.name} with threading job "
-            f"executor"
-        )
-
-        # IMPORTANT: The job execution can run parallel and waiting by
-        #   needed.
-        with ThreadPoolExecutor(max_workers=worker) as executor:
-            futures: list[Future] = []
-
-            while not job_queue.empty() and (
-                not_time_out_flag := ((time.monotonic() - ts) < timeout)
-            ):
-                job_id: str = job_queue.get()
-                job: Job = self.jobs[job_id]
-
-                if any(need not in context["jobs"] for need in job.needs):
-                    job_queue.task_done()
-                    job_queue.put(job_id)
-                    time.sleep(0.25)
-                    continue
-
-                # NOTE: Start workflow job execution with deep copy context data
-                #   before release.
-                #
-                #   {
-                #       'params': <input-params>,
-                #       'jobs': {},
-                #   }
-                futures.append(
-                    executor.submit(
-                        self.execute_job,
-                        job_id,
-                        params=context,
-                    ),
-                )
-
-                # NOTE: Mark this job queue done.
-                job_queue.task_done()
-
-            # NOTE: Wait for all items to finish processing
-            job_queue.join()
-
-            for future in as_completed(futures, timeout=1800):
-                if err := future.exception():
-                    logger.error(f"({self.run_id}) [CORE]: {err}")
-                    raise WorkflowException(f"{err}")
-                try:
-                    future.result(timeout=60)
-                except TimeoutError as err:  # pragma: no cove
-                    raise WorkflowException(
-                        "Timeout when getting result from future"
-                    ) from err
-
-        if not_time_out_flag:
-            return context
-
-        # NOTE: Raise timeout error.
-        logger.warning(  # pragma: no cov
-            f"({self.run_id}) [WORKFLOW]: Execution of workflow, {self.name!r} "
-            f", was timeout"
-        )
-        raise WorkflowException(  # pragma: no cov
-            f"Execution of workflow: {self.name} was timeout"
-        )
-
-    def __exec_non_threading(
-        self,
-        context: DictData,
-        ts: float,
-        job_queue: Queue,
-        *,
-        timeout: int = 600,
-    ) -> DictData:
-        """Workflow execution with non-threading strategy that use sequential
-        job running and waiting previous job was run successful.
-
-            If a job need dependency, it will check dependency job ID from
-        context data before allow it run.
-
-        :param context: A context workflow data that want to downstream passing.
-        :param ts: A start timestamp that use for checking execute time should
-            timeout.
-        :param timeout: A second value unit that bounding running time.
-        :rtype: DictData
-        """
-        not_time_out_flag: bool = True
-        logger.debug(
-            f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
-            f"executor"
-        )
-
-        while not job_queue.empty() and (
-            not_time_out_flag := ((time.monotonic() - ts) < timeout)
-        ):
-            job_id: str = job_queue.get()
-            job: Job = self.jobs[job_id]
-
-            # NOTE: Waiting dependency job run successful before release.
-            if any(need not in context["jobs"] for need in job.needs):
-                job_queue.task_done()
-                job_queue.put(job_id)
-                time.sleep(0.05)
-                continue
-
-            # NOTE: Start workflow job execution with deep copy context data
-            #   before release. This job execution process will running until
-            #   done before checking all execution timeout or not.
-            #
-            #   {
-            #       'params': <input-params>,
-            #       'jobs': {},
-            #   }
-            self.execute_job(job_id=job_id, params=context)
-
-            # NOTE: Mark this job queue done.
-            job_queue.task_done()
-
-        # NOTE: Wait for all items to finish processing
-        job_queue.join()
-
-        if not_time_out_flag:
-            return context
-
-        # NOTE: Raise timeout error.
-        logger.warning(  # pragma: no cov
-            f"({self.run_id}) [WORKFLOW]: Execution of workflow was timeout"
-        )
-        raise WorkflowException(  # pragma: no cov
-            f"Execution of workflow: {self.name} was timeout"
-        )
-
-
 class ScheduleWorkflow(BaseModel):
     """Schedule Workflow Pydantic model that use to keep workflow model for the
     Schedule model. it should not use Workflow model directly because on the
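
The entire Workflow model above (roughly 750 lines covering `release`, `poke`, `execute_job`, `execute`, and both job executors) is removed from this module in 0.0.20; per the import hunk earlier, it is now provided by `.workflow`. The execution contract its docstrings describe should be unchanged by the move: `parameterize` seeds a context of the form `{"params": ..., "jobs": {}}`, each job writes outputs under the `jobs` key, and templates address them as `${job-name}.stages.${stage-id}.outputs.${key}`. A hedged sketch of reading that context (all key names are illustrative):

    # Assuming a workflow whose job "first-job" ran a stage with ID "stage-id".
    rs = wf.execute(params={"run-date": "2024-01-01"})
    outputs = rs.context["jobs"]["first-job"]["stages"]["stage-id"]["outputs"]
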
@@ -836,6 +85,10 @@ class ScheduleWorkflow(BaseModel):
     model.
     """
 
+    alias: Optional[str] = Field(
+        default=None,
+        description="An alias name of workflow.",
+    )
     name: str = Field(description="A workflow name.")
     on: list[On] = Field(
         default_factory=list,
@@ -854,19 +107,17 @@ class ScheduleWorkflow(BaseModel):
         """
         values["name"] = values["name"].replace(" ", "_")
 
+        if not values.get("alias"):
+            values["alias"] = values["name"]
+
         cls.__bypass_on(values)
         return values
 
     @classmethod
-    def __bypass_on(
-        cls,
-        data: DictData,
-        externals: DictData | None = None,
-    ) -> DictData:
+    def __bypass_on(cls, data: DictData) -> DictData:
         """Bypass and prepare the on data to loaded config data.
 
-        :param data:
-        :param externals:
+        :param data: A data that want to validate for model initialization.
 
         :rtype: DictData
         """
@@ -881,11 +132,7 @@ class ScheduleWorkflow(BaseModel):
             # NOTE: Pass on value to Loader and keep on model object to on
             #   field.
             data["on"] = [
-                (
-                    Loader(n, externals=(externals or {})).data
-                    if isinstance(n, str)
-                    else n
-                )
+                Loader(n, externals={}).data if isinstance(n, str) else n
                 for n in on
             ]
         return data
@@ -893,7 +140,10 @@ class ScheduleWorkflow(BaseModel):
     @field_validator("on", mode="after")
     def __on_no_dup__(cls, value: list[On]) -> list[On]:
         """Validate the on fields should not contain duplicate values and if it
-        contain every minute value, it should has only one on value."""
+        contain every minute value, it should has only one on value.
+
+        :rtype: list[On]
+        """
         set_ons: set[str] = {str(on.cronjob) for on in value}
         if len(set_ons) != len(value):
             raise ValueError(
@@ -948,6 +198,7 @@ class Schedule(BaseModel):
         :param name: A schedule name that want to pass to Loader object.
         :param externals: An external parameters that want to pass to Loader
             object.
+
         :rtype: Self
         """
         loader: Loader = Loader(name, externals=(externals or {}))
@@ -967,7 +218,6 @@
         self,
         start_date: datetime,
         queue: dict[str, list[datetime]],
-        running: dict[str, list[datetime]],
         *,
         externals: DictData | None = None,
     ) -> list[WorkflowTaskData]:
@@ -976,7 +226,6 @@
 
         :param start_date: A start date that get from the workflow schedule.
         :param queue: A mapping of name and list of datetime for queue.
-        :param running: A mapping of name and list of datetime for running.
         :param externals: An external parameters that pass to the Loader object.
 
         :rtype: list[WorkflowTaskData]
@@ -989,33 +238,32 @@
         extras: DictData = externals or {}
 
         for sch_wf in self.workflows:
+
             wf: Workflow = Workflow.from_loader(sch_wf.name, externals=extras)
 
             # NOTE: Create default list of release datetime.
-            queue[sch_wf.name]: list[datetime] = []
-            running[sch_wf.name]: list[datetime] = []
+            if sch_wf.alias not in queue:
+                queue[sch_wf.alias]: list[datetime] = []
 
             # IMPORTANT: Create the default 'on' value if it does not passing
             #   the on field to the Schedule object.
             ons: list[On] = wf.on.copy() if len(sch_wf.on) == 0 else sch_wf.on
 
             for on in ons:
-                gen: CronRunner = on.generate(start_date)
-                next_running_date = gen.next
 
-                while next_running_date in queue[sch_wf.name]:
-                    next_running_date = gen.next
+                # NOTE: Create CronRunner instance from the start_date param.
+                runner: CronRunner = on.generate(start_date)
+                next_running_date = runner.next
 
-                # NOTE: Push the next running date to queue list.
-                heappush(queue[sch_wf.name], next_running_date)
+                while next_running_date in queue[sch_wf.alias]:
+                    next_running_date = runner.next
 
                 workflow_tasks.append(
                     WorkflowTaskData(
+                        alias=sch_wf.alias,
                         workflow=wf,
-                        on=on,
+                        runner=runner,
                         params=sch_wf.params,
-                        queue=queue[sch_wf.name],
-                        running=running[sch_wf.name],
                     ),
                 )
 
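
Schedule.tasks no longer threads a separate `running` mapping through: the alias-keyed `queue` dict is the only shared bookkeeping, and each WorkflowTaskData now carries its CronRunner (`runner`) instead of the raw On object plus per-name queue references. A hedged sketch of the revised call (the `Schedule.from_loader` usage and schedule name are assumptions based on the surrounding code):

    from datetime import datetime

    queue: dict[str, list[datetime]] = {}
    schedule = Schedule.from_loader("schedule-example")  # hypothetical name
    tasks = schedule.tasks(datetime.now(), queue=queue)
    for task in tasks:
        print(task.alias, task.runner.cron, task.runner.date)
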
@@ -1036,10 +284,10 @@ def catch_exceptions(cancel_on_failure: bool = False) -> DecoratorCancelJob:
     :rtype: DecoratorCancelJob
     """
 
-    def decorator(func: ReturnCancelJob) -> ReturnCancelJob:
+    def decorator(func: ReturnCancelJob) -> ReturnCancelJob:  # pragma: no cov
         try:
             # NOTE: Check the function that want to handle is method or not.
-            if inspect.ismethod(func):  # pragma: no cov
+            if inspect.ismethod(func):
 
                 @wraps(func)
                 def wrapper(self, *args, **kwargs):
@@ -1053,7 +301,7 @@ def catch_exceptions(cancel_on_failure: bool = False) -> DecoratorCancelJob:
 
             return wrapper
 
-        except Exception as err:  # pragma: no cov
+        except Exception as err:
             logger.exception(err)
             if cancel_on_failure:
                 return CancelJob
@@ -1062,160 +310,12 @@ def catch_exceptions(cancel_on_failure: bool = False) -> DecoratorCancelJob:
     return decorator
 
 
-@dataclass(frozen=True)
-class WorkflowTaskData:
-    """Workflow task dataclass that use to keep mapping data and objects for
-    passing in multithreading task.
-
-        This dataclass will be 1-1 mapping with workflow and on objects.
-    """
-
-    workflow: Workflow
-    on: On
-    params: DictData = field(compare=False, hash=False)
-    queue: list[datetime] = field(compare=False, hash=False)
-    running: list[datetime] = field(compare=False, hash=False)
-
-    @catch_exceptions(cancel_on_failure=True)
-    def release(
-        self,
-        log: Log | None = None,
-        *,
-        waiting_sec: int = 60,
-        sleep_interval: int = 15,
-    ) -> None:  # pragma: no cov
-        """Workflow task release that use the same logic of `workflow.release`
-        method.
-
-        :param log: A log object for saving result logging from workflow
-            execution process.
-        :param waiting_sec: A second period value that allow workflow execute.
-        :param sleep_interval: A second value that want to waiting until time
-            to execute.
-        """
-        log: Log = log or FileLog
-        wf: Workflow = self.workflow
-        on: On = self.on
-
-        gen: CronRunner = on.generate(
-            datetime.now(tz=config.tz).replace(second=0, microsecond=0)
-        )
-        cron_tz: ZoneInfo = gen.tz
-
-        # NOTE: get next schedule time that generate from now.
-        next_time: datetime = gen.next
-
-        # NOTE: get next utils it does not running.
-        while log.is_pointed(wf.name, next_time) or (next_time in self.running):
-            next_time: datetime = gen.next
-
-        logger.debug(
-            f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
-            f"{next_time:%Y-%m-%d %H:%M:%S}"
-        )
-        heappush(self.running, next_time)
-
-        if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
-            logger.debug(
-                f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
-                f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
-            )
-
-            # NOTE: Add this next running datetime that not in period to queue
-            #   and remove it to running.
-            self.running.remove(next_time)
-            heappush(self.queue, next_time)
-
-            time.sleep(0.2)
-            return
-
-        logger.debug(
-            f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
-            f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
-        )
-
-        # NOTE: Release when the time is nearly to schedule time.
-        while (duration := get_diff_sec(next_time, tz=config.tz)) > (
-            sleep_interval + 5
-        ):
-            logger.debug(
-                f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
-                f": Sleep until: {duration}"
-            )
-            time.sleep(15)
-
-        time.sleep(0.5)
-
-        # NOTE: Release parameter that use to change if params has
-        #   templating.
-        release_params: DictData = {
-            "release": {
-                "logical_date": next_time,
-            },
-        }
-
-        # WARNING:
-        #   Re-create workflow object that use new running workflow ID.
-        #
-        runner: Workflow = wf.get_running_id(run_id=wf.new_run_id)
-        rs: Result = runner.execute(
-            params=param2template(self.params, release_params),
-        )
-        logger.debug(
-            f"({runner.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
-            f"End release - {next_time:%Y-%m-%d %H:%M:%S}"
-        )
-
-        del runner
-
-        # NOTE: Set parent ID on this result.
-        rs.set_parent_run_id(wf.run_id)
-
-        # NOTE: Save result to log object saving.
-        rs_log: Log = log.model_validate(
-            {
-                "name": wf.name,
-                "on": str(on.cronjob),
-                "release": next_time,
-                "context": rs.context,
-                "parent_run_id": rs.run_id,
-                "run_id": rs.run_id,
-            }
-        )
-        rs_log.save(excluded=None)
-
-        # NOTE: remove this release date from running
-        self.running.remove(next_time)
-
-        # IMPORTANT:
-        #   Add the next running datetime to workflow queue
-        finish_time: datetime = datetime.now(tz=cron_tz).replace(
-            second=0, microsecond=0
-        )
-        future_running_time: datetime = gen.next
-        while (
-            future_running_time in self.running
-            or future_running_time in self.queue
-            or future_running_time < finish_time
-        ):  # pragma: no cov
-            future_running_time: datetime = gen.next
-
-        heappush(self.queue, future_running_time)
-        logger.debug(f"[CORE]: {'-' * 100}")
-
-    def __eq__(self, other) -> bool:
-        if isinstance(other, WorkflowTaskData):
-            return (
-                self.workflow.name == other.workflow.name
-                and self.on.cronjob == other.on.cronjob
-            )
-        return NotImplemented
-
-
 @catch_exceptions(cancel_on_failure=True)  # pragma: no cov
-def workflow_task(
+def workflow_task_release(
     workflow_tasks: list[WorkflowTaskData],
     stop: datetime,
+    queue,
+    running,
     threads: dict[str, Thread],
 ) -> CancelJob | None:
     """Workflow task generator that create release pair of workflow and on to
@@ -1225,13 +325,14 @@ def workflow_task(
 
     :param workflow_tasks:
     :param stop: A stop datetime object that force stop running scheduler.
+    :param queue:
+    :param running:
     :param threads:
     :rtype: CancelJob | None
     """
-    start_date: datetime = datetime.now(tz=config.tz)
-    start_date_minute: datetime = start_date.replace(second=0, microsecond=0)
+    current_date: datetime = datetime.now(tz=config.tz)
 
-    if start_date > stop.replace(tzinfo=config.tz):
+    if current_date > stop.replace(tzinfo=config.tz):
         logger.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
         while len(threads) > 0:
             logger.warning(
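
The renamed `workflow_task_release` also simplifies the should-it-run-now check: 0.0.19 recomputed a minute-floored `current_running_time` per task and compared it (and `task.on.next(...)`) against the head of the task's own queue, while 0.0.20 compares the `date` held by each task's CronRunner directly with the head of the shared alias-keyed queue, as the next hunk shows. In sketch form (hedged, using the names from this diff):

    # 0.0.20-style check: skip the task until its runner's date is first in
    # the queue, then pop that datetime and hand the task to a thread.
    if len(queue[task.alias]) > 0 and task.runner.date != queue[task.alias][0]:
        ...  # skip and re-check on the next scheduler tick
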
@@ -1259,45 +360,43 @@
         # NOTE: Get incoming datetime queue.
         logger.debug(
             f"[WORKFLOW]: Current queue: {task.workflow.name!r} : "
-            f"{list(queue2str(task.queue[task.workflow.name]))}"
+            f"{list(queue2str(queue[task.alias]))}"
         )
 
-        # NOTE: Create minute unit value for any scheduler datetime that
-        #   checking a workflow task should run in this datetime.
-        current_running_time: datetime = start_date_minute.astimezone(
-            tz=ZoneInfo(task.on.tz)
-        )
         if (
-            len(task.queue[task.workflow.name]) > 0
-            and current_running_time != task.queue[task.workflow.name][0]
-        ) or (
-            task.on.next(current_running_time)
-            != task.queue[task.workflow.name][0]
+            len(queue[task.alias]) > 0
+            and task.runner.date != queue[task.alias][0]
         ):
             logger.debug(
                 f"[WORKFLOW]: Skip schedule "
-                f"{current_running_time:%Y-%m-%d %H:%M:%S} "
-                f"for : {task.workflow.name!r} : {task.on.cronjob}"
+                f"{task.runner.date:%Y-%m-%d %H:%M:%S} "
+                f"for : {task.workflow.name!r} : {task.runner.cron}"
             )
             continue
-        elif len(task.queue[task.workflow.name]) == 0:
+
+        elif len(queue[task.alias]) == 0:
             logger.warning(
                 f"[WORKFLOW]: Queue is empty for : {task.workflow.name!r} : "
-                f"{task.on.cronjob}"
+                f"{task.runner.cron}"
             )
             continue
 
         # NOTE: Remove this datetime from queue.
-        task.queue[task.workflow.name].pop(0)
+        queue[task.alias].pop(0)
 
         # NOTE: Create thread name that able to tracking with observe schedule
         #   job.
         thread_name: str = (
-            f"{task.workflow.name}|{str(task.on.cronjob)}|"
-            f"{current_running_time:%Y%m%d%H%M}"
+            f"{task.workflow.name}|{str(task.runner.cron)}|"
+            f"{task.runner.date:%Y%m%d%H%M}"
         )
+
         wf_thread: Thread = Thread(
-            target=task.release,
+            target=catch_exceptions(cancel_on_failure=True)(task.release),
+            kwargs={
+                "queue": queue,
+                "running": running,
+            },
             name=thread_name,
             daemon=True,
         )
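
Two wiring changes stand out in the thread setup above: `task.release` is now wrapped with `catch_exceptions(cancel_on_failure=True)` at thread-creation time rather than via a decorator on the dataclass, and the shared `queue`/`running` state is passed in explicitly through `kwargs` instead of living on the task object. A condensed, hedged restatement:

    wf_thread = Thread(
        # An exception in release() cancels the scheduler job instead of
        # dying silently inside the worker thread.
        target=catch_exceptions(cancel_on_failure=True)(task.release),
        kwargs={"queue": queue, "running": running},  # shared state, passed in
        name=thread_name,
        daemon=True,
    )
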
@@ -1356,7 +455,6 @@ def workflow_control(
     #   {"workflow-name": [<release-datetime>, <release-datetime>, ...]}
     #
     wf_queue: dict[str, list[datetime]] = {}
-    wf_running: dict[str, list[datetime]] = {}
    thread_releases: dict[str, Thread] = {}
 
     start_date_waiting: datetime = (start_date + timedelta(minutes=1)).replace(
@@ -1373,7 +471,6 @@
             schedule.tasks(
                 start_date_waiting,
                 queue=wf_queue,
-                running=wf_running,
                 externals=externals,
             ),
         )
@@ -1383,9 +480,10 @@
         scheduler.every(1)
         .minutes.at(":02")
         .do(
-            workflow_task,
+            workflow_task_release,
             workflow_tasks=workflow_tasks,
             stop=(stop or (start_date + config.stop_boundary_delta)),
+            queue=wf_queue,
             threads=thread_releases,
         )
         .tag("control")
@@ -1402,6 +500,8 @@
     while True:
         scheduler.run_pending()
         time.sleep(1)
+
+        # NOTE: Break the scheduler when the control job does not exists.
         if not scheduler.get_jobs("control"):
             scheduler.clear("monitor")
             logger.warning(
@@ -1413,9 +513,6 @@
     logger.warning(
         f"Queue: {[list(queue2str(wf_queue[wf])) for wf in wf_queue]}"
     )
-    logger.warning(
-        f"Running: {[list(queue2str(wf_running[wf])) for wf in wf_running]}"
-    )
     return schedules
 
 