ddeutil-workflow 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1132 @@
+ # ------------------------------------------------------------------------------
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+ # Licensed under the MIT License. See LICENSE in the project root for
+ # license information.
+ # ------------------------------------------------------------------------------
+ """The main schedule-running entrypoint is the ``workflow_runner`` function,
+ which triggers multiprocessing of the ``workflow_control`` function for the
+ schedules listed in the config by ``Loader.finds(Schedule)``.
+
+ ``workflow_control`` is the scheduler function that releases two scheduled
+ functions: ``workflow_task`` and ``workflow_monitor``.
+
+     ``workflow_control`` --- Every minute at :02 --> ``workflow_task``
+
+                          --- Every 5 minutes     --> ``workflow_monitor``
+
+ The ``workflow_task`` function runs the ``task.release`` method in a threading
+ object as its multithreading strategy. This ``release`` method runs only one
+ crontab value from the ``on`` field.
+ """
+ from __future__ import annotations
+
+ import copy
+ import time
+ from concurrent.futures import (
+     Future,
+     ThreadPoolExecutor,
+     as_completed,
+ )
+ from dataclasses import field
+ from datetime import datetime, timedelta
+ from functools import total_ordering
+ from heapq import heappop, heappush
+ from queue import Queue
+ from textwrap import dedent
+ from typing import Optional
+
+ from pydantic import BaseModel, ConfigDict, Field
+ from pydantic.dataclasses import dataclass
+ from pydantic.functional_validators import field_validator, model_validator
+ from typing_extensions import Self
+
+ from .__cron import CronJob, CronRunner
+ from .__types import DictData, TupleStr
+ from .conf import FileLog, Loader, Log, config, get_logger
+ from .exceptions import JobException, WorkflowException
+ from .job import Job
+ from .on import On
+ from .utils import (
+     Param,
+     Result,
+     delay,
+     gen_id,
+     get_diff_sec,
+     get_dt_now,
+     has_template,
+     param2template,
+ )
+
+ logger = get_logger("ddeutil.workflow")
+
+ __all__: TupleStr = (
+     "Workflow",
+     "WorkflowRelease",
+     "WorkflowQueue",
+     "WorkflowTaskData",
+ )
+
+
+ @total_ordering
+ @dataclass(config=ConfigDict(arbitrary_types_allowed=True))
+ class WorkflowRelease:
+     """Workflow release Pydantic dataclass object."""
+
+     date: datetime
+     offset: float
+     end_date: datetime
+     runner: CronRunner
+     type: str
+
+     def __repr__(self) -> str:
+         return repr(f"{self.date:%Y-%m-%d %H:%M:%S}")
+
+     def __str__(self) -> str:
+         return f"{self.date:%Y-%m-%d %H:%M:%S}"
+
+     @classmethod
+     def from_dt(cls, dt: datetime) -> Self:
+         """Construct a WorkflowRelease from a datetime object only.
+
+         :param dt: A datetime object.
+
+         :rtype: Self
+         """
+         return cls(
+             date=dt,
+             offset=0,
+             end_date=dt + timedelta(days=1),
+             runner=CronJob("* * * * *").schedule(dt.replace(tzinfo=config.tz)),
+             type="manual",
+         )
+
+     def __eq__(self, other: WorkflowRelease | datetime) -> bool:
+         """Override the equality operator to compare against the same type or
+         a datetime.
+         """
+         if isinstance(other, self.__class__):
+             return self.date == other.date
+         elif isinstance(other, datetime):
+             return self.date == other
+         return NotImplemented
+
+     def __lt__(self, other: WorkflowRelease | datetime) -> bool:
+         """Override the less-than operator to compare against the same type or
+         a datetime.
+         """
+         if isinstance(other, self.__class__):
+             return self.date < other.date
+         elif isinstance(other, datetime):
+             return self.date < other
+         return NotImplemented
+
+
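Since `WorkflowRelease` is decorated with `@total_ordering` and its `__eq__`/`__lt__` delegate to the `date` field, instances order chronologically in a heap and compare directly against `datetime` values. A minimal sketch (the import path is an assumption about where this module lives in the package):

```python
from datetime import datetime

# NOTE: Assumed import path for this module; adjust to the real location.
from ddeutil.workflow.workflow import WorkflowRelease

r1 = WorkflowRelease.from_dt(datetime(2024, 1, 1, 0, 0))
r2 = WorkflowRelease.from_dt(datetime(2024, 1, 1, 0, 1))

assert r1 < r2                            # ordered by the release date
assert r1 == datetime(2024, 1, 1, 0, 0)   # a datetime compares against .date
```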
+ @dataclass
+ class WorkflowQueue:
+     """Workflow Queue object."""
+
+     queue: list[WorkflowRelease] = field(default_factory=list)
+     running: list[WorkflowRelease] = field(default_factory=list)
+     complete: list[WorkflowRelease] = field(default_factory=list)
+
+     @property
+     def is_queued(self) -> bool:
+         """Return True if the queue has any workflow release object.
+
+         :rtype: bool
+         """
+         return len(self.queue) > 0
+
+     def check_queue(self, data: WorkflowRelease) -> bool:
+         """Check whether a WorkflowRelease value already exists in any of the
+         tracking queues.
+
+         :param data: A workflow release object.
+
+         :rtype: bool
+         """
+         return (
+             (data in self.queue)
+             or (data in self.running)
+             or (data in self.complete)
+         )
+
+     def push_queue(self, data: WorkflowRelease) -> Self:
+         """Push data to the queue."""
+         heappush(self.queue, data)
+         return self
+
+     def push_running(self, data: WorkflowRelease) -> Self:
+         """Push data to the running list."""
+         heappush(self.running, data)
+         return self
+
+     def remove_running(self, data: WorkflowRelease) -> Self:
+         """Remove data from the running list if it exists."""
+         if data in self.running:
+             self.running.remove(data)
+         return self
+
+
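Because all three tracking lists hold `WorkflowRelease` objects in heap order, `check_queue` is the guard that stops the same release from being queued twice while it is pending, running, or complete. A short sketch under the same import assumption:

```python
from datetime import datetime

from ddeutil.workflow.workflow import WorkflowQueue, WorkflowRelease

wq = WorkflowQueue()
release = WorkflowRelease.from_dt(datetime(2024, 1, 1))

wq.push_queue(release)
assert wq.is_queued             # the pending heap is non-empty
assert wq.check_queue(release)  # found in one of the tracking lists

wq.push_running(release).remove_running(release)  # chainable via Self
```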
+ class Workflow(BaseModel):
+     """Workflow Pydantic model.
+
+     This is the main feature of this project because it is the workflow
+     data for running anywhere you want, or for scheduling tasks in the
+     background. It uses lightweight code on top of a Pydantic model with an
+     enhanced execute method.
+     """
+
+     name: str = Field(description="A workflow name.")
+     desc: Optional[str] = Field(
+         default=None,
+         description=(
+             "A workflow description that can be a string of markdown content."
+         ),
+     )
+     params: dict[str, Param] = Field(
+         default_factory=dict,
+         description="The parameters that this workflow needs to use.",
+     )
+     on: list[On] = Field(
+         default_factory=list,
+         description="A list of On instances for this workflow schedule.",
+     )
+     jobs: dict[str, Job] = Field(
+         default_factory=dict,
+         description="A mapping of job ID to an already-loaded job model.",
+     )
+
+     @classmethod
+     def from_loader(
+         cls,
+         name: str,
+         externals: DictData | None = None,
+     ) -> Self:
+         """Create a Workflow instance from the Loader object with only an
+         input workflow name. The loader object will use this workflow name to
+         search for the configuration data of this workflow model in the conf
+         path.
+
+         :raise ValueError: If the type does not match with the current object.
+
+         :param name: A workflow name that want to pass to the Loader object.
+         :param externals: External parameters that want to pass to the Loader
+             object.
+
+         :rtype: Self
+         """
+         loader: Loader = Loader(name, externals=(externals or {}))
+
+         # NOTE: Validate the config type matches the current model.
+         if loader.type != cls:
+             raise ValueError(f"Type {loader.type} does not match with {cls}")
+
+         loader_data: DictData = copy.deepcopy(loader.data)
+
+         # NOTE: Add name to loader data
+         loader_data["name"] = name.replace(" ", "_")
+
+         # NOTE: Prepare `on` data
+         cls.__bypass_on(loader_data, externals=externals)
+         return cls.model_validate(obj=loader_data)
+
+     @classmethod
+     def __bypass_on(
+         cls,
+         data: DictData,
+         externals: DictData | None = None,
+     ) -> DictData:
+         """Bypass and prepare the ``on`` data in the loaded config data.
+
+         :param data: Loaded config data that may contain an ``on`` key.
+         :param externals: External parameters that want to pass to the Loader
+             object.
+
+         :rtype: DictData
+         """
+         if on := data.pop("on", []):
+             if isinstance(on, str):
+                 on = [on]
+             if any(not isinstance(i, (dict, str)) for i in on):
+                 raise TypeError("The ``on`` key should be list of str or dict")
+
+             # NOTE: Pass the on value to the Loader and keep the On model
+             #   object on the on field.
+             data["on"] = [
+                 (
+                     Loader(n, externals=(externals or {})).data
+                     if isinstance(n, str)
+                     else n
+                 )
+                 for n in on
+             ]
+         return data
+
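`__bypass_on` means the `on` key in a workflow config may arrive as one Loader template name, a list of names, or inline dicts, and all of these normalize to a list of `On` models. A hedged illustration of the accepted shapes (the template names and the inline key are made-up examples):

```python
# Each of these config fragments passes the ``on`` handling above.
config_a = {"on": "every_5_minute"}                      # one Loader name
config_b = {"on": ["every_5_minute", "every_day_noon"]}  # list of Loader names
config_c = {"on": [{"cronjob": "*/5 * * * *"}]}          # inline dict form
```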
+     @model_validator(mode="before")
+     def __prepare_model_before__(cls, values: DictData) -> DictData:
+         """Prepare the params key in the data model before validating."""
+         # NOTE: Prepare params type if it is passed with only the type value.
+         if params := values.pop("params", {}):
+             values["params"] = {
+                 p: (
+                     {"type": params[p]}
+                     if isinstance(params[p], str)
+                     else params[p]
+                 )
+                 for p in params
+             }
+         return values
+
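This before-validator expands the string shorthand for `params`, so the two constructions below should load to the same model (assuming `str` is a registered param type name):

```python
from ddeutil.workflow.workflow import Workflow

# The string shorthand {"name": "str"} expands to {"name": {"type": "str"}}.
wf_short = Workflow(name="demo", params={"name": "str"})
wf_full = Workflow(name="demo", params={"name": {"type": "str"}})
assert wf_short.params == wf_full.params
```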
+     @field_validator("desc", mode="after")
+     def __dedent_desc__(cls, value: str) -> str:
+         """Prepare a description string that was created on a template.
+
+         :param value: A description string value that want to dedent.
+         :rtype: str
+         """
+         return dedent(value)
+
+     @field_validator("on", mode="after")
+     def __on_no_dup__(cls, value: list[On]) -> list[On]:
+         """Validate that the on fields do not contain duplicate values.
+
+         :raise ValueError: If it has some duplicate value.
+
+         :param value: A list of on objects.
+
+         :rtype: list[On]
+         """
+         set_ons: set[str] = {str(on.cronjob) for on in value}
+         if len(set_ons) != len(value):
+             raise ValueError(
+                 "The on fields should not contain duplicate on value."
+             )
+
+         # WARNING:
+         # if '* * * * *' in set_ons and len(set_ons) > 1:
+         #     raise ValueError(
+         #         "If it has every minute cronjob on value, it should has only "
+         #         "one value in the on field."
+         #     )
+         return value
+
+     @model_validator(mode="after")
+     def __validate_jobs_need__(self) -> Self:
+         """Validate that every needed job in any job exists.
+
+         :raise WorkflowException: If a job needs a job ID that does not exist
+             in this workflow.
+
+         :rtype: Self
+         """
+         for job in self.jobs:
+             if not_exist := [
+                 need for need in self.jobs[job].needs if need not in self.jobs
+             ]:
+                 raise WorkflowException(
+                     f"The needed jobs: {not_exist} were not found in "
+                     f"{self.name!r}."
+                 )
+
+             # NOTE: Update each job's id with its job id from the workflow
+             #   template.
+             self.jobs[job].id = job
+
+         # VALIDATE: Validate that the workflow name is not dynamic with a
+         #   params template.
+         if has_template(self.name):
+             raise ValueError(
+                 f"Workflow name should not have any template, please check, "
+                 f"{self.name!r}."
+             )
+
+         return self
+
+     def job(self, name: str) -> Job:
+         """Return the job of this workflow that matches an input name.
+
+         :param name: A job name that want to get from a mapping of job models.
+         :type name: str
+
+         :rtype: Job
+         :return: A job model that exists on this workflow by the input name.
+         """
+         if name not in self.jobs:
+             raise ValueError(
+                 f"A Job {name!r} does not exist in this workflow, "
+                 f"{self.name!r}"
+             )
+         return self.jobs[name]
+
+     def parameterize(self, params: DictData) -> DictData:
+         """Prepare incoming parameters before using them in the execution
+         process. This method validates the keys of the incoming params against
+         this object's required params fields, and then creates a ``jobs`` key
+         in the result mapping that will keep any execution result from its
+         jobs.
+
+             ... {
+             ...     "params": <an-incoming-params>,
+             ...     "jobs": {}
+             ... }
+
+         :param params: A parameter mapping received from workflow execution.
+         :type params: DictData
+
+         :raise WorkflowException: If the parameter value that want to validate
+             does not include a necessary parameter that has the required flag.
+
+         :rtype: DictData
+         :return: The parameter value validated with its parameter fields and
+             with the jobs key added.
+         """
+         # VALIDATE: Incoming params should have keys that set on this workflow.
+         if check_key := tuple(
+             f"{k!r}"
+             for k in self.params
+             if (k not in params and self.params[k].required)
+         ):
+             raise WorkflowException(
+                 f"Required Param on this workflow setting does not set: "
+                 f"{', '.join(check_key)}."
+             )
+
+         # NOTE: Mapping type of param before adding it to the ``params`` key.
+         return {
+             "params": (
+                 params
+                 | {
+                     k: self.params[k].receive(params[k])
+                     for k in params
+                     if k in self.params
+                 }
+             ),
+             "jobs": {},
+         }
+
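In practice, `parameterize` both enforces required params and seeds the `jobs` context in one step. A sketch of the expected shapes, assuming a `str` param type that passes its value through `receive` unchanged:

```python
wf = Workflow(
    name="demo",
    params={"name": {"type": "str", "required": True}},
)

ctx = wf.parameterize({"name": "foo"})
# ctx -> {"params": {"name": "foo"}, "jobs": {}}

wf.parameterize({})  # raises WorkflowException: required param was not set
```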
+     def release(
+         self,
+         release: datetime | WorkflowRelease,
+         params: DictData,
+         run_id: str | None = None,
+         *,
+         log: type[Log] | None = None,
+         queue: WorkflowQueue | list[datetime] | None = None,
+     ) -> Result:
+         """Release the workflow execution, overriding its parameters with the
+         release templating that includes the logical date (release date),
+         execution date, and running ID in the params.
+
+         This method allows the workflow to use a log object for saving the
+         execution result to a log destination, like a file log in the local
+         `/logs` directory.
+
+         This method sleeps for 0.15 seconds on every step that interacts with
+         the queue object.
+
+         :param release: A release datetime or WorkflowRelease object.
+         :param params: A workflow parameter that passes to the execute method.
+         :param run_id: A workflow running ID for this release.
+         :param log: A log class that wants to save the execution result.
+         :param queue: A WorkflowQueue object, or a list of release datetimes
+             that are already queued.
+
+         :rtype: Result
+         """
+         log: type[Log] = log or FileLog
+         run_id: str = run_id or gen_id(self.name, unique=True)
+
+         # VALIDATE: Change queue value to a WorkflowQueue object.
+         if queue is None:
+             queue: WorkflowQueue = WorkflowQueue()
+         elif isinstance(queue, list):
+             queue: WorkflowQueue = WorkflowQueue(queue=queue)
+
+         # VALIDATE: Change release value to a WorkflowRelease object.
+         if isinstance(release, datetime):
+             release: WorkflowRelease = WorkflowRelease.from_dt(release)
+
+         logger.debug(
+             f"({run_id}) [RELEASE]: {self.name!r} : "
+             f"Closely to run >> {release.date:%Y-%m-%d %H:%M:%S}"
+         )
+
+         # NOTE: Release parameters that are used if params has templating.
+         release_params: DictData = {
+             "release": {
+                 "logical_date": release.date,
+                 "execute_date": datetime.now(tz=config.tz),
+                 "run_id": run_id,
+                 "timezone": config.tz,
+             }
+         }
+
+         # WARNING: Re-create this workflow running with the new running ID.
+         rs: Result = self.execute(
+             params=param2template(params, release_params),
+             run_id=run_id,
+         )
+         logger.debug(
+             f"({run_id}) [RELEASE]: {self.name!r} : "
+             f"End release {release.date:%Y-%m-%d %H:%M:%S}"
+         )
+
+         rs.set_parent_run_id(run_id)
+         rs_log: Log = log.model_validate(
+             {
+                 "name": self.name,
+                 "release": release.date,
+                 "type": release.type,
+                 "context": rs.context,
+                 "parent_run_id": rs.parent_run_id,
+                 "run_id": rs.run_id,
+             }
+         )
+
+         # NOTE: Saving execution result to destination of the input log object.
+         rs_log.save(excluded=None)
+
+         # NOTE: Remove this release from the running queue and mark it
+         #   complete.
+         queue.remove_running(release)
+         heappush(queue.complete, release)
+
+         return Result(
+             status=0,
+             context={
+                 "params": params,
+                 "release": {
+                     "status": "success",
+                     "logical_date": release.date,
+                 },
+             },
+             run_id=run_id,
+         )
+
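Because a bare `datetime` gets wrapped via `WorkflowRelease.from_dt`, a manual release needs very little setup. A sketch (the workflow config name is hypothetical):

```python
from datetime import datetime

from ddeutil.workflow.workflow import Workflow

wf = Workflow.from_loader("demo-workflow")
rs = wf.release(
    release=datetime(2024, 1, 1, 0, 0),
    params={"name": "foo"},
)
assert rs.context["release"]["status"] == "success"
```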
+     def queue_poking(
+         self,
+         offset: float,
+         end_date: datetime,
+         queue: WorkflowQueue,
+         log: type[Log],
+     ) -> WorkflowQueue:
+         """Generate a queue of datetimes from the cron runners that are
+         initialized from the ``on`` field, with an offset value.
+
+         :param offset: An offset in seconds from the current datetime.
+         :param end_date: An end datetime that stops the queue generation.
+         :param queue: A WorkflowQueue object to push the releases to.
+         :param log: A log class for checking already-pointed releases.
+         """
+         for on in self.on:
+
+             runner: CronRunner = on.next(
+                 get_dt_now(tz=config.tz, offset=offset).replace(microsecond=0)
+             )
+
+             if runner.date > end_date:
+                 continue
+
+             workflow_release = WorkflowRelease(
+                 date=runner.date,
+                 offset=offset,
+                 end_date=end_date,
+                 runner=runner,
+                 type="poking",
+             )
+
+             while queue.check_queue(data=workflow_release) or (
+                 log.is_pointed(name=self.name, release=workflow_release.date)
+             ):
+                 workflow_release = WorkflowRelease(
+                     date=runner.next,
+                     offset=offset,
+                     end_date=end_date,
+                     runner=runner,
+                     type="poking",
+                 )
+
+             if runner.date > end_date:
+                 continue
+
+             queue.push_queue(workflow_release)
+         return queue
+
+     def poke(
+         self,
+         start_date: datetime | None = None,
+         params: DictData | None = None,
+         run_id: str | None = None,
+         periods: int = 1,
+         *,
+         log: type[Log] | None = None,
+     ) -> list[Result]:
+         """Poke this workflow with a threading executor pool, releasing all of
+         the schedules that were set on the ``on`` field. This method observes
+         the schedules that are nearly due to run and releases them with the
+         ``self.release()`` method.
+
+         :param start_date: A start datetime object.
+         :param params: Parameters that want to pass to the release method.
+         :param run_id: A workflow running ID for this poke.
+         :param periods: A period of minutes to run the poke for.
+         :param log: A log class that want to use on this poking process.
+
+         :rtype: list[Result]
+         """
+         # NOTE: If this workflow does not set any on schedule, it returns an
+         #   empty result.
+         if len(self.on) == 0:
+             logger.info(
+                 f"({run_id}) [POKING]: {self.name!r} does not have any "
+                 f"schedule to run."
+             )
+             return []
+
+         if periods <= 0:
+             raise WorkflowException(
+                 "The period of poking should be an int greater than or equal "
+                 "to 1."
+             )
+
+         # NOTE: Create start_date and offset variables.
+         current_date: datetime = datetime.now(tz=config.tz)
+
+         if start_date and start_date <= current_date:
+             start_date = start_date.replace(tzinfo=config.tz)
+             offset: float = (current_date - start_date).total_seconds()
+         else:
+             start_date: datetime = current_date
+             offset: float = 0
+
+         end_date: datetime = start_date + timedelta(minutes=periods)
+
+         log: type[Log] = log or FileLog
+         run_id: str = run_id or gen_id(self.name, unique=True)
+         logger.info(
+             f"({run_id}) [POKING]: Start Poking: {self.name!r} from "
+             f"{start_date:%Y-%m-%d %H:%M:%S} to {end_date:%Y-%m-%d %H:%M:%S}"
+         )
+
+         params: DictData = params or {}
+         workflow_queue: WorkflowQueue = WorkflowQueue()
+         results: list[Result] = []
+         futures: list[Future] = []
+
+         self.queue_poking(
+             offset, end_date=end_date, queue=workflow_queue, log=log
+         )
+
+         if len(workflow_queue.queue) == 0:
+             logger.info(
+                 f"({run_id}) [POKING]: {self.name!r} does not have any "
+                 f"queue to run."
+             )
+             return []
+
+         with ThreadPoolExecutor(
+             max_workers=config.max_poking_pool_worker,
+             thread_name_prefix="workflow_poking_",
+         ) as executor:
+
+             while workflow_queue.is_queued:
+
+                 wf_release: WorkflowRelease = heappop(workflow_queue.queue)
+                 if (
+                     wf_release.date - get_dt_now(tz=config.tz, offset=offset)
+                 ).total_seconds() > 60:
+                     logger.debug(
+                         f"({run_id}) [POKING]: Waiting because the latest "
+                         f"release has a diff time of more than 60 seconds."
+                     )
+                     heappush(workflow_queue.queue, wf_release)
+                     delay(60)
+                     self.queue_poking(
+                         offset, end_date, queue=workflow_queue, log=log
+                     )
+                     continue
+
+                 # NOTE: Push the workflow release to the running queue.
+                 workflow_queue.push_running(wf_release)
+
+                 futures.append(
+                     executor.submit(
+                         self.release,
+                         release=wf_release,
+                         params=params,
+                         log=log,
+                         queue=workflow_queue,
+                     )
+                 )
+
+                 self.queue_poking(
+                     offset, end_date, queue=workflow_queue, log=log
+                 )
+
+             # WARNING: This poking method does not allow fail-fast logic for
+             #   catching the parallel execution results.
+             for future in as_completed(futures):
+                 results.append(future.result().set_parent_run_id(run_id))
+
+         while len(workflow_queue.running) > 0:  # pragma: no cov
+             logger.warning(
+                 f"({run_id}) [POKING]: The running queue is not empty while "
+                 f"the poking process is finishing."
+             )
+             delay(10)
+
+         return results
+
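So a poke is a bounded watch window: it queues every release that the `on` schedules produce inside the next `periods` minutes and threads out `self.release` for each one. A usage sketch (the workflow name is hypothetical):

```python
wf = Workflow.from_loader("demo-workflow")

# Observe the schedules for the next 5 minutes and release anything due.
results = wf.poke(periods=5, params={"name": "foo"})
for rs in results:
    print(rs.run_id, rs.context["release"]["logical_date"])
```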
+     def execute_job(
+         self,
+         job_id: str,
+         params: DictData,
+         *,
+         run_id: str | None = None,
+         raise_error: bool = True,
+     ) -> Result:
+         """Job execution that passes dynamic parameters from the main workflow
+         execution to the target job object via the job's ID.
+
+         This execution is the minimum level of execution of this workflow
+         model. It differs from ``self.execute`` because this method runs only
+         one job and returns the context of that job's data.
+
+         :raise NotImplementedError: If the raise_error argument is set to
+             False, because catching the error is not supported yet.
+
+         :param job_id: A job ID that want to execute.
+         :param params: Params that were parameterized from the workflow
+             execution.
+         :param run_id: A workflow running ID for this job execution.
+         :param raise_error: A flag that raises an error instead of catching it
+             to the result if the job execution gets an exception.
+
+         :rtype: Result
+         """
+         run_id: str = run_id or gen_id(self.name, unique=True)
+
+         # VALIDATE: Check that the job ID exists in this workflow.
+         if job_id not in self.jobs:
+             raise WorkflowException(
+                 f"The job: {job_id!r} does not exist in {self.name!r} "
+                 f"workflow."
+             )
+
+         logger.info(f"({run_id}) [WORKFLOW]: Start execute job: {job_id!r}")
+
+         # IMPORTANT:
+         #   Change any job running IDs to this workflow running ID.
+         try:
+             job: Job = self.jobs[job_id]
+             job.set_outputs(
+                 job.execute(params=params, run_id=run_id).context,
+                 to=params,
+             )
+         except JobException as err:
+             logger.error(
+                 f"({run_id}) [WORKFLOW]: {err.__class__.__name__}: {err}"
+             )
+             if raise_error:
+                 raise WorkflowException(
+                     f"Get job execution error {job_id}: JobException: {err}"
+                 ) from None
+             raise NotImplementedError(
+                 "Handling an error from the job execution is not supported "
+                 "yet."
+             ) from None
+
+         return Result(status=0, context=params).set_run_id(run_id)
+
+     def execute(
+         self,
+         params: DictData,
+         *,
+         run_id: str | None = None,
+         timeout: int = 0,
+     ) -> Result:
+         """Execute the workflow, passing dynamic parameters to all jobs that
+         are included in this workflow model's ``jobs`` field.
+
+         The result of the execution process for each job and stage on this
+         workflow is kept in a dict, so the outputs of all jobs and stages can
+         be caught with dot annotation.
+
+         For example, when I want to use the output from a previous stage, I
+         can access it with this syntax:
+
+             ... ${job-name}.stages.${stage-id}.outputs.${key}
+
+         :param params: Input parameters to use on the workflow execution that
+             will be parameterized before use. Default is None.
+         :type params: DictData
+
+         :param run_id: A workflow running ID for this job execution.
+         :type run_id: str | None (default: None)
+         :param timeout: A workflow execution timeout in seconds that limits
+             the execution time and the waiting time for job dependencies.
+         :type timeout: int (default: 0)
+
+         :rtype: Result
+         """
+         run_id: str = run_id or gen_id(self.name, unique=True)
+         logger.info(f"({run_id}) [WORKFLOW]: Start Execute: {self.name!r} ...")
+
+         # NOTE: I use this condition because this method allows passing empty
+         #   params and I do not want to create a new dict object.
+         ts: float = time.monotonic()
+         rs: Result = Result(run_id=run_id)
+
+         # NOTE: It should not do anything if it does not have any jobs.
+         if not self.jobs:
+             logger.warning(
+                 f"({run_id}) [WORKFLOW]: This workflow: {self.name!r} "
+                 f"does not have any jobs"
+             )
+             return rs.catch(status=0, context=params)
+
+         # NOTE: Create a job queue that keeps the jobs that want to run after
+         #   their dependency conditions.
+         jq: Queue = Queue()
+         for job_id in self.jobs:
+             jq.put(job_id)
+
+         # NOTE: Create the data context that will pass to any job executions
+         #   on this workflow.
+         #
+         #   {
+         #       'params': <input-params>,
+         #       'jobs': {},
+         #   }
+         #
+         context: DictData = self.parameterize(params)
+         status: int = 0
+         try:
+             if config.max_job_parallel == 1:
+                 self.__exec_non_threading(
+                     run_id=run_id,
+                     context=context,
+                     ts=ts,
+                     job_queue=jq,
+                     timeout=timeout,
+                 )
+             else:
+                 self.__exec_threading(
+                     run_id=run_id,
+                     context=context,
+                     ts=ts,
+                     job_queue=jq,
+                     timeout=timeout,
+                 )
+         except WorkflowException as err:
+             status: int = 1
+             context.update(
+                 {
+                     "error": err,
+                     "error_message": f"{err.__class__.__name__}: {err}",
+                 },
+             )
+         return rs.catch(status=status, context=context)
+
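The dot annotation described in the docstring maps directly onto the returned context dict, so reading a stage output is plain nested access. A sketch with hypothetical job and stage IDs:

```python
rs = wf.execute(params={"name": "foo"})

# ${job-name}.stages.${stage-id}.outputs.${key} is nested dict access on
# the result context (the "first-job" and "hello" IDs are made up here).
outputs = rs.context["jobs"]["first-job"]["stages"]["hello"]["outputs"]
```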
+     def __exec_threading(
+         self,
+         run_id: str,
+         context: DictData,
+         ts: float,
+         job_queue: Queue,
+         *,
+         timeout: int = 0,
+         thread_timeout: int = 1800,
+     ) -> DictData:
+         """Workflow execution by a threading strategy that uses
+         multithreading.
+
+         If a job needs a dependency, it checks the dependency job's ID in the
+         context data before allowing it to run.
+
+         :param context: A context workflow data that wants to pass downstream.
+         :param ts: A start timestamp used for checking whether the execution
+             time should time out.
+         :param job_queue: A job queue object.
+         :param timeout: A timeout in seconds that bounds the running time.
+         :param thread_timeout: A timeout for waiting on all futures to
+             complete.
+
+         :rtype: DictData
+         """
+         not_timeout_flag: bool = True
+         timeout: int = timeout or config.max_job_exec_timeout
+         logger.debug(
+             f"({run_id}) [WORKFLOW]: Run {self.name} with threading executor."
+         )
+
+         # IMPORTANT: The job executions can run in parallel and wait on their
+         #   needs.
+         with ThreadPoolExecutor(
+             max_workers=config.max_job_parallel,
+             thread_name_prefix="workflow_exec_threading_",
+         ) as executor:
+             futures: list[Future] = []
+
+             while not job_queue.empty() and (
+                 not_timeout_flag := ((time.monotonic() - ts) < timeout)
+             ):
+                 job_id: str = job_queue.get()
+                 job: Job = self.jobs[job_id]
+
+                 if any(need not in context["jobs"] for need in job.needs):
+                     job_queue.task_done()
+                     job_queue.put(job_id)
+                     time.sleep(0.25)
+                     continue
+
+                 # NOTE: Start the workflow job execution with a deep copy of
+                 #   the context data before release.
+                 #
+                 #   {
+                 #       'params': <input-params>,
+                 #       'jobs': {},
+                 #   }
+                 futures.append(
+                     executor.submit(
+                         self.execute_job,
+                         job_id,
+                         params=context,
+                     ),
+                 )
+
+                 # NOTE: Mark this job queue done.
+                 job_queue.task_done()
+
+             if not_timeout_flag:
+
+                 # NOTE: Wait for all items to finish processing by the
+                 #   `task_done()` method.
+                 job_queue.join()
+
+                 for future in as_completed(futures, timeout=thread_timeout):
+                     if err := future.exception():
+                         logger.error(f"({run_id}) [WORKFLOW]: {err}")
+                         raise WorkflowException(f"{err}")
+
+                     # NOTE: This result getting does not do anything.
+                     future.result()
+
+                 return context
+
+             for future in futures:
+                 future.cancel()
+
+         # NOTE: Raise timeout error.
+         logger.warning(
+             f"({run_id}) [WORKFLOW]: Execution of workflow, {self.name!r}, "
+             f"was timeout"
+         )
+         raise WorkflowException(
+             f"Execution of workflow: {self.name} was timeout"
+         )
+
+     def __exec_non_threading(
+         self,
+         run_id: str,
+         context: DictData,
+         ts: float,
+         job_queue: Queue,
+         *,
+         timeout: int = 0,
+     ) -> DictData:
+         """Workflow execution with a non-threading strategy that runs jobs
+         sequentially and waits for the previous job to run successfully.
+
+         If a job needs a dependency, it checks the dependency job's ID in the
+         context data before allowing it to run.
+
+         :param context: A context workflow data that wants to pass downstream.
+         :param ts: A start timestamp used for checking whether the execution
+             time should time out.
+         :param timeout: A timeout in seconds that bounds the running time.
+
+         :rtype: DictData
+         """
+         not_timeout_flag: bool = True
+         timeout: int = timeout or config.max_job_exec_timeout
+         logger.debug(
+             f"({run_id}) [WORKFLOW]: Run {self.name} with non-threading "
+             f"executor."
+         )
+
+         while not job_queue.empty() and (
+             not_timeout_flag := ((time.monotonic() - ts) < timeout)
+         ):
+             job_id: str = job_queue.get()
+             job: Job = self.jobs[job_id]
+
+             # NOTE: Wait for the dependency jobs to run successfully before
+             #   release.
+             if any(need not in context["jobs"] for need in job.needs):
+                 job_queue.task_done()
+                 job_queue.put(job_id)
+                 time.sleep(0.075)
+                 continue
+
+             # NOTE: Start the workflow job execution with a deep copy of the
+             #   context data before release. This job execution process will
+             #   run until done before checking whether all execution has
+             #   timed out or not.
+             #
+             #   {
+             #       'params': <input-params>,
+             #       'jobs': {},
+             #   }
+             self.execute_job(job_id=job_id, params=context, run_id=run_id)
+
+             # NOTE: Mark this job queue done.
+             job_queue.task_done()
+
+         if not_timeout_flag:
+
+             # NOTE: Wait for all items to finish processing by the
+             #   `task_done()` method.
+             job_queue.join()
+
+             return context
+
+         # NOTE: Raise timeout error.
+         logger.warning(
+             f"({run_id}) [WORKFLOW]: Execution of workflow was timeout"
+         )
+         raise WorkflowException(
+             f"Execution of workflow: {self.name} was timeout"
+         )
+
+
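Both executors gate jobs on the same re-queueing idea: pop a job ID, push it back if any of its needs has not written its context yet, and run it otherwise. Below is that pattern in isolation, stripped of the workflow specifics; note that, unlike the originals, this bare sketch has no timeout to escape dependency cycles:

```python
import time
from queue import Queue
from typing import Callable


def run_in_dependency_order(
    jobs: dict[str, list[str]],
    run: Callable[[str], None],
) -> None:
    """Run ``jobs`` (job id -> needed job ids), re-queueing any job whose
    dependencies have not completed yet, as the executors above do."""
    done: set[str] = set()
    q: Queue = Queue()
    for job_id in jobs:
        q.put(job_id)

    while not q.empty():
        job_id = q.get()
        if any(need not in done for need in jobs[job_id]):
            q.task_done()
            q.put(job_id)  # a need is still pending: push it back
            time.sleep(0.075)
            continue
        run(job_id)
        done.add(job_id)
        q.task_done()


run_in_dependency_order(
    {"first": [], "second": ["first"]},
    run=lambda job_id: print("run", job_id),
)
```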
+ @dataclass(config=ConfigDict(arbitrary_types_allowed=True))
+ class WorkflowTaskData:
+     """Workflow task Pydantic dataclass object that is used to keep the
+     mapping data and the workflow model for passing to a multithreading task.
+
+     This dataclass object maps 1-to-1 with the workflow and cron runner
+     objects.
+     """
+
+     alias: str
+     workflow: Workflow
+     runner: CronRunner
+     params: DictData
+
+     def release(
+         self,
+         queue: dict[str, list[datetime]],
+         log: type[Log] | None = None,
+         run_id: str | None = None,
+         *,
+         waiting_sec: int = 60,
+         sleep_interval: int = 15,
+     ) -> None:  # pragma: no cov
+         """Workflow task release that uses the same logic as the
+         `workflow.release` method.
+
+         :param queue: A mapping of workflow alias to its queued release
+             datetimes.
+         :param log: A log class for saving the result logging from the
+             workflow execution process.
+         :param run_id: A workflow running ID for this release.
+         :param waiting_sec: A period in seconds that allows the workflow to
+             execute.
+         :param sleep_interval: A time in seconds to wait until the execution
+             time arrives.
+         """
+         log: type[Log] = log or FileLog
+         run_id: str = run_id or gen_id(self.workflow.name, unique=True)
+         runner: CronRunner = self.runner
+
+         # NOTE: Get the next schedule time that generates from now.
+         next_time: datetime = runner.date
+
+         # NOTE: Get the next time until it is not already pointed or queued.
+         while log.is_pointed(self.workflow.name, next_time) or (
+             next_time in queue[self.alias]
+         ):
+             next_time: datetime = runner.next
+
+         logger.debug(
+             f"({run_id}) [CORE]: {self.workflow.name!r} : {runner.cron} : "
+             f"{next_time:%Y-%m-%d %H:%M:%S}"
+         )
+         heappush(queue[self.alias], next_time)
+         start_sec: float = time.monotonic()
+
+         if get_diff_sec(next_time, tz=runner.tz) > waiting_sec:
+             logger.debug(
+                 f"({run_id}) [WORKFLOW]: {self.workflow.name!r} : "
+                 f"{runner.cron} "
+                 f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
+             )
+
+             # NOTE: Remove this next running datetime that is not in the
+             #   period from the queue.
+             queue[self.alias].remove(next_time)
+
+             time.sleep(0.2)
+             return
+
+         logger.debug(
+             f"({run_id}) [CORE]: {self.workflow.name!r} : {runner.cron} : "
+             f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
+         )
+
+         # NOTE: Release when the time is nearly at the schedule time.
+         while (duration := get_diff_sec(next_time, tz=config.tz)) > (
+             sleep_interval + 5
+         ):
+             logger.debug(
+                 f"({run_id}) [CORE]: {self.workflow.name!r} : {runner.cron} "
+                 f": Sleep until: {duration}"
+             )
+             time.sleep(sleep_interval)
+
+         time.sleep(0.5)
+
+         # NOTE: Release parameters that are used if params has templating.
+         release_params: DictData = {
+             "release": {
+                 "logical_date": next_time,
+             },
+         }
+
+         # WARNING: Re-create this workflow running with a new running ID.
+         rs: Result = self.workflow.execute(
+             params=param2template(self.params, release_params),
+         )
+         logger.debug(
+             f"({run_id}) [CORE]: {self.workflow.name!r} : {runner.cron} : "
+             f"End release - {next_time:%Y-%m-%d %H:%M:%S}"
+         )
+
+         # NOTE: Set the parent ID on this result.
+         rs.set_parent_run_id(run_id)
+
+         # NOTE: Save the result to the log object's saving destination.
+         rs_log: Log = log.model_validate(
+             {
+                 "name": self.workflow.name,
+                 "type": "schedule",
+                 "release": next_time,
+                 "context": rs.context,
+                 "parent_run_id": rs.parent_run_id,
+                 "run_id": rs.run_id,
+             }
+         )
+         rs_log.save(excluded=None)
+
+         # NOTE: Remove the current release date from the running queue.
+         queue[self.alias].remove(next_time)
+         total_sec: float = time.monotonic() - start_sec
+
+         # IMPORTANT:
+         #   Add the next running datetime to the workflow task queue.
+         future_running_time: datetime = runner.next
+
+         while (
+             future_running_time in queue[self.alias]
+             or (future_running_time - next_time).total_seconds() < total_sec
+         ):  # pragma: no cov
+             future_running_time: datetime = runner.next
+
+         # NOTE: Queue the next release date.
+         logger.debug(f"[CORE]: {'-' * 100}")
+
+     def __eq__(self, other: WorkflowTaskData) -> bool:
+         """Override the equality operator to compare only the same type."""
+         if isinstance(other, WorkflowTaskData):
+             return (
+                 self.workflow.name == other.workflow.name
+                 and self.runner.cron == other.runner.cron
+             )
+         return NotImplemented
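As the module docstring describes, the scheduler side pairs each `WorkflowTaskData` with a per-alias heap of queued datetimes and releases each task once per minute. A minimal sketch of driving one task by hand (the names and the `on[0].next(...)` runner construction are assumptions based on the code above):

```python
from datetime import datetime

from ddeutil.workflow.conf import config
from ddeutil.workflow.workflow import Workflow, WorkflowTaskData

wf = Workflow.from_loader("demo-workflow")
queue: dict[str, list[datetime]] = {"demo-workflow": []}

task = WorkflowTaskData(
    alias="demo-workflow",
    workflow=wf,
    runner=wf.on[0].next(datetime.now(tz=config.tz)),
    params={"name": "foo"},
)
task.release(queue=queue)
```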