ddeutil-workflow 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
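For orientation, the 0.0.11 side of this diff replaces the old Pipeline-based scheduler with a Pydantic `Workflow` model. A minimal usage sketch, assuming the module shown in this diff is importable as `ddeutil.workflow.scheduler` and that a config named "my-workflow" with a "run-date" parameter exists (both names are illustrative); only `from_loader`, `execute`, and `poke` are taken from the code below:

    from ddeutil.workflow.scheduler import Workflow  # module path is an assumption

    # Load the workflow config by name through the Loader object.
    wf = Workflow.from_loader("my-workflow", externals={})

    # Execute once; required params are validated by parameterize() first.
    rs = wf.execute(params={"run-date": "2024-01-01"})
    print(rs.status, rs.context)

    # Or poke every `on` schedule with a thread pool and release the ones
    # that are close to their next run time.
    results = wf.poke(params={"run-date": "2024-01-01"})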
@@ -5,59 +5,815 @@
5
5
  # ------------------------------------------------------------------------------
6
6
  from __future__ import annotations
7
7
 
8
+ import copy
9
+ import json
8
10
  import logging
9
11
  import os
10
12
  import time
11
13
  from collections.abc import Iterator
12
- from concurrent.futures import Future, ProcessPoolExecutor, as_completed
13
- from dataclasses import dataclass
14
+ from concurrent.futures import (
15
+ Future,
16
+ ProcessPoolExecutor,
17
+ ThreadPoolExecutor,
18
+ as_completed,
19
+ )
20
+ from dataclasses import dataclass, field
14
21
  from datetime import datetime, timedelta
15
22
  from functools import wraps
16
23
  from heapq import heappush
24
+ from queue import Queue
25
+ from textwrap import dedent
17
26
  from threading import Thread
27
+ from typing import Optional
18
28
  from zoneinfo import ZoneInfo
19
29
 
20
- from ddeutil.workflow.__types import DictData
21
- from ddeutil.workflow.cron import CronRunner
22
- from ddeutil.workflow.exceptions import WorkflowException
23
- from ddeutil.workflow.log import FileLog, Log
24
- from ddeutil.workflow.on import On
25
- from ddeutil.workflow.pipeline import Pipeline
26
- from ddeutil.workflow.utils import (
30
+ from dotenv import load_dotenv
31
+ from pydantic import BaseModel, Field
32
+ from pydantic.functional_validators import field_validator, model_validator
33
+ from typing_extensions import Self
34
+
35
+ try:
36
+ from schedule import CancelJob
37
+ except ImportError:
38
+ CancelJob = None
39
+
40
+ from .__types import DictData
41
+ from .cron import CronRunner
42
+ from .exceptions import JobException, WorkflowException
43
+ from .job import Job
44
+ from .log import FileLog, Log, get_logger
45
+ from .on import On
46
+ from .utils import (
47
+ Loader,
48
+ Param,
27
49
  Result,
28
50
  batch,
29
51
  delay,
52
+ gen_id,
30
53
  get_diff_sec,
54
+ has_template,
31
55
  param2template,
32
56
  )
33
- from dotenv import load_dotenv
34
- from schedule import CancelJob, Scheduler
35
-
36
- load_dotenv("../../../.env")
37
- logging.basicConfig(
38
- level=logging.DEBUG,
39
- format=(
40
- "%(asctime)s.%(msecs)03d (%(name)-10s, %(process)-5d, %(thread)-5d) "
41
- "[%(levelname)-7s] %(message)-120s (%(filename)s:%(lineno)s)"
42
- ),
43
- handlers=[logging.StreamHandler()],
44
- datefmt="%Y-%m-%d %H:%M:%S",
45
- )
57
+
58
+ load_dotenv()
59
+ logger = get_logger("ddeutil.workflow")
46
60
  logging.getLogger("schedule").setLevel(logging.INFO)
47
61
 
48
- tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
62
+
63
+ __all__ = (
64
+ "Workflow",
65
+ "WorkflowSchedule",
66
+ "WorkflowTask",
67
+ "Schedule",
68
+ "workflow_runner",
69
+ "workflow_task",
70
+ )
71
+
72
+
73
+ class Workflow(BaseModel):
74
+ """Workflow Model this is the main future of this project because it use to
75
+ be workflow data for running everywhere that you want or using it to
76
+ scheduler task in background. It use lightweight coding line from Pydantic
77
+ Model and enhance execute method on it.
78
+ """
79
+
80
+ name: str = Field(description="A workflow name.")
81
+ desc: Optional[str] = Field(
82
+ default=None,
83
+ description=(
84
+ "A workflow description that can be string of markdown content."
85
+ ),
86
+ )
87
+ params: dict[str, Param] = Field(
88
+ default_factory=dict,
89
+ description="A parameters that want to use on this workflow.",
90
+ )
91
+ on: list[On] = Field(
92
+ default_factory=list,
93
+ description="A list of On instance for this workflow schedule.",
94
+ )
95
+ jobs: dict[str, Job] = Field(
96
+ default_factory=dict,
97
+ description="A mapping of job ID and job model that already loaded.",
98
+ )
99
+ run_id: Optional[str] = Field(
100
+ default=None,
101
+ description="A running workflow ID.",
102
+ repr=False,
103
+ exclude=True,
104
+ )
105
+
106
+ @property
107
+ def new_run_id(self) -> str:
108
+ """Running ID of this workflow that always generate new unique value."""
109
+ return gen_id(self.name, unique=True)
110
+
111
+ @classmethod
112
+ def from_loader(
113
+ cls,
114
+ name: str,
115
+ externals: DictData | None = None,
116
+ ) -> Self:
117
+ """Create Workflow instance from the Loader object that only receive
118
+ an input workflow name. The loader object will use this workflow name to
119
+ searching configuration data of this workflow model in conf path.
120
+
121
+ :param name: A workflow name that want to pass to Loader object.
122
+ :param externals: An external parameters that want to pass to Loader
123
+ object.
124
+ :rtype: Self
125
+ """
126
+ loader: Loader = Loader(name, externals=(externals or {}))
127
+
128
+ # NOTE: Validate the config type match with current connection model
129
+ if loader.type != cls:
130
+ raise ValueError(f"Type {loader.type} does not match with {cls}")
131
+
132
+ loader_data: DictData = copy.deepcopy(loader.data)
133
+
134
+ # NOTE: Add name to loader data
135
+ loader_data["name"] = name.replace(" ", "_")
136
+
137
+ # NOTE: Prepare `on` data
138
+ cls.__bypass_on(loader_data)
139
+ return cls.model_validate(obj=loader_data)
140
+
141
+ @classmethod
142
+ def __bypass_on(cls, data: DictData, externals: DictData | None = None):
143
+ """Bypass the on data to loaded config data."""
144
+ if on := data.pop("on", []):
145
+ if isinstance(on, str):
146
+ on = [on]
147
+ if any(not isinstance(i, (dict, str)) for i in on):
148
+ raise TypeError("The ``on`` key should be list of str or dict")
149
+
150
+ # NOTE: Pass on value to Loader and keep on model object to on field
151
+ data["on"] = [
152
+ (
153
+ Loader(n, externals=(externals or {})).data
154
+ if isinstance(n, str)
155
+ else n
156
+ )
157
+ for n in on
158
+ ]
159
+ return data
160
+
161
+ @model_validator(mode="before")
162
+ def __prepare_params(cls, values: DictData) -> DictData:
163
+ """Prepare the params key."""
164
+ # NOTE: Prepare the params type if it is passed with only a type value.
165
+ if params := values.pop("params", {}):
166
+ values["params"] = {
167
+ p: (
168
+ {"type": params[p]}
169
+ if isinstance(params[p], str)
170
+ else params[p]
171
+ )
172
+ for p in params
173
+ }
174
+ return values
175
+
176
+ @field_validator("desc", mode="after")
177
+ def ___prepare_desc(cls, value: str) -> str:
178
+ """Prepare description string that was created on a template."""
179
+ return dedent(value)
180
+
181
+ @model_validator(mode="after")
182
+ def __validate_jobs_need_and_prepare_running_id(self):
183
+ """Validate each need job in any jobs should exists."""
184
+ for job in self.jobs:
185
+ if not_exist := [
186
+ need for need in self.jobs[job].needs if need not in self.jobs
187
+ ]:
188
+ raise WorkflowException(
189
+ f"This needed jobs: {not_exist} do not exist in this "
190
+ f"workflow, {self.name!r}"
191
+ )
192
+
193
+ # NOTE: update a job id with its job id from workflow template
194
+ self.jobs[job].id = job
195
+
196
+ if self.run_id is None:
197
+ self.run_id = self.new_run_id
198
+
199
+ # VALIDATE: Validate that the workflow name is not dynamic with a params
200
+ # template.
201
+ if has_template(self.name):
202
+ raise ValueError(
203
+ f"Workflow name should not has any template, please check, "
204
+ f"{self.name!r}."
205
+ )
206
+
207
+ return self
208
+
209
+ def get_running_id(self, run_id: str) -> Self:
210
+ """Return Workflow model object that changing workflow running ID with
211
+ an input running ID.
212
+
213
+ :param run_id: A replace workflow running ID.
214
+ :rtype: Self
215
+ """
216
+ return self.model_copy(update={"run_id": run_id})
217
+
218
+ def job(self, name: str) -> Job:
219
+ """Return Job model that exists on this workflow.
220
+
221
+ :param name: A job name that want to get from a mapping of job models.
222
+ :type name: str
223
+
224
+ :rtype: Job
225
+ :returns: A job model that exists in this workflow for the input name.
226
+ """
227
+ if name not in self.jobs:
228
+ raise ValueError(
229
+ f"A Job {name!r} does not exists in this workflow, "
230
+ f"{self.name!r}"
231
+ )
232
+ return self.jobs[name]
233
+
234
+ def parameterize(self, params: DictData) -> DictData:
235
+ """Prepare parameters before passing to execution process. This method
236
+ will create jobs key to params mapping that will keep any result from
237
+ job execution.
238
+
239
+ :param params: A parameter mapping that receive from workflow execution.
240
+ :rtype: DictData
241
+ """
242
+ # VALIDATE: Incoming params should have keys that are set on this workflow.
243
+ if check_key := tuple(
244
+ f"{k!r}"
245
+ for k in self.params
246
+ if (k not in params and self.params[k].required)
247
+ ):
248
+ raise WorkflowException(
249
+ f"Required Param on this workflow setting does not set: "
250
+ f"{', '.join(check_key)}."
251
+ )
252
+
253
+ # NOTE: mapping type of param before adding it to params variable.
254
+ return {
255
+ "params": (
256
+ params
257
+ | {
258
+ k: self.params[k].receive(params[k])
259
+ for k in params
260
+ if k in self.params
261
+ }
262
+ ),
263
+ "jobs": {},
264
+ }
265
+
266
+ def release(
267
+ self,
268
+ on: On,
269
+ params: DictData,
270
+ queue: list[datetime],
271
+ *,
272
+ waiting_sec: int = 60,
273
+ sleep_interval: int = 15,
274
+ log: Log = None,
275
+ ) -> Result:
276
+ """Start running workflow with the on schedule in period of 30 minutes.
277
+ That mean it will still running at background 30 minutes until the
278
+ schedule matching with its time.
279
+
280
+ This method allow workflow use log object to save the execution
281
+ result to log destination like file log to local `/logs` directory.
282
+
283
+ :param on: An on schedule value.
284
+ :param params: A workflow parameter that pass to execute method.
285
+ :param queue: A list of release time that already running.
286
+ :param waiting_sec: A second period value that allow workflow execute.
287
+ :param sleep_interval: A second value that want to waiting until time
288
+ to execute.
289
+ :param log: A log object that want to save execution result.
290
+ :rtype: Result
291
+ """
292
+ logger.debug(
293
+ f"({self.run_id}) [CORE]: {self.name!r}: {on.cronjob} : run with "
294
+ f"queue id: {id(queue)}"
295
+ )
296
+ log: Log = log or FileLog
297
+ tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
298
+ gen: CronRunner = on.generate(
299
+ datetime.now(tz=tz).replace(second=0, microsecond=0)
300
+ + timedelta(seconds=1)
301
+ )
302
+ cron_tz: ZoneInfo = gen.tz
303
+
304
+ # NOTE: get next schedule time that generate from now.
305
+ next_time: datetime = gen.next
306
+
307
+ # NOTE: Get the next time until it is not already pointed in the log.
308
+ while log.is_pointed(self.name, next_time, queue=queue):
309
+ next_time: datetime = gen.next
310
+
311
+ # NOTE: push this next running time to log queue
312
+ heappush(queue, next_time)
313
+
314
+ # VALIDATE: Check the different time between the next schedule time and
315
+ # now that less than waiting period (second unit).
316
+ if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
317
+ logger.debug(
318
+ f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
319
+ f"Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
320
+ )
321
+
322
+ # NOTE: Remove next datetime from queue.
323
+ queue.remove(next_time)
324
+
325
+ time.sleep(0.15)
326
+ return Result(
327
+ status=0,
328
+ context={
329
+ "params": params,
330
+ "poking": {"skipped": [str(on.cronjob)], "run": []},
331
+ },
332
+ )
333
+
334
+ logger.debug(
335
+ f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
336
+ f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
337
+ )
338
+
339
+ # NOTE: Release when the time is nearly to schedule time.
340
+ while (duration := get_diff_sec(next_time, tz=cron_tz)) > (
341
+ sleep_interval + 5
342
+ ):
343
+ logger.debug(
344
+ f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
345
+ f"Sleep until: {duration}"
346
+ )
347
+ time.sleep(sleep_interval)
348
+
349
+ time.sleep(0.5)
350
+
351
+ # NOTE: Release parameters that are used to fill the params if they
352
+ # contain templating.
353
+ release_params: DictData = {
354
+ "release": {
355
+ "logical_date": next_time,
356
+ },
357
+ }
358
+
359
+ # WARNING: Re-create workflow object that uses a new running workflow
360
+ # ID.
361
+ runner: Self = self.get_running_id(run_id=self.new_run_id)
362
+ rs: Result = runner.execute(
363
+ params=param2template(params, release_params),
364
+ )
365
+ logger.debug(
366
+ f"({runner.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
367
+ f"End release {next_time:%Y-%m-%d %H:%M:%S}"
368
+ )
369
+
370
+ # NOTE: Delete a copied workflow instance for saving memory.
371
+ del runner
372
+
373
+ rs.set_parent_run_id(self.run_id)
374
+ rs_log: Log = log.model_validate(
375
+ {
376
+ "name": self.name,
377
+ "on": str(on.cronjob),
378
+ "release": next_time,
379
+ "context": rs.context,
380
+ "parent_run_id": rs.run_id,
381
+ "run_id": rs.run_id,
382
+ }
383
+ )
384
+ # NOTE: Saving execution result to destination of the input log object.
385
+ rs_log.save(excluded=None)
386
+
387
+ queue.remove(next_time)
388
+ time.sleep(0.05)
389
+ return Result(
390
+ status=0,
391
+ context={
392
+ "params": params,
393
+ "poking": {"skipped": [], "run": [str(on.cronjob)]},
394
+ },
395
+ )
396
+
397
+ def poke(
398
+ self,
399
+ params: DictData | None = None,
400
+ *,
401
+ log: Log | None = None,
402
+ ) -> list[Result]:
403
+ """Poke workflow with threading executor pool for executing with all its
404
+ schedules that was set on the `on` value. This method will observe its
405
+ schedule that nearing to run with the ``self.release()`` method.
406
+
407
+ :param params: A parameters that want to pass to the release method.
408
+ :param log: A log object that want to use on this poking process.
409
+ :rtype: list[Result]
410
+ """
411
+ logger.info(
412
+ f"({self.run_id}) [POKING]: Start Poking: {self.name!r} ..."
413
+ )
414
+
415
+ # NOTE: If this workflow does not set the on schedule, it will return
416
+ # empty result.
417
+ if len(self.on) == 0:
418
+ return []
419
+
420
+ params: DictData = params or {}
421
+ queue: list[datetime] = []
422
+ results: list[Result] = []
423
+
424
+ worker: int = int(os.getenv("WORKFLOW_CORE_MAX_NUM_POKING") or "4")
425
+ with ThreadPoolExecutor(max_workers=worker) as executor:
426
+ # TODO: If I want to run infinite loop.
427
+ futures: list[Future] = []
428
+ for on in self.on:
429
+ futures.append(
430
+ executor.submit(
431
+ self.release,
432
+ on,
433
+ params=params,
434
+ log=log,
435
+ queue=queue,
436
+ )
437
+ )
438
+ delay(second=0.15)
439
+
440
+ # WARNING: This poking method does not allow fail-fast logic
441
+ # for catching parallel execution results.
442
+ for future in as_completed(futures):
443
+ results.append(future.result(timeout=60))
444
+
445
+ if len(queue) > 0:
446
+ logger.error(
447
+ f"({self.run_id}) [POKING]: Log Queue does empty when poking "
448
+ f"process was finishing."
449
+ )
450
+
451
+ return results
452
+
453
+ def execute_job(
454
+ self,
455
+ job: str,
456
+ params: DictData,
457
+ ) -> Result:
458
+ """Job Executor that use on workflow executor.
459
+
460
+ :param job: A job ID that want to execute.
461
+ :param params: A params that was parameterized from workflow execution.
462
+ :rtype: Result
463
+ """
464
+ # VALIDATE: Check that the job ID exists in this workflow.
465
+ if job not in self.jobs:
466
+ raise WorkflowException(
467
+ f"The job ID: {job} does not exists on {self.name!r} workflow."
468
+ )
469
+ try:
470
+ logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job!r}")
471
+
472
+ # IMPORTANT:
473
+ # Change any job running IDs to this workflow running ID.
474
+ job_obj: Job = self.jobs[job].get_running_id(self.run_id)
475
+ j_rs: Result = job_obj.execute(params=params)
476
+
477
+ except JobException as err:
478
+ raise WorkflowException(f"{job}: JobException: {err}") from None
479
+
480
+ return Result(
481
+ status=j_rs.status,
482
+ context={job: job_obj.set_outputs(j_rs.context)},
483
+ )
484
+
485
+ def execute(
486
+ self,
487
+ params: DictData | None = None,
488
+ *,
489
+ timeout: int = 60,
490
+ ) -> Result:
491
+ """Execute workflow with passing dynamic parameters to any jobs that
492
+ included in the workflow.
493
+
494
+ :param params: An input parameters that use on workflow execution that
495
+ will parameterize before using it.
496
+ :param timeout: A workflow execution time out in second unit that use
497
+ for limit time of execution and waiting job dependency.
498
+ :rtype: Result
499
+
500
+ See Also:
501
+ ---
502
+
503
+ The result of the execution process for every job and stage in this
504
+ workflow is kept in a dict, so all jobs and stages can be accessed with
505
+ dot annotation.
506
+
507
+ For example, when I want to use the output from a previous stage, I
508
+ can access it with the syntax:
509
+
510
+ ... ${job-name}.stages.${stage-id}.outputs.${key}
511
+
512
+ """
513
+ logger.info(f"({self.run_id}) [CORE]: Start Execute: {self.name!r} ...")
514
+ params: DictData = params or {}
515
+ ts: float = time.monotonic()
516
+
517
+ # NOTE: It should not do anything if it does not have any jobs.
518
+ if not self.jobs:
519
+ logger.warning(
520
+ f"({self.run_id}) [WORKFLOW]: This workflow: {self.name!r} "
521
+ f"does not have any jobs"
522
+ )
523
+ return Result(status=0, context=params)
524
+
525
+ # NOTE: Create a job queue that keeps jobs waiting to run until
526
+ # their dependency conditions are met.
527
+ jq: Queue = Queue()
528
+ for job_id in self.jobs:
529
+ jq.put(job_id)
530
+
531
+ # NOTE: Create result context that will pass this context to any
532
+ # execution dependency.
533
+ context: DictData = self.parameterize(params)
534
+ try:
535
+ worker: int = int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))
536
+ (
537
+ self.__exec_non_threading(context, ts, jq, timeout=timeout)
538
+ if worker == 1
539
+ else self.__exec_threading(
540
+ context, ts, jq, worker=worker, timeout=timeout
541
+ )
542
+ )
543
+ return Result(status=0, context=context)
544
+ except WorkflowException as err:
545
+ context.update(
546
+ {"error_message": f"{err.__class__.__name__}: {err}"}
547
+ )
548
+ return Result(status=1, context=context)
549
+
550
+ def __exec_threading(
551
+ self,
552
+ context: DictData,
553
+ ts: float,
554
+ job_queue: Queue,
555
+ *,
556
+ worker: int = 2,
557
+ timeout: int = 600,
558
+ ) -> DictData:
559
+ """Workflow threading execution.
560
+
561
+ :param context: A workflow context data that is passed downstream.
561
+ :param ts: A start timestamp used to check whether the execution time
562
+ should time out.
563
+ :param timeout: A value in seconds that bounds the running time.
564
+ :param worker: The size of the threading executor pool.
566
+ :rtype: DictData
567
+ """
568
+ not_time_out_flag: bool = True
569
+ logger.debug(
570
+ f"({self.run_id}): [CORE]: Run {self.name} with threading job "
571
+ f"executor"
572
+ )
573
+
574
+ # IMPORTANT: The job execution can run in parallel and wait on its
575
+ # needs as required.
576
+ with ThreadPoolExecutor(max_workers=worker) as executor:
577
+ futures: list[Future] = []
578
+
579
+ while not job_queue.empty() and (
580
+ not_time_out_flag := ((time.monotonic() - ts) < timeout)
581
+ ):
582
+ job_id: str = job_queue.get()
583
+ job: Job = self.jobs[job_id]
584
+
585
+ if any(need not in context["jobs"] for need in job.needs):
586
+ job_queue.put(job_id)
587
+ time.sleep(0.25)
588
+ continue
589
+
590
+ futures.append(
591
+ executor.submit(
592
+ self.execute_job,
593
+ job_id,
594
+ params=copy.deepcopy(context),
595
+ ),
596
+ )
597
+ job_queue.task_done()
598
+
599
+ # NOTE: Wait for all items to finish processing
600
+ job_queue.join()
601
+
602
+ for future in as_completed(futures):
603
+ if err := future.exception():
604
+ logger.error(f"{err}")
605
+ raise WorkflowException(f"{err}")
606
+
607
+ # NOTE: Update job result to workflow result.
608
+ context["jobs"].update(future.result(timeout=20).conext)
609
+
610
+ if not_time_out_flag:
611
+ return context
612
+
613
+ # NOTE: Raise timeout error.
614
+ logger.warning(
615
+ f"({self.run_id}) [WORKFLOW]: Execution of workflow, {self.name!r} "
616
+ f", was timeout"
617
+ )
618
+ raise WorkflowException(
619
+ f"Execution of workflow: {self.name} was timeout"
620
+ )
621
+
622
+ def __exec_non_threading(
623
+ self,
624
+ context: DictData,
625
+ ts: float,
626
+ job_queue: Queue,
627
+ *,
628
+ timeout: int = 600,
629
+ ) -> DictData:
630
+ """Workflow non-threading execution that use sequential job running
631
+ and waiting previous run successful.
632
+
633
+ :param context: A context workflow data that want to downstream passing.
634
+ :param ts: A start timestamp that use for checking execute time should
635
+ timeout.
636
+ :param timeout: A second value unit that bounding running time.
637
+ :rtype: DictData
638
+ """
639
+ not_time_out_flag: bool = True
640
+ logger.debug(
641
+ f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
642
+ f"executor"
643
+ )
644
+
645
+ while not job_queue.empty() and (
646
+ not_time_out_flag := ((time.monotonic() - ts) < timeout)
647
+ ):
648
+ job_id: str = job_queue.get()
649
+ job: Job = self.jobs[job_id]
650
+
651
+ # NOTE: Requeue the job if any of its needed jobs have not run yet.
652
+ if any(need not in context["jobs"] for need in job.needs):
653
+ job_queue.put(job_id)
654
+ time.sleep(0.25)
655
+ continue
656
+
657
+ # NOTE: Start job execution.
658
+ job_rs = self.execute_job(job_id, params=copy.deepcopy(context))
659
+ context["jobs"].update(job_rs.context)
660
+ job_queue.task_done()
661
+
662
+ # NOTE: Wait for all items to finish processing
663
+ job_queue.join()
664
+
665
+ if not_time_out_flag:
666
+ return context
667
+
668
+ # NOTE: Raise timeout error.
669
+ logger.warning(
670
+ f"({self.run_id}) [WORKFLOW]: Execution of workflow was timeout"
671
+ )
672
+ raise WorkflowException(
673
+ f"Execution of workflow: {self.name} was timeout"
674
+ )
675
+
676
+
677
+ class WorkflowSchedule(BaseModel):
678
+ """Workflow schedule Pydantic Model."""
679
+
680
+ name: str = Field(description="A workflow name.")
681
+ on: list[On] = Field(
682
+ default_factory=list,
683
+ description="An override On instance value.",
684
+ )
685
+ params: DictData = Field(
686
+ default_factory=dict,
687
+ description="A parameters that want to use to workflow execution.",
688
+ )
689
+
690
+ @model_validator(mode="before")
691
+ def __prepare__values(cls, values: DictData) -> DictData:
692
+ """Prepare incoming values before validating with model fields."""
693
+
694
+ values["name"] = values["name"].replace(" ", "_")
695
+
696
+ cls.__bypass_on(values)
697
+ return values
698
+
699
+ @classmethod
700
+ def __bypass_on(cls, data: DictData, externals: DictData | None = None):
701
+ """Bypass the on data to loaded config data."""
702
+ if on := data.pop("on", []):
703
+
704
+ if isinstance(on, str):
705
+ on = [on]
706
+
707
+ if any(not isinstance(n, (dict, str)) for n in on):
708
+ raise TypeError("The ``on`` key should be list of str or dict")
709
+
710
+ # NOTE: Pass on value to Loader and keep on model object to on field
711
+ data["on"] = [
712
+ (
713
+ Loader(n, externals=(externals or {})).data
714
+ if isinstance(n, str)
715
+ else n
716
+ )
717
+ for n in on
718
+ ]
719
+ return data
720
+
721
+
722
+ class Schedule(BaseModel):
723
+ """Schedule Pydantic Model that use to run with scheduler package. It does
724
+ not equal the on value in Workflow model but it use same logic to running
725
+ release date with crontab interval.
726
+ """
727
+
728
+ desc: Optional[str] = Field(
729
+ default=None,
730
+ description=(
731
+ "A schedule description that can be string of markdown content."
732
+ ),
733
+ )
734
+ workflows: list[WorkflowSchedule] = Field(
735
+ default_factory=list,
736
+ description="A list of WorkflowSchedule models.",
737
+ )
738
+
739
+ @classmethod
740
+ def from_loader(
741
+ cls,
742
+ name: str,
743
+ externals: DictData | None = None,
744
+ ) -> Self:
745
+ loader: Loader = Loader(name, externals=(externals or {}))
746
+
747
+ # NOTE: Validate the config type match with current connection model
748
+ if loader.type != cls:
749
+ raise ValueError(f"Type {loader.type} does not match with {cls}")
750
+
751
+ loader_data: DictData = copy.deepcopy(loader.data)
752
+
753
+ # NOTE: Add name to loader data
754
+ loader_data["name"] = name.replace(" ", "_")
755
+
756
+ return cls.model_validate(obj=loader_data)
757
+
758
+ def tasks(
759
+ self,
760
+ start_date: datetime,
761
+ queue: dict[str, list[datetime]],
762
+ running: dict[str, list[datetime]],
763
+ *,
764
+ externals: DictData | None = None,
765
+ ) -> list[WorkflowTask]:
766
+ """Generate Task from the current datetime.
767
+
768
+ :param start_date: A start date that get from the workflow schedule.
769
+ :param queue: A mapping of name and list of datetime for queue.
770
+ :param running: A mapping of name and list of datetime for running.
771
+ :param externals: An external parameters that pass to the Loader object.
772
+ :rtype: list[WorkflowTask]
773
+ """
774
+
775
+ # NOTE: Create pair of workflow and on.
776
+ workflow_tasks: list[WorkflowTask] = []
777
+ externals: DictData = externals or {}
778
+
779
+ for wfs in self.workflows:
780
+ wf: Workflow = Workflow.from_loader(wfs.name, externals=externals)
781
+
782
+ # NOTE: Create default list of release datetime.
783
+ queue[wfs.name]: list[datetime] = []
784
+ running[wfs.name]: list[datetime] = []
785
+
786
+ for on in wf.on:
787
+ on_gen = on.generate(start_date)
788
+ next_running_date = on_gen.next
789
+ while next_running_date in queue[wfs.name]:
790
+ next_running_date = on_gen.next
791
+
792
+ heappush(queue[wfs.name], next_running_date)
793
+
794
+ workflow_tasks.append(
795
+ WorkflowTask(
796
+ workflow=wf,
797
+ on=on,
798
+ params=wfs.params,
799
+ queue=queue,
800
+ running=running,
801
+ ),
802
+ )
803
+
804
+ return workflow_tasks
49
805
 
50
806
 
51
807
  def catch_exceptions(cancel_on_failure=False):
52
808
  """Catch exception error from scheduler job."""
53
809
 
54
- def catch_exceptions_decorator(job_func):
55
- @wraps(job_func)
810
+ def catch_exceptions_decorator(func):
811
+ @wraps(func)
56
812
  def wrapper(*args, **kwargs):
57
813
  try:
58
- return job_func(*args, **kwargs)
814
+ return func(*args, **kwargs)
59
815
  except Exception as err:
60
- logging.exception(err)
816
+ logger.exception(err)
61
817
  if cancel_on_failure:
62
818
  return CancelJob
63
819
 
@@ -66,157 +822,188 @@ def catch_exceptions(cancel_on_failure=False):
66
822
  return catch_exceptions_decorator
67
823
 
68
824
 
69
- @dataclass
70
- class PipelineTask:
71
- pipeline: Pipeline
72
- on: On
73
- queue: list[datetime]
74
- running: list[datetime]
825
+ def catch_exceptions_method(cancel_on_failure=False):
826
+ """Catch exception error from scheduler job."""
75
827
 
828
+ def catch_exceptions_decorator(func):
829
+ @wraps(func)
830
+ def wrapper(self, *args, **kwargs):
831
+ try:
832
+ return func(self, *args, **kwargs)
833
+ except Exception as err:
834
+ logger.exception(err)
835
+ if cancel_on_failure:
836
+ return CancelJob
76
837
 
77
- def queue2str(queue: list[datetime]) -> Iterator[str]:
78
- return (f"{q:%Y-%m-%d %H:%M:%S}" for q in queue)
838
+ return wrapper
79
839
 
840
+ return catch_exceptions_decorator
80
841
 
81
- def pipeline_release(
82
- task: PipelineTask,
83
- *,
84
- log: Log | None = None,
85
- ) -> None:
86
- """Pipeline release, it will use with the same logic of `pipeline.release`
87
- method.
88
842
 
89
- :param task: A PipelineTask dataclass.
90
- :param log: A log object.
843
+ @dataclass(frozen=True)
844
+ class WorkflowTask:
845
+ """Workflow task dataclass that use to keep mapping data and objects for
846
+ passing in multithreading task.
91
847
  """
92
- log: Log = log or FileLog
93
- pipeline: Pipeline = task.pipeline
94
- on: On = task.on
95
848
 
96
- gen: CronRunner = on.generate(
97
- datetime.now(tz=tz).replace(second=0, microsecond=0)
98
- )
99
- cron_tz: ZoneInfo = gen.tz
849
+ workflow: Workflow
850
+ on: On
851
+ params: DictData = field(compare=False, hash=False)
852
+ queue: list[datetime] = field(compare=False, hash=False)
853
+ running: list[datetime] = field(compare=False, hash=False)
854
+
855
+ @catch_exceptions_method(cancel_on_failure=True)
856
+ def release(self, log: Log | None = None) -> None:
857
+ """Workflow release, it will use with the same logic of
858
+ `workflow.release` method.
859
+
860
+ :param log: A log object.
861
+ """
862
+ tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
863
+ log: Log = log or FileLog
864
+ wf: Workflow = self.workflow
865
+ on: On = self.on
866
+
867
+ gen: CronRunner = on.generate(
868
+ datetime.now(tz=tz).replace(second=0, microsecond=0)
869
+ )
870
+ cron_tz: ZoneInfo = gen.tz
100
871
 
101
- next_running_time: datetime = gen.next
102
- while next_running_time in task.running[pipeline.name]:
103
- next_running_time: datetime = gen.next
872
+ # NOTE: get next schedule time that generate from now.
873
+ next_time: datetime = gen.next
104
874
 
105
- logging.debug(
106
- f"[CORE]: {pipeline.name!r} : {on.cronjob} : "
107
- f"{next_running_time:%Y-%m-%d %H:%M:%S}"
108
- )
109
- heappush(task.running[pipeline.name], next_running_time)
875
+ # NOTE: Get the next time until it is not already running.
876
+ while log.is_pointed(wf.name, next_time, queue=self.running[wf.name]):
877
+ next_time: datetime = gen.next
110
878
 
111
- # TODO: event should set on this step for release next pipeline task?
879
+ logger.debug(
880
+ f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
881
+ f"{next_time:%Y-%m-%d %H:%M:%S}"
882
+ )
883
+ heappush(self.running[wf.name], next_time)
112
884
 
113
- if get_diff_sec(next_running_time, tz=cron_tz) > 55:
114
- logging.debug(
115
- f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
116
- f"Does not closely >> {next_running_time:%Y-%m-%d %H:%M:%S}"
885
+ if get_diff_sec(next_time, tz=cron_tz) > 55:
886
+ logger.debug(
887
+ f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
888
+ f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
889
+ )
890
+
891
+ # NOTE: Add this next running datetime, which is not in the period, to
892
+ # the queue and remove it from running.
893
+ self.running[wf.name].remove(next_time)
894
+ heappush(self.queue[wf.name], next_time)
895
+
896
+ time.sleep(0.2)
897
+ return
898
+
899
+ logger.debug(
900
+ f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
901
+ f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
117
902
  )
118
903
 
119
- # NOTE: Add this next running datetime to queue
120
- heappush(task.queue[pipeline.name], next_running_time)
121
- task.running[pipeline.name].remove(next_running_time)
904
+ # NOTE: Release when the time is nearly to schedule time.
905
+ while (duration := get_diff_sec(next_time, tz=tz)) > (15 + 5):
906
+ logger.debug(
907
+ f"({wf.run_id}) [CORE]: {wf.name!r} : {on.cronjob} "
908
+ f": Sleep until: {duration}"
909
+ )
910
+ time.sleep(15)
911
+
122
912
  time.sleep(0.5)
123
- return
124
913
 
125
- logging.debug(
126
- f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
127
- f"Closely to run >> {next_running_time:%Y-%m-%d %H:%M:%S}"
128
- )
914
+ # NOTE: Release parameters that are used to fill the params if they
915
+ # contain templating.
916
+ release_params: DictData = {
917
+ "release": {
918
+ "logical_date": next_time,
919
+ },
920
+ }
129
921
 
130
- # NOTE: Release when the time is nearly to schedule time.
131
- while (duration := get_diff_sec(next_running_time, tz=tz)) > (15 + 5):
132
- logging.debug(
133
- f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
134
- f"Sleep until: {duration}"
922
+ # WARNING: Re-create workflow object that use new running workflow
923
+ # ID.
924
+ runner: Workflow = wf.get_running_id(run_id=wf.new_run_id)
925
+ rs: Result = runner.execute(
926
+ params=param2template(self.params, release_params),
927
+ )
928
+ logger.debug(
929
+ f"({runner.run_id}) [CORE]: {wf.name!r} : {on.cronjob} : "
930
+ f"End release - {next_time:%Y-%m-%d %H:%M:%S}"
135
931
  )
136
- time.sleep(15)
137
-
138
- time.sleep(0.5)
139
-
140
- # NOTE: Release parameter that use to change if params has
141
- # templating.
142
- release_params: DictData = {
143
- "release": {
144
- "logical_date": next_running_time,
145
- },
146
- }
147
-
148
- # WARNING: Re-create pipeline object that use new running pipeline
149
- # ID.
150
- runner: Pipeline = pipeline.get_running_id(run_id=pipeline.new_run_id)
151
- rs: Result = runner.execute(
152
- # FIXME: replace fix parameters on this execution process.
153
- params=param2template(
154
- {"asat-dt": "${{ release.logical_date }}"}, release_params
155
- ),
156
- )
157
- logging.debug(
158
- f"({runner.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
159
- f"End release"
160
- )
161
932
 
162
- del runner
933
+ del runner
934
+
935
+ # NOTE: Set parent ID on this result.
936
+ rs.set_parent_run_id(wf.run_id)
937
+
938
+ # NOTE: Save result to log object saving.
939
+ rs_log: Log = log.model_validate(
940
+ {
941
+ "name": wf.name,
942
+ "on": str(on.cronjob),
943
+ "release": next_time,
944
+ "context": rs.context,
945
+ "parent_run_id": rs.run_id,
946
+ "run_id": rs.run_id,
947
+ }
948
+ )
949
+ rs_log.save(excluded=None)
163
950
 
164
- # NOTE: remove this release date from running
165
- task.running[pipeline.name].remove(next_running_time)
951
+ # NOTE: remove this release date from running
952
+ self.running[wf.name].remove(next_time)
166
953
 
167
- # IMPORTANT:
168
- # Add the next running datetime to pipeline queue
169
- finish_time: datetime = datetime.now(tz=cron_tz).replace(
170
- second=0, microsecond=0
171
- )
172
- future_running_time: datetime = gen.next
173
- while (
174
- future_running_time in task.running[pipeline.name]
175
- or future_running_time in task.queue[pipeline.name]
176
- or future_running_time < finish_time
177
- ):
954
+ # IMPORTANT:
955
+ # Add the next running datetime to workflow queue
956
+ finish_time: datetime = datetime.now(tz=cron_tz).replace(
957
+ second=0, microsecond=0
958
+ )
178
959
  future_running_time: datetime = gen.next
960
+ while (
961
+ future_running_time in self.running[wf.name]
962
+ or future_running_time in self.queue[wf.name]
963
+ or future_running_time < finish_time
964
+ ):
965
+ future_running_time: datetime = gen.next
179
966
 
180
- heappush(task.queue[pipeline.name], future_running_time)
967
+ heappush(self.queue[wf.name], future_running_time)
968
+ logger.debug(f"[CORE]: {'-' * 100}")
181
969
 
182
- # NOTE: Set parent ID on this result.
183
- rs.set_parent_run_id(pipeline.run_id)
970
+ def __eq__(self, other):
971
+ if isinstance(other, WorkflowTask):
972
+ return (
973
+ self.workflow.name == other.workflow.name
974
+ and self.on.cronjob == other.on.cronjob
975
+ )
184
976
 
185
- # NOTE: Save result to log object saving.
186
- rs_log: Log = log.model_validate(
187
- {
188
- "name": pipeline.name,
189
- "on": str(on.cronjob),
190
- "release": next_running_time,
191
- "context": rs.context,
192
- "parent_run_id": rs.run_id,
193
- "run_id": rs.run_id,
194
- }
195
- )
196
- rs_log.save()
197
977
 
198
- logging.debug(f"[CORE]: {rs}")
978
+ def queue2str(queue: list[datetime]) -> Iterator[str]:
979
+ return (f"{q:%Y-%m-%d %H:%M:%S}" for q in queue)
199
980
 
200
981
 
201
982
  @catch_exceptions(cancel_on_failure=True)
202
983
  def workflow_task(
203
- pipeline_tasks: list[PipelineTask],
984
+ workflow_tasks: list[WorkflowTask],
204
985
  stop: datetime,
205
986
  threads: dict[str, Thread],
206
987
  ) -> CancelJob | None:
207
- """Workflow task generator that create release pair of pipeline and on to
988
+ """Workflow task generator that create release pair of workflow and on to
208
989
  the threading in background.
209
990
 
210
991
  This workflow task will start every minute at :02 second.
992
+
993
+ :param workflow_tasks: A list of WorkflowTask objects to release.
994
+ :param stop: A datetime value that stops this workflow task.
995
+ :param threads: A mapping of thread name to Thread object.
996
+ :rtype: CancelJob | None
211
997
  """
998
+ tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
212
999
  start_date: datetime = datetime.now(tz=tz)
213
- start_date_minute = start_date.replace(second=0, microsecond=0)
1000
+ start_date_minute: datetime = start_date.replace(second=0, microsecond=0)
214
1001
 
215
- if start_date > stop:
216
- logging.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
1002
+ if start_date > stop.replace(tzinfo=tz):
1003
+ logger.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
217
1004
  while len(threads) > 0:
218
- logging.warning(
219
- "[WORKFLOW]: Waiting pipeline release thread that still "
1005
+ logger.warning(
1006
+ "[WORKFLOW]: Waiting workflow release thread that still "
220
1007
  "running in background."
221
1008
  )
222
1009
  time.sleep(15)
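The `catch_exceptions` and `catch_exceptions_method` decorators above log any exception raised inside a scheduled callable and, when `cancel_on_failure=True`, return `CancelJob` so the `schedule` package drops the failing job. A small sketch of that behaviour, assuming the `schedule` package is installed and `catch_exceptions` is imported from this module (the job body is illustrative):

    import schedule

    @catch_exceptions(cancel_on_failure=True)
    def flaky_job():
        raise RuntimeError("boom")  # logged by the decorator's wrapper

    # A job function that returns schedule.CancelJob is removed from the
    # scheduler, so a failing control job stops being rescheduled.
    schedule.every(1).minutes.do(flaky_job)
    schedule.run_pending()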
@@ -224,72 +1011,72 @@ def workflow_task(
224
1011
  return CancelJob
225
1012
 
226
1013
  # IMPORTANT:
227
- # Filter pipeline & on that should to run with `pipeline_release`
1014
+ # Filter workflow & on that should to run with `workflow_release`
228
1015
  # function. It will deduplicate running with different schedule values
229
1016
  # because I use current time in this condition.
230
1017
  #
231
- # For example, if a pipeline A queue has '00:02:00' time that
1018
+ # For example, if a workflow A queue has '00:02:00' time that
232
1019
  # should to run and its schedule has '*/2 * * * *' and '*/35 * * * *'.
233
1020
  # This condition will release with 2 threading job.
234
1021
  #
235
1022
  # '00:02:00' --> '*/2 * * * *' --> running
236
1023
  # --> '*/35 * * * *' --> skip
237
1024
  #
238
- for task in pipeline_tasks:
1025
+ for task in workflow_tasks:
239
1026
 
240
1027
  # NOTE: Get incoming datetime queue.
241
- logging.debug(
242
- f"[WORKFLOW]: Current queue: {task.pipeline.name!r} : "
243
- f"{list(queue2str(task.queue[task.pipeline.name]))}"
1028
+ logger.debug(
1029
+ f"[WORKFLOW]: Current queue: {task.workflow.name!r} : "
1030
+ f"{list(queue2str(task.queue[task.workflow.name]))}"
244
1031
  )
245
1032
 
246
1033
  # NOTE: Create minute unit value for any scheduler datetime that
247
- # checking a pipeline task should run in this datetime.
1034
+ # checking whether a workflow task should run at this datetime.
248
1035
  current_running_time: datetime = start_date_minute.astimezone(
249
1036
  tz=ZoneInfo(task.on.tz)
250
1037
  )
251
1038
  if (
252
- len(task.queue[task.pipeline.name]) > 0
253
- and current_running_time != task.queue[task.pipeline.name][0]
1039
+ len(task.queue[task.workflow.name]) > 0
1040
+ and current_running_time != task.queue[task.workflow.name][0]
254
1041
  ) or (
255
1042
  task.on.next(current_running_time)
256
- != task.queue[task.pipeline.name][0]
1043
+ != task.queue[task.workflow.name][0]
257
1044
  ):
258
- logging.debug(
1045
+ logger.debug(
259
1046
  f"[WORKFLOW]: Skip schedule "
260
1047
  f"{current_running_time:%Y-%m-%d %H:%M:%S} "
261
- f"for : {task.pipeline.name!r} : {task.on.cronjob}"
1048
+ f"for : {task.workflow.name!r} : {task.on.cronjob}"
262
1049
  )
263
1050
  continue
264
- elif len(task.queue[task.pipeline.name]) == 0:
265
- # TODO: Should auto add new queue?
266
- logging.warning(
267
- f"[WORKFLOW]: Queue is empty for : {task.pipeline.name!r} : "
1051
+ elif len(task.queue[task.workflow.name]) == 0:
1052
+ logger.warning(
1053
+ f"[WORKFLOW]: Queue is empty for : {task.workflow.name!r} : "
268
1054
  f"{task.on.cronjob}"
269
1055
  )
270
1056
  continue
271
1057
 
272
1058
  # NOTE: Remove this datetime from queue.
273
- task.queue[task.pipeline.name].pop(0)
1059
+ task.queue[task.workflow.name].pop(0)
274
1060
 
1061
+ # NOTE: Create a thread name that can be tracked by the observe schedule
1062
+ # job.
275
1063
  thread_name: str = (
276
- f"{task.pipeline.name}|{str(task.on.cronjob)}|"
1064
+ f"{task.workflow.name}|{str(task.on.cronjob)}|"
277
1065
  f"{current_running_time:%Y%m%d%H%M}"
278
1066
  )
279
- pipe_thread: Thread = Thread(
280
- target=pipeline_release,
281
- args=(task,),
1067
+ wf_thread: Thread = Thread(
1068
+ target=task.release,
282
1069
  name=thread_name,
283
1070
  daemon=True,
284
1071
  )
285
1072
 
286
- threads[thread_name] = pipe_thread
1073
+ threads[thread_name] = wf_thread
287
1074
 
288
- pipe_thread.start()
1075
+ wf_thread.start()
289
1076
 
290
1077
  delay()
291
1078
 
292
- logging.debug(f"[WORKFLOW]: {'=' * 100}")
1079
+ logger.debug(f"[WORKFLOW]: {'=' * 100}")
293
1080
 
294
1081
 
295
1082
  def workflow_long_running_task(threads: dict[str, Thread]) -> None:
@@ -297,8 +1084,11 @@ def workflow_long_running_task(threads: dict[str, Thread]) -> None:
297
1084
  control.
298
1085
 
299
1086
  :param threads: A mapping of Thread object and its name.
1087
+ :rtype: None
300
1088
  """
301
- logging.debug("[MONITOR]: Start checking long running pipeline release.")
1089
+ logger.debug(
1090
+ "[MONITOR]: Start checking long running workflow release task."
1091
+ )
302
1092
  snapshot_threads = list(threads.keys())
303
1093
  for t_name in snapshot_threads:
304
1094
 
@@ -308,22 +1098,31 @@ def workflow_long_running_task(threads: dict[str, Thread]) -> None:
308
1098
 
309
1099
 
310
1100
  def workflow_control(
311
- pipelines: list[str],
312
- until: datetime | None = None,
1101
+ schedules: list[str],
1102
+ stop: datetime | None = None,
313
1103
  externals: DictData | None = None,
314
1104
  ) -> list[str]:
315
1105
  """Workflow scheduler control.
316
1106
 
317
- :param pipelines: A list of pipeline names that want to schedule running.
318
- :param until:
1107
+ :param schedules: A list of schedule names that should be run.
1108
+ :param stop: A datetime value that is used to stop the running schedule.
319
1109
  :param externals: An external parameters that pass to Loader.
1110
+ :rtype: list[str]
320
1111
  """
1112
+ try:
1113
+ from schedule import Scheduler
1114
+ except ImportError:
1115
+ raise ImportError(
1116
+ "Should install schedule package before use this module."
1117
+ ) from None
1118
+
1119
+ tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
321
1120
  schedule: Scheduler = Scheduler()
322
1121
  start_date: datetime = datetime.now(tz=tz)
323
1122
 
324
1123
  # NOTE: Design workflow queue caching.
325
1124
  # ---
326
- # {"pipeline-name": [<release-datetime>, <release-datetime>, ...]}
1125
+ # {"workflow-name": [<release-datetime>, <release-datetime>, ...]}
327
1126
  #
328
1127
  wf_queue: dict[str, list[datetime]] = {}
329
1128
  wf_running: dict[str, list[datetime]] = {}
@@ -333,35 +1132,30 @@ def workflow_control(
333
1132
  second=0, microsecond=0
334
1133
  )
335
1134
 
336
- # NOTE: Create pair of pipeline and on.
337
- pipeline_tasks: list[PipelineTask] = []
338
-
339
- for name in pipelines:
340
- pipeline: Pipeline = Pipeline.from_loader(name, externals=externals)
341
-
342
- # NOTE: Create default list of release datetime.
343
- wf_queue[name]: list[datetime] = []
344
- wf_running[name]: list[datetime] = []
345
-
346
- for on in pipeline.on:
347
-
348
- on_gen = on.generate(start_date_waiting)
349
- next_running_date = on_gen.next
350
- while next_running_date in wf_queue[name]:
351
- next_running_date = on_gen.next
352
-
353
- heappush(wf_queue[name], next_running_date)
354
- pipeline_tasks.append(
355
- PipelineTask(
356
- pipeline=pipeline, on=on, queue=wf_queue, running=wf_running
357
- ),
358
- )
1135
+ # NOTE: Create pair of workflow and on from schedule model.
1136
+ workflow_tasks: list[WorkflowTask] = []
1137
+ for name in schedules:
1138
+ sch: Schedule = Schedule.from_loader(name, externals=externals)
1139
+ workflow_tasks.extend(
1140
+ sch.tasks(
1141
+ start_date_waiting, wf_queue, wf_running, externals=externals
1142
+ ),
1143
+ )
359
1144
 
360
1145
  # NOTE: This schedule job will start every minute at :02 seconds.
361
1146
  schedule.every(1).minutes.at(":02").do(
362
1147
  workflow_task,
363
- pipeline_tasks=pipeline_tasks,
364
- stop=until or (start_date + timedelta(minutes=5, seconds=20)),
1148
+ workflow_tasks=workflow_tasks,
1149
+ stop=stop
1150
+ or (
1151
+ start_date
1152
+ + timedelta(
1153
+ **json.loads(
1154
+ os.getenv("WORKFLOW_APP_STOP_BOUNDARY_DELTA")
1155
+ or '{"minutes": 5, "seconds": 20}'
1156
+ )
1157
+ )
1158
+ ),
365
1159
  threads=thread_releases,
366
1160
  ).tag("control")
367
1161
 
@@ -372,81 +1166,78 @@ def workflow_control(
372
1166
  ).tag("monitor")
373
1167
 
374
1168
  # NOTE: Start running schedule
375
- logging.info(f"[WORKFLOW]: Start schedule: {pipelines}")
1169
+ logger.info(f"[WORKFLOW]: Start schedule: {schedules}")
376
1170
  while True:
377
1171
  schedule.run_pending()
378
1172
  time.sleep(1)
379
1173
  if not schedule.get_jobs("control"):
380
1174
  schedule.clear("monitor")
381
- logging.warning(
382
- f"[WORKFLOW]: Pipeline release thread: {thread_releases}"
1175
+ logger.warning(
1176
+ f"[WORKFLOW]: Workflow release thread: {thread_releases}"
383
1177
  )
384
- logging.warning("[WORKFLOW]: Does not have any schedule jobs !!!")
1178
+ logger.warning("[WORKFLOW]: Does not have any schedule jobs !!!")
385
1179
  break
386
1180
 
387
- logging.warning(f"Queue: {[wf_queue[wf] for wf in wf_queue]}")
388
- logging.warning(f"Running: {[wf_running[wf] for wf in wf_running]}")
389
- return pipelines
1181
+ logger.warning(
1182
+ f"Queue: {[list(queue2str(wf_queue[wf])) for wf in wf_queue]}"
1183
+ )
1184
+ logger.warning(
1185
+ f"Running: {[list(queue2str(wf_running[wf])) for wf in wf_running]}"
1186
+ )
1187
+ return schedules
390
1188
 
391
1189
 
392
- def workflow(
393
- until: datetime | None = None,
1190
+ def workflow_runner(
1191
+ stop: datetime | None = None,
394
1192
  externals: DictData | None = None,
395
1193
  excluded: list[str] | None = None,
396
- ):
1194
+ ) -> list[str]:
397
1195
  """Workflow application that running multiprocessing schedule with chunk of
398
- pipelines that exists in config path.
1196
+ workflows that exist in the config path.
399
1197
 
400
- :param until:
1198
+ :param stop: A datetime value that is used to stop the schedule runner.
401
1199
  :param excluded:
402
1200
  :param externals:
1201
+ :rtype: list[str]
403
1202
 
404
- This function will get all pipelines that include on value that was
405
- created in config path and chuck it with WORKFLOW_APP_PIPELINE_PER_PROCESS
1203
+ This function will get all workflows that include an on value and were
1204
+ created in the config path, and chunk them with the WORKFLOW_APP_SCHEDULE_PER_PROCESS
406
1205
  value to multiprocess executor pool.
407
1206
 
408
1207
  The current workflow logic:
409
1208
  ---
410
1209
  PIPELINES ==> process 01 ==> schedule 1 minute --> thread of release
411
- pipeline task 01 01
1210
+ workflow task 01 01
412
1211
  --> thread of release
413
- pipeline task 01 02
1212
+ workflow task 01 02
414
1213
  ==> process 02 ==> schedule 1 minute --> thread of release
415
- pipeline task 02 01
1214
+ workflow task 02 01
416
1215
  --> thread of release
417
- pipeline task 02 02
1216
+ workflow task 02 02
418
1217
  ==> ...
419
1218
  """
420
- excluded: list = excluded or []
1219
+ excluded: list[str] = excluded or []
421
1220
 
422
- with ProcessPoolExecutor(max_workers=2) as executor:
1221
+ with ProcessPoolExecutor(
1222
+ max_workers=int(os.getenv("WORKFLOW_APP_PROCESS_WORKER") or "2"),
1223
+ ) as executor:
423
1224
  futures: list[Future] = [
424
1225
  executor.submit(
425
1226
  workflow_control,
426
- pipelines=[load[0] for load in loader],
427
- until=until,
1227
+ schedules=[load[0] for load in loader],
1228
+ stop=stop,
428
1229
  externals=(externals or {}),
429
1230
  )
430
1231
  for loader in batch(
431
- # Loader.find(Pipeline, include=["on"], excluded=excluded),
432
- [
433
- ("pipe-scheduling", None),
434
- # ("pipe-scheduling-minute", None),
435
- ],
436
- n=1,
1232
+ Loader.finds(Schedule, excluded=excluded),
1233
+ n=int(os.getenv("WORKFLOW_APP_SCHEDULE_PER_PROCESS") or "100"),
437
1234
  )
438
1235
  ]
439
1236
 
440
1237
  results: list[str] = []
441
1238
  for future in as_completed(futures):
442
1239
  if err := future.exception():
443
- logging.error(str(err))
1240
+ logger.error(str(err))
444
1241
  raise WorkflowException(str(err)) from err
445
1242
  results.extend(future.result(timeout=1))
446
1243
  return results
447
-
448
-
449
- if __name__ == "__main__":
450
- # TODO: Define input arguments that want to manage this application.
451
- workflow_rs: list[str] = workflow()
452
- logging.info(f"Application run success: {workflow_rs}")