ddeutil-workflow 0.0.62__py3-none-any.whl → 0.0.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,813 +0,0 @@
1
- # ------------------------------------------------------------------------------
2
- # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
3
- # Licensed under the MIT License. See LICENSE in the project root for
4
- # license information.
5
- # ------------------------------------------------------------------------------
6
- """The main schedule running is `schedule_runner` function that trigger the
7
- multiprocess of `schedule_control` function for listing schedules on the
8
- config by `Loader.finds(Schedule)`.
9
-
10
- The `schedule_control` is the scheduler function that release 2 schedule
11
- functions; `workflow_task`, and `workflow_monitor`.
12
-
13
- `schedule_control` ---( Every minute at :02 )--> `schedule_task`
14
- ---( Every 5 minutes )--> `monitor`
15
-
16
- The `schedule_task` will run `task.release` method in threading object
17
- for multithreading strategy. This `release` method will run only one crontab
18
- value with the on field.
19
-
20
- Steps:
21
- - Extract all schedule config on the conf path.
22
- - Slice schedules to multiprocess
23
- - Start running task.
24
- """
25
- from __future__ import annotations
26
-
27
- import copy
28
- import logging
29
- import time
30
- from concurrent.futures import (
31
- Future,
32
- ProcessPoolExecutor,
33
- as_completed,
34
- )
35
- from datetime import datetime, timedelta
36
- from functools import wraps
37
- from heapq import heappop, heappush
38
- from pathlib import Path
39
- from textwrap import dedent
40
- from threading import Thread
41
- from typing import Any, Callable, Optional, TypedDict, Union
42
-
43
- from pydantic import BaseModel, Field, ValidationInfo
44
- from pydantic.functional_validators import field_validator, model_validator
45
- from typing_extensions import Self
46
-
47
- try:
48
- from typing import ParamSpec
49
- except ImportError: # pragma: no cov
50
- from typing_extensions import ParamSpec
51
-
52
- try:
53
- from schedule import CancelJob
54
- except ImportError: # pragma: no cov
55
- CancelJob = None
56
-
57
- from .__cron import CronRunner
58
- from .__types import DictData, TupleStr
59
- from .conf import FileLoad, Loader, dynamic
60
- from .event import Crontab
61
- from .exceptions import ScheduleException, WorkflowException
62
- from .logs import Audit, get_audit
63
- from .result import SUCCESS, Result
64
- from .utils import batch, delay
65
- from .workflow import Release, ReleaseQueue, Workflow, WorkflowTask
66
-
67
- P = ParamSpec("P")
68
-
69
- logging.getLogger("schedule").setLevel(logging.INFO)
70
-
71
-
72
- __all__: TupleStr = (
73
- "Schedule",
74
- "ScheduleWorkflow",
75
- "schedule_task",
76
- "monitor",
77
- "schedule_control",
78
- "schedule_runner",
79
- "ReleaseThreads",
80
- "ReleaseThread",
81
- )
82
-
83
-
84
- class ScheduleWorkflow(BaseModel):
85
- """Schedule Workflow Pydantic model that use to keep workflow model for
86
- the Schedule model. it should not use Workflow model directly because on the
87
- schedule config it can adjust crontab value that different from the Workflow
88
- model.
89
-
90
- This on field does not equal to the on field of Workflow model, but it
91
- uses same logic to generate running release date with crontab object. It
92
- uses for override the on field if the schedule time was change, but you do
93
- not want to change on the workflow model.
94
- """
95
-
96
- extras: DictData = Field(
97
- default_factory=dict,
98
- description="An extra parameters that want to override config values.",
99
- )
100
-
101
- alias: Optional[str] = Field(
102
- default=None,
103
- description="An alias name of workflow that use for schedule model.",
104
- )
105
- name: str = Field(description="A workflow name.")
106
- on: list[Crontab] = Field(
107
- default_factory=list,
108
- description="An override the list of Crontab object values.",
109
- )
110
- values: DictData = Field(
111
- default_factory=dict,
112
- description=(
113
- "A value that want to pass to the workflow params field when auto "
114
- "calling release method."
115
- ),
116
- alias="params",
117
- )
118
-
119
- @model_validator(mode="before")
120
- def __prepare_before__(cls, data: Any) -> Any:
121
- """Prepare incoming values before validating with model fields."""
122
- if isinstance(data, dict):
123
- # VALIDATE: Add default the alias field with the name.
124
- if "alias" not in data:
125
- data["alias"] = data.get("name")
126
-
127
- cls.__bypass_on(data, extras=data.get("extras"))
128
- return data
129
-
130
- @classmethod
131
- def __bypass_on(
132
- cls, data: DictData, *, extras: Optional[DictData] = None
133
- ) -> DictData:
134
- """Bypass and prepare the on data to loaded config data.
135
-
136
- :param data: (DictData) A data that want to validate for the model
137
- initialization.
138
- :param extras: (DictData) An extra parameter that want to override core
139
- config values.
140
-
141
- :rtype: DictData
142
- """
143
- if on := data.pop("on", []):
144
-
145
- if isinstance(on, str):
146
- on: list[str] = [on]
147
-
148
- if any(not isinstance(n, (dict, str)) for n in on):
149
- raise TypeError("The `on` key should be list of str or dict")
150
-
151
- # NOTE: Pass on value to Loader and keep on model object to on
152
- # field.
153
- data["on"] = [
154
- FileLoad(n, externals=extras).data if isinstance(n, str) else n
155
- for n in on
156
- ]
157
-
158
- return data
159
-
160
- @field_validator("on", mode="after")
161
- def __on_no_dup__(
162
- cls, value: list[Crontab], info: ValidationInfo
163
- ) -> list[Crontab]:
164
- """Validate the on fields should not contain duplicate values and if it
165
- contains every minute value, it should have only one on value.
166
-
167
- :param value: (list[Crontab]) A list of `Crontab` object.
168
- :param info: (ValidationInfo) An validation info object for getting an
169
- extra parameter.
170
-
171
- :rtype: list[Crontab]
172
- """
173
- set_ons: set[str] = {str(on.cronjob) for on in value}
174
- if len(set_ons) != len(value):
175
- raise ValueError(
176
- "The on fields should not contain duplicate on value."
177
- )
178
-
179
- extras: Optional[DictData] = info.data.get("extras")
180
- if len(set_ons) > (
181
- conf := dynamic("max_cron_per_workflow", extras=extras)
182
- ):
183
- raise ValueError(
184
- f"The number of the on should not more than {conf} crontabs."
185
- )
186
-
187
- return value
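As a rough illustration of what these validators enforce, direct construction might look like the sketch below. The workflow name, crontab string, and the dict shape accepted for `on` are hypothetical.

```python
# Hypothetical construction; the dict shape for `on` is assumed to match
# what the Crontab model accepts after `__bypass_on` passes it through.
schedule_wf = ScheduleWorkflow(
    name="wf-example",                  # hypothetical workflow name
    on=[{"cronjob": "*/5 * * * *"}],    # assumed Crontab dict shape
    params={"run-date": "2024-01-01"},  # aliased to the `values` field
)
print(schedule_wf.alias)  # -> 'wf-example', defaulted from `name`
```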
188
-
189
- def tasks(
190
- self,
191
- start_date: datetime,
192
- queue: dict[str, ReleaseQueue],
193
- ) -> list[WorkflowTask]:
194
- """Return the list of WorkflowTask object from the specific input
195
- datetime that mapping with the on field.
196
-
197
- This task creation need queue to tracking release date already
198
- mapped or not.
199
-
200
- :param start_date: (datetime) A start datetime that is taken from the
201
- workflow schedule.
202
- :param queue: (dict[str, ReleaseQueue]) A mapping of name and list of
203
- datetime for queue.
204
-
205
- :rtype: list[WorkflowTask]
206
- :return: The list of WorkflowTask objects from the specific
207
- input datetime that maps with the on field.
208
- """
209
- wf: Workflow = Workflow.from_conf(self.name, extras=self.extras)
210
- wf_queue: ReleaseQueue = queue[self.alias]
211
-
212
- # IMPORTANT: Create the default 'on' value if the `on` field was not
213
- # passed to the Schedule object.
214
- ons: list[Crontab] = self.on or wf.on.copy()
215
- workflow_tasks: list[WorkflowTask] = []
216
- for on in ons:
217
-
218
- # NOTE: Create CronRunner instance from the start_date param.
219
- runner: CronRunner = on.generate(start_date)
220
- next_running_date = runner.next
221
-
222
- while wf_queue.check_queue(next_running_date):
223
- next_running_date = runner.next
224
-
225
- workflow_tasks.append(
226
- WorkflowTask(
227
- alias=self.alias,
228
- workflow=wf,
229
- runner=runner,
230
- values=self.values,
231
- extras=self.extras,
232
- ),
233
- )
234
-
235
- return workflow_tasks
236
-
237
-
238
- class Schedule(BaseModel):
239
- """Schedule Pydantic model that use to run with any scheduler package.
240
-
241
- The workflows field of this model includes ScheduleWorkflow objects that
242
- enhance the workflow object by adding the alias and values fields.
243
- """
244
-
245
- extras: DictData = Field(
246
- default_factory=dict,
247
- description="An extra parameters that want to override config values.",
248
- )
249
-
250
- desc: Optional[str] = Field(
251
- default=None,
252
- description=(
253
- "A schedule description that can be string of markdown content."
254
- ),
255
- )
256
- workflows: list[ScheduleWorkflow] = Field(
257
- default_factory=list,
258
- description="A list of ScheduleWorkflow model.",
259
- )
260
-
261
- @field_validator("desc", mode="after")
262
- def __dedent_desc__(cls, value: str) -> str:
263
- """Prepare description string that was created on a template.
264
-
265
- :param value: A description string value that is dedented.
266
-
267
- :rtype: str
268
- """
269
- return dedent(value)
270
-
271
- @classmethod
272
- def from_conf(
273
- cls,
274
- name: str,
275
- *,
276
- path: Optional[Path] = None,
277
- extras: DictData | None = None,
278
- ) -> Self:
279
- """Create Schedule instance from the Loader object that only receive
280
- an input schedule name. The loader object will use this schedule name to
281
- searching configuration data of this schedule model in conf path.
282
-
283
- :param name: (str) A schedule name to pass to the Loader object.
284
- :param path: (Path) An override config path.
285
- :param extras: Extra parameters to pass to the Loader
286
- object.
287
-
288
- :raise ValueError: If the type does not match with current object.
289
-
290
- :rtype: Self
291
- """
292
- loader: Loader = FileLoad(name, path=path, extras=extras)
293
-
294
- # NOTE: Validate that the config type matches the current model.
295
- if loader.type != cls.__name__:
296
- raise ValueError(f"Type {loader.type} does not match with {cls}")
297
-
298
- loader_data: DictData = copy.deepcopy(loader.data)
299
- loader_data["name"] = name
300
-
301
- if extras:
302
- loader_data["extras"] = extras
303
-
304
- return cls.model_validate(obj=loader_data)
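A short usage sketch of this loader path; the config name `schedule-wf` is hypothetical and must exist as a `type: Schedule` entry in the conf path:

```python
# Hypothetical load of a Schedule config named 'schedule-wf'.
schedule = Schedule.from_conf("schedule-wf")
print(schedule.desc, [wf.alias for wf in schedule.workflows])
```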
305
-
306
- def tasks(
307
- self,
308
- start_date: datetime,
309
- queue: dict[str, ReleaseQueue],
310
- ) -> list[WorkflowTask]:
311
- """Return the list of WorkflowTask object from the specific input
312
- datetime that mapping with the on field from workflow schedule model.
313
-
314
- :param start_date: A start date that is taken from the workflow schedule.
315
- :param queue: (dict[str, ReleaseQueue]) A mapping of name and list of
316
- datetime for queue.
317
-
318
- :rtype: list[WorkflowTask]
319
- :return: The list of WorkflowTask objects from the specific
320
- input datetime that maps with the on field.
321
- """
322
- workflow_tasks: list[WorkflowTask] = []
323
-
324
- for workflow in self.workflows:
325
- if self.extras:
326
- workflow.extras = self.extras
327
-
328
- if workflow.alias not in queue:
329
- queue[workflow.alias] = ReleaseQueue()
330
-
331
- workflow_tasks.extend(workflow.tasks(start_date, queue=queue))
332
-
333
- return workflow_tasks
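Continuing the sketch above, generating tasks needs a shared queue mapping keyed by workflow alias (`schedule` is the instance from the previous example):

```python
from datetime import datetime

# The ReleaseQueue mapping tracks release dates per alias so a datetime
# is never mapped twice; `tasks` fills it in as a side effect.
queue: dict[str, ReleaseQueue] = {}
tasks = schedule.tasks(datetime(2024, 1, 1), queue=queue)
for task in tasks:
    print(task.alias, task.runner.cron)
```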
334
-
335
- def pending(
336
- self,
337
- *,
338
- stop: Optional[datetime] = None,
339
- audit: type[Audit] | None = None,
340
- parent_run_id: Optional[str] = None,
341
- ) -> Result: # pragma: no cov
342
- """Pending this schedule tasks with the schedule package.
343
-
344
- :param stop: A datetime value that is used to stop the running schedule.
346
- :param audit: An audit class that is used on the workflow task release for
346
- writing its release audit context.
347
- :param parent_run_id: A parent workflow running ID for this release.
348
- """
349
- audit: type[Audit] = audit or get_audit(extras=self.extras)
350
- result: Result = Result().set_parent_run_id(parent_run_id)
351
-
352
- # NOTE: Create the start and stop datetime.
353
- start_date: datetime = datetime.now(
354
- tz=dynamic("tz", extras=self.extras)
355
- )
356
- stop_date: datetime = stop or (
357
- start_date + dynamic("stop_boundary_delta", extras=self.extras)
358
- )
359
-
360
- # IMPORTANT: Create main mapping of queue and thread object.
361
- queue: dict[str, ReleaseQueue] = {}
362
- threads: ReleaseThreads = {}
363
-
364
- start_date_waiting: datetime = start_date.replace(
365
- second=0, microsecond=0
366
- ) + timedelta(minutes=1)
367
-
368
- scheduler_pending(
369
- tasks=self.tasks(start_date_waiting, queue=queue),
370
- stop=stop_date,
371
- queue=queue,
372
- threads=threads,
373
- result=result,
374
- audit=audit,
375
- )
376
-
377
- return result.catch(status=SUCCESS)
378
-
379
-
380
- ResultOrCancel = Union[type[CancelJob], Result]
381
- ReturnResultOrCancel = Callable[P, ResultOrCancel]
382
- DecoratorCancelJob = Callable[[ReturnResultOrCancel], ReturnResultOrCancel]
383
-
384
-
385
- def catch_exceptions(
386
- cancel_on_failure: bool = False,
387
- parent_run_id: Optional[str] = None,
388
- ) -> DecoratorCancelJob:
389
- """Catch exception error from scheduler job that running with schedule
390
- package and return CancelJob if this function raise an error.
391
-
392
- :param cancel_on_failure: A flag that allow to return the CancelJob or not
393
- it will raise.
394
- :param parent_run_id:
395
-
396
- :rtype: DecoratorCancelJob
397
- """
398
-
399
- def decorator(
400
- func: ReturnResultOrCancel,
401
- ) -> ReturnResultOrCancel: # pragma: no cov
402
-
403
- @wraps(func)
404
- def wrapper(*args: P.args, **kwargs: P.kwargs) -> ResultOrCancel:
405
-
406
- try:
407
- return func(*args, **kwargs)
408
-
409
- except Exception as err:
410
- if parent_run_id:
411
- (
412
- Result(parent_run_id=parent_run_id).trace.exception(
413
- str(err)
414
- )
415
- )
416
- if cancel_on_failure:
417
- return CancelJob
418
- raise err
419
-
420
- return wrapper
421
-
422
- return decorator
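A small usage sketch of this decorator, assuming the optional `schedule` package is installed so that `CancelJob` is available:

```python
@catch_exceptions(cancel_on_failure=True)
def flaky_job():
    raise ValueError("boom")

# The error is swallowed and CancelJob is returned, which tells the
# schedule package to unschedule the job instead of crashing the loop.
assert flaky_job() is CancelJob
```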
423
-
424
-
425
- class ReleaseThread(TypedDict):
426
- """TypeDict for the release thread."""
427
-
428
- thread: Optional[Thread]
429
- start_date: datetime
430
- release_date: datetime
431
-
432
-
433
- ReleaseThreads = dict[str, ReleaseThread]
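The keys of this mapping follow the thread-name format used in `schedule_task` below, `'{alias}|{release:%Y%m%d%H%M}'`. A hypothetical entry:

```python
from datetime import datetime

# A finished thread is kept with `thread` set to None (see `monitor`).
threads: ReleaseThreads = {
    "wf-example|202401010000": {
        "thread": None,
        "start_date": datetime(2024, 1, 1, 0, 0, 2),
        "release_date": datetime(2024, 1, 1, 0, 0),
    },
}
```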
434
-
435
-
436
- def schedule_task(
437
- tasks: list[WorkflowTask],
438
- stop: datetime,
439
- queue: dict[str, ReleaseQueue],
440
- threads: ReleaseThreads,
441
- audit: type[Audit],
442
- *,
443
- parent_run_id: Optional[str] = None,
444
- extras: Optional[DictData] = None,
445
- ) -> ResultOrCancel:
446
- """Schedule task function that generate thread of workflow task release
447
- method in background. This function do the same logic as the workflow poke
448
- method, but it runs with map of schedules and the on values.
449
-
450
- This schedule task start runs every minute at ':02' second, and it does
451
- not allow you to run with offset time.
452
-
453
- :param tasks: A list of WorkflowTask objects.
454
- :param stop: A stop datetime object that force-stops the running scheduler.
455
- :param queue: A mapping of alias name and ReleaseQueue object.
456
- :param threads: A mapping of alias name and Thread object.
457
- :param audit: An audit class that is used to make the audit object.
458
- :param parent_run_id: A parent workflow running ID for this release.
459
- :param extras: An extra parameter that overrides the core config.
460
-
461
- :rtype: ResultOrCancel
462
- """
463
- result: Result = Result().set_parent_run_id(parent_run_id)
464
- current_date: datetime = datetime.now(tz=dynamic("tz", extras=extras))
465
- if current_date > stop.replace(tzinfo=dynamic("tz", extras=extras)):
466
- return CancelJob
467
-
468
- # IMPORTANT:
469
- # Filter workflow & on that should to run with `workflow_release`
470
- # function. It will deplicate running with different schedule value
471
- # because I use current time in this condition.
472
- #
473
- # For example, if a queue has a release time of '00:02:00' that should
474
- # run, and its schedule has '*/2 * * * *' and '*/35 * * * *', this
475
- # condition ensures that only one threading task is created:
476
- #
477
- # '00:02:00' --> '*/2 * * * *' --> run
478
- # --> '*/35 * * * *' --> skip
479
- #
480
- for task in tasks:
481
-
482
- # NOTE: Get the ReleaseQueue with an alias of the WorkflowTask.
483
- q: ReleaseQueue = queue[task.alias]
484
-
485
- # NOTE: Start adding to the queue and move the runner date in the WorkflowTask.
486
- task.queue(stop, q, audit=audit)
487
-
488
- # NOTE: Get incoming datetime queue.
489
- result.trace.debug(
490
- f"[WORKFLOW]: Queue: {task.alias!r} : {list(q.queue)}"
491
- )
492
-
493
- # VALIDATE: Check the queue is empty or not.
494
- if not q.is_queued:
495
- result.trace.warning(
496
- f"[WORKFLOW]: Queue is empty for : {task.alias!r} : "
497
- f"{task.runner.cron}"
498
- )
499
- continue
500
-
501
- # VALIDATE: Check this task is the first release in the queue or not.
502
- current_release: datetime = current_date.replace(
503
- second=0, microsecond=0
504
- )
505
- if (first_date := q.queue[0].date) > current_release: # pragma: no cov
506
- result.trace.debug(
507
- f"[WORKFLOW]: Skip schedule "
508
- f"{first_date:%Y-%m-%d %H:%M:%S} for : {task.alias!r}"
509
- )
510
- continue
511
- elif first_date < current_release: # pragma: no cov
512
- raise ScheduleException(
513
- "The first release date from queue should not less than current"
514
- "release date."
515
- )
516
-
517
- # NOTE: Pop the earliest release and push it to running.
518
- release: Release = heappop(q.queue)
519
- heappush(q.running, release)
520
-
521
- result.trace.info(
522
- f"[WORKFLOW]: Start thread: '{task.alias}|"
523
- f"{release.date:%Y%m%d%H%M}'"
524
- )
525
-
526
- # NOTE: Create a thread name that can be tracked when observing the
527
- # schedule job.
528
- thread_name: str = f"{task.alias}|{release.date:%Y%m%d%H%M}"
529
- thread: Thread = Thread(
530
- target=catch_exceptions(
531
- cancel_on_failure=True,
532
- )(task.release),
533
- kwargs={
534
- "release": release,
535
- "queue": q,
536
- "audit": audit,
537
- },
538
- name=thread_name,
539
- daemon=True,
540
- )
541
-
542
- threads[thread_name] = {
543
- "thread": thread,
544
- "start_date": datetime.now(tz=dynamic("tz", extras=extras)),
545
- "release_date": release.date,
546
- }
547
-
548
- thread.start()
549
-
550
- delay()
551
-
552
- result.trace.debug(
553
- f"[SCHEDULE]: End schedule task that run since "
554
- f"{current_date:%Y-%m-%d %H:%M:%S} {'=' * 30}"
555
- )
556
- return result.catch(status=SUCCESS, context={"task_date": current_date})
557
-
558
-
559
- def monitor(
560
- threads: ReleaseThreads,
561
- parent_run_id: Optional[str] = None,
562
- ) -> None: # pragma: no cov
563
- """Monitoring function that running every five minute for track long-running
564
- thread instance from the schedule_control function that run every minute.
565
-
566
- :param threads: A mapping of thread name and its ReleaseThread data.
567
- :param parent_run_id: A parent workflow running ID for this release.
568
-
569
- :type threads: ReleaseThreads
570
- """
571
- result: Result = Result().set_parent_run_id(parent_run_id)
572
- result.trace.debug("[MONITOR]: Start checking long running schedule task.")
573
-
574
- snapshot_threads: list[str] = list(threads.keys())
575
- for thread_name in snapshot_threads:
576
-
577
- thread_release: ReleaseThread = threads[thread_name]
578
-
579
- # NOTE: Clear the thread that has finished running.
580
- thread = thread_release["thread"]
581
- if thread and (not thread_release["thread"].is_alive()):
582
- thread_release["thread"] = None
583
-
584
-
585
- def scheduler_pending(
586
- tasks: list[WorkflowTask],
587
- stop: datetime,
588
- queue: dict[str, ReleaseQueue],
589
- threads: ReleaseThreads,
590
- result: Result,
591
- audit: type[Audit],
592
- ) -> Result: # pragma: no cov
593
- """Scheduler pending function.
594
-
595
- :param tasks: A list of WorkflowTask object.
596
- :param stop: A stop datetime object that force-stops the running scheduler.
597
- :param queue: A mapping of alias name and ReleaseQueue object.
598
- :param threads: A mapping of alias name and Thread object.
599
- :param result: A result object.
600
- :param audit: An audit class that is used to make the audit object.
601
-
602
- :rtype: Result
603
- """
604
- try:
605
- from schedule import Scheduler
606
- except ImportError:
607
- raise ImportError(
608
- "Should install schedule package before use this method."
609
- ) from None
610
-
611
- scheduler: Scheduler = Scheduler()
612
-
613
- # NOTE: This schedule job will start every minute at :02 seconds.
614
- (
615
- scheduler.every(1)
616
- .minutes.at(":02")
617
- .do(
618
- catch_exceptions(
619
- cancel_on_failure=True,
620
- parent_run_id=result.parent_run_id,
621
- )(schedule_task),
622
- tasks=tasks,
623
- stop=stop,
624
- queue=queue,
625
- threads=threads,
626
- audit=audit,
627
- parent_run_id=result.parent_run_id,
628
- )
629
- .tag("control")
630
- )
631
-
632
- # NOTE: The zombie-task checking schedule job will start every 5 minutes
633
- # at :10 seconds.
634
- (
635
- scheduler.every(5)
636
- .minutes.at(":10")
637
- .do(
638
- monitor,
639
- threads=threads,
640
- parent_run_id=result.parent_run_id,
641
- )
642
- .tag("monitor")
643
- )
644
-
645
- # NOTE: Start running schedule
646
- result.trace.info(
647
- f"[SCHEDULE]: Schedule with stopper: {stop:%Y-%m-%d %H:%M:%S}"
648
- )
649
-
650
- while True:
651
- scheduler.run_pending()
652
- time.sleep(1)
653
-
654
- # NOTE: Break the scheduler when the control job does not exist.
655
- if not scheduler.get_jobs("control"):
656
- scheduler.clear("monitor")
657
-
658
- while len([t for t in threads.values() if t["thread"]]) > 0:
659
- result.trace.warning(
660
- "[SCHEDULE]: Waiting schedule release thread that still "
661
- "running in background."
662
- )
663
- delay(10)
664
- monitor(threads, parent_run_id=result.parent_run_id)
665
-
666
- break
667
-
668
- result.trace.warning(
669
- f"[SCHEDULE]: Queue: {[list(queue[wf].queue) for wf in queue]}"
670
- )
671
- return result.catch(
672
- status=SUCCESS,
673
- context={
674
- "threads": [
675
- {
676
- "name": thread,
677
- "start_date": threads[thread]["start_date"],
678
- "release_date": threads[thread]["release_date"],
679
- }
680
- for thread in threads
681
- ],
682
- },
683
- )
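Stripped of the workflow plumbing, the registration-and-loop pattern above reduces to this sketch of the `schedule` package API:

```python
import time

from schedule import CancelJob, Scheduler

scheduler = Scheduler()

# Returning CancelJob from a job unschedules it, which is the same exit
# condition scheduler_pending watches for on the 'control' tag.
scheduler.every(1).minutes.at(":02").do(lambda: CancelJob).tag("control")

while scheduler.get_jobs("control"):
    scheduler.run_pending()
    time.sleep(1)
```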
684
-
685
-
686
- def schedule_control(
687
- schedules: list[str],
688
- stop: Optional[datetime] = None,
689
- *,
690
- extras: DictData | None = None,
691
- audit: type[Audit] | None = None,
692
- parent_run_id: Optional[str] = None,
693
- ) -> Result: # pragma: no cov
694
- """Scheduler control function that run the chuck of schedules every minute
695
- and this function release monitoring thread for tracking undead thread in
696
- the background.
697
-
698
- :param schedules: A list of schedule names that should be run.
699
- :param stop: A datetime value that is used to stop the running schedule.
700
- :param extras: Extra parameters that override the core config.
701
- :param audit: An audit class that is used on the workflow task release for
702
- writing its release audit context.
703
- :param parent_run_id: A parent workflow running ID for this release.
704
-
705
- :rtype: Result
706
- """
707
- audit: type[Audit] = audit or get_audit(extras=extras)
708
- result: Result = Result.construct_with_rs_or_id(parent_run_id=parent_run_id)
709
-
710
- # NOTE: Create the start and stop datetime.
711
- start_date: datetime = datetime.now(tz=dynamic("tz", extras=extras))
712
- stop_date: datetime = stop or (
713
- start_date + dynamic("stop_boundary_delta", extras=extras)
714
- )
715
-
716
- # IMPORTANT: Create main mapping of queue and thread object.
717
- queue: dict[str, ReleaseQueue] = {}
718
- threads: ReleaseThreads = {}
719
-
720
- start_date_waiting: datetime = start_date.replace(
721
- second=0, microsecond=0
722
- ) + timedelta(minutes=1)
723
-
724
- tasks: list[WorkflowTask] = []
725
- for name in schedules:
726
- tasks.extend(
727
- (
728
- Schedule.from_conf(name, extras=extras).tasks(
729
- start_date_waiting, queue=queue
730
- )
731
- ),
732
- )
733
-
734
- scheduler_pending(
735
- tasks=tasks,
736
- stop=stop_date,
737
- queue=queue,
738
- threads=threads,
739
- result=result,
740
- audit=audit,
741
- )
742
-
743
- return result.catch(status=SUCCESS, context={"schedules": schedules})
744
-
745
-
746
- def schedule_runner(
747
- stop: Optional[datetime] = None,
748
- *,
749
- max_process: int | None = None,
750
- extras: DictData | None = None,
751
- excluded: list[str] | None = None,
752
- ) -> Result: # pragma: no cov
753
- """Schedule runner function it the multiprocess controller function for
754
- split the setting schedule to the `schedule_control` function on the
755
- process pool. It chunks schedule configs that exists in config
756
- path by `WORKFLOW_APP_MAX_SCHEDULE_PER_PROCESS` value.
757
-
758
- :param stop: A stop datetime object that force-stops the running scheduler.
759
- :param max_process: (int) The maximum number of processes for this func.
760
- :param extras: An extra parameter that overrides the core config.
761
- :param excluded: A list of schedule names to exclude from the search.
762
-
763
- This function will get all workflows that include an on value that was
764
- created in the config path and chunk them by the application config
765
- variable `WORKFLOW_APP_MAX_SCHEDULE_PER_PROCESS` env var for the
766
- multiprocess executor pool.
767
-
768
- The current workflow logic that split to process will be below diagram:
769
-
770
- MAIN ==> process 01 ==> schedule ==> thread 01 --> 01
771
- ==> thread 01 --> 02
772
- ==> schedule ==> thread 02 --> 01
773
- ==> thread 02 --> 02
774
- ==> ...
775
- ==> process 02 ==> ...
776
-
777
- :rtype: Result
778
- """
779
- result: Result = Result()
780
- context: DictData = {"schedules": [], "threads": []}
781
-
782
- with ProcessPoolExecutor(
783
- max_workers=dynamic(
784
- "max_schedule_process", f=max_process, extras=extras
785
- ),
786
- ) as executor:
787
-
788
- futures: list[Future] = [
789
- executor.submit(
790
- schedule_control,
791
- schedules=[load[0] for load in loader],
792
- stop=stop,
793
- extras=extras,
794
- parent_run_id=result.parent_run_id,
795
- )
796
- for loader in batch(
797
- Loader.finds(Schedule, excluded=excluded),
798
- n=dynamic("max_schedule_per_process", extras=extras),
799
- )
800
- ]
801
-
802
- for future in as_completed(futures):
803
-
804
- # NOTE: Raise error when it has any error from schedule_control.
805
- if err := future.exception():
806
- result.trace.error(str(err))
807
- raise WorkflowException(str(err)) from err
808
-
809
- rs: Result = future.result(timeout=1)
810
- context["schedule"].extend(rs.context.get("schedules", []))
811
- context["threads"].extend(rs.context.get("threads", []))
812
-
813
- return result.catch(status=SUCCESS, context=context)
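For reference, the chunking that `schedule_runner` applies via `batch` behaves like the stand-in below (an assumption about the `.utils` helper, sketched here because the real implementation is not part of this diff):

```python
from itertools import islice
from typing import Any, Iterable, Iterator

def batch_sketch(iterable: Iterable[Any], n: int) -> Iterator[list[Any]]:
    """Yield successive chunks of size n (assumed `.utils.batch` behavior)."""
    it = iter(iterable)
    while chunk := list(islice(it, n)):
        yield chunk

print(list(batch_sketch(["s1", "s2", "s3", "s4", "s5"], n=2)))
# -> [['s1', 's2'], ['s3', 's4'], ['s5']]
```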