ddeutil-workflow 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,8 @@
  # ------------------------------------------------------------------------------
  from __future__ import annotations
 
+ import copy
+ import json
  import logging
  import os
  import time
@@ -15,49 +17,189 @@ from datetime import datetime, timedelta
  from functools import wraps
  from heapq import heappush
  from threading import Thread
+ from typing import Optional
  from zoneinfo import ZoneInfo
 
- from ddeutil.workflow.__types import DictData
- from ddeutil.workflow.cron import CronRunner
- from ddeutil.workflow.exceptions import WorkflowException
- from ddeutil.workflow.log import FileLog, Log
- from ddeutil.workflow.on import On
- from ddeutil.workflow.pipeline import Pipeline
- from ddeutil.workflow.utils import (
+ from dotenv import load_dotenv
+ from pydantic import BaseModel, Field
+ from pydantic.functional_validators import model_validator
+ from typing_extensions import Self
+
+ try:
+     from schedule import CancelJob, Scheduler
+ except ImportError:
+     raise ImportError(
+         "Should install schedule package before use this module."
+     ) from None
+
+ from .__types import DictData
+ from .cron import CronRunner
+ from .exceptions import WorkflowException
+ from .log import FileLog, Log, get_logger
+ from .on import On
+ from .pipeline import Pipeline
+ from .utils import (
+     Loader,
      Result,
      batch,
      delay,
      get_diff_sec,
      param2template,
  )
- from dotenv import load_dotenv
- from schedule import CancelJob, Scheduler
-
- load_dotenv("../../../.env")
- logging.basicConfig(
-     level=logging.DEBUG,
-     format=(
-         "%(asctime)s.%(msecs)03d (%(name)-10s, %(process)-5d, %(thread)-5d) "
-         "[%(levelname)-7s] %(message)-120s (%(filename)s:%(lineno)s)"
-     ),
-     handlers=[logging.StreamHandler()],
-     datefmt="%Y-%m-%d %H:%M:%S",
- )
+
+ load_dotenv()
+ logger = get_logger("ddeutil.workflow")
  logging.getLogger("schedule").setLevel(logging.INFO)
 
- tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
+
+ __all__ = (
+     "PipelineSchedule",
+     "Schedule",
+     "workflow",
+ )
+
+
+ class PipelineSchedule(BaseModel):
+     """Pipeline schedule Pydantic Model."""
+
+     name: str = Field(description="A pipeline name.")
+     on: list[On] = Field(
+         default_factory=list,
+         description="An override On instance value.",
+     )
+     params: DictData = Field(
+         default_factory=dict,
+         description="Parameters to use for pipeline execution.",
+     )
+
+     @model_validator(mode="before")
+     def __prepare__values(cls, values: DictData) -> DictData:
+         """Prepare incoming values before validating with model fields."""
+
+         values["name"] = values["name"].replace(" ", "_")
+
+         cls.__bypass_on(values)
+         return values
+
+     @classmethod
+     def __bypass_on(cls, data: DictData, externals: DictData | None = None):
+         """Bypass the on data to loaded config data."""
+         if on := data.pop("on", []):
+
+             if isinstance(on, str):
+                 on = [on]
+
+             if any(not isinstance(n, (dict, str)) for n in on):
+                 raise TypeError("The ``on`` key should be list of str or dict")
+
+             # NOTE: Pass the on value to Loader and keep the On model object
+             #   in the on field.
+             data["on"] = [
+                 (
+                     Loader(n, externals=(externals or {})).data
+                     if isinstance(n, str)
+                     else n
+                 )
+                 for n in on
+             ]
+         return data
+
+
+ class Schedule(BaseModel):
+     """Schedule Pydantic model that is used to run with the scheduler
+     package. It is not equal to the ``on`` value in the Pipeline model, but
+     it uses the same logic for running release dates with a crontab interval.
+     """
+
+     desc: Optional[str] = Field(
+         default=None,
+         description=(
+             "A schedule description that can be a string of markdown content."
+         ),
+     )
+     pipelines: list[PipelineSchedule] = Field(
+         default_factory=list,
+         description="A list of PipelineSchedule models.",
+     )
+
+     @classmethod
+     def from_loader(
+         cls,
+         name: str,
+         externals: DictData | None = None,
+     ) -> Self:
+         loader: Loader = Loader(name, externals=(externals or {}))
+
+         # NOTE: Validate that the config type matches the current model.
+         if loader.type != cls:
+             raise ValueError(f"Type {loader.type} does not match with {cls}")
+
+         loader_data: DictData = copy.deepcopy(loader.data)
+
+         # NOTE: Add name to loader data
+         loader_data["name"] = name.replace(" ", "_")
+
+         return cls.model_validate(obj=loader_data)
+
+     def tasks(
+         self,
+         start_date: datetime,
+         queue: dict[str, list[datetime]],
+         running: dict[str, list[datetime]],
+         externals: DictData | None = None,
+     ) -> list[PipelineTask]:
+         """Generate tasks from the current datetime.
+
+         :param start_date: A start date that is taken from the workflow
+             schedule.
+         :param queue: A mapping of pipeline name to its queued release dates.
+         :param running: A mapping of pipeline name to its running release
+             dates.
+         :param externals: External parameters that are passed to the Loader
+             object.
+         :rtype: list[PipelineTask]
+         """
+
+         # NOTE: Create pair of pipeline and on.
+         pipeline_tasks: list[PipelineTask] = []
+         externals: DictData = externals or {}
+
+         for pipe in self.pipelines:
+             pipeline: Pipeline = Pipeline.from_loader(
+                 pipe.name, externals=externals
+             )
+
+             # NOTE: Create default list of release datetime.
+             queue[pipe.name]: list[datetime] = []
+             running[pipe.name]: list[datetime] = []
+
+             for on in pipeline.on:
+                 on_gen = on.generate(start_date)
+                 next_running_date = on_gen.next
+                 while next_running_date in queue[pipe.name]:
+                     next_running_date = on_gen.next
+
+                 heappush(queue[pipe.name], next_running_date)
+
+                 pipeline_tasks.append(
+                     PipelineTask(
+                         pipeline=pipeline,
+                         on=on,
+                         params=pipe.params,
+                         queue=queue,
+                         running=running,
+                     ),
+                 )
+
+         return pipeline_tasks
 
 
  def catch_exceptions(cancel_on_failure=False):
      """Catch exception error from scheduler job."""
 
-     def catch_exceptions_decorator(job_func):
-         @wraps(job_func)
+     def catch_exceptions_decorator(func):
+         @wraps(func)
          def wrapper(*args, **kwargs):
              try:
-                 return job_func(*args, **kwargs)
+                 return func(*args, **kwargs)
              except Exception as err:
-                 logging.exception(err)
+                 logger.exception(err)
                  if cancel_on_failure:
                      return CancelJob
 
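For orientation, here is a minimal sketch of how the two models added above might be exercised directly, without a YAML config file. The pipeline name, description, and params mapping are hypothetical placeholders, and the import path assumes this module ships as `ddeutil.workflow.scheduler` (the diff does not show the file path):

```python
# Hypothetical usage sketch for the new Schedule / PipelineSchedule models.
# The name, desc, and params values below are invented for illustration.
from ddeutil.workflow.scheduler import Schedule  # assumed module path

schedule = Schedule.model_validate(
    {
        "desc": "Nightly batch release",
        "pipelines": [
            {
                "name": "my pipeline",
                "params": {"asat-dt": "${{ release.logical_date }}"},
            },
        ],
    }
)

# The mode="before" validator on PipelineSchedule replaces spaces in names,
# so "my pipeline" is stored as "my_pipeline".
assert schedule.pipelines[0].name == "my_pipeline"
```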
@@ -66,136 +208,156 @@ def catch_exceptions(cancel_on_failure=False):
      return catch_exceptions_decorator
 
 
- @dataclass
+ def catch_exceptions_method(cancel_on_failure=False):
+     """Catch exception error from a scheduler job method."""
+
+     def catch_exceptions_decorator(func):
+         @wraps(func)
+         def wrapper(self, *args, **kwargs):
+             try:
+                 return func(self, *args, **kwargs)
+             except Exception as err:
+                 logger.exception(err)
+                 if cancel_on_failure:
+                     return CancelJob
+
+         return wrapper
+
+     return catch_exceptions_decorator
+
+
+ @dataclass(frozen=True)
  class PipelineTask:
+     """Pipeline task dataclass that keeps mapping data and objects for
+     passing to a multithreading task.
+     """
+
      pipeline: Pipeline
      on: On
+     params: DictData
      queue: list[datetime]
      running: list[datetime]
 
+     @catch_exceptions_method(cancel_on_failure=True)
+     def release(self, log: Log | None = None) -> None:
+         """Pipeline release; it uses the same logic as the
+         `pipeline.release` method.
 
- def queue2str(queue: list[datetime]) -> Iterator[str]:
-     return (f"{q:%Y-%m-%d %H:%M:%S}" for q in queue)
+         :param log: A log object.
+         """
+         tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
+         log: Log = log or FileLog
+         pipeline: Pipeline = self.pipeline
+         on: On = self.on
 
+         gen: CronRunner = on.generate(
+             datetime.now(tz=tz).replace(second=0, microsecond=0)
+         )
+         cron_tz: ZoneInfo = gen.tz
 
- def pipeline_release(
-     task: PipelineTask,
-     *,
-     log: Log | None = None,
- ) -> None:
-     """Pipeline release, it will use with the same logic of `pipeline.release`
-     method.
+         # NOTE: Get the next schedule time that is generated from now.
+         next_time: datetime = gen.next
 
-     :param task: A PipelineTask dataclass.
-     :param log: A log object.
-     """
-     log: Log = log or FileLog
-     pipeline: Pipeline = task.pipeline
-     on: On = task.on
+         # NOTE: Get the next time until it is not already running.
+         while log.is_pointed(
+             pipeline.name, next_time, queue=self.running[pipeline.name]
+         ):
+             next_time: datetime = gen.next
 
-     gen: CronRunner = on.generate(
-         datetime.now(tz=tz).replace(second=0, microsecond=0)
-     )
-     cron_tz: ZoneInfo = gen.tz
+         logger.debug(
+             f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
+             f"{next_time:%Y-%m-%d %H:%M:%S}"
+         )
+         heappush(self.running[pipeline.name], next_time)
 
-     next_running_time: datetime = gen.next
-     while next_running_time in task.running[pipeline.name]:
-         next_running_time: datetime = gen.next
+         if get_diff_sec(next_time, tz=cron_tz) > 55:
+             logger.debug(
+                 f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} "
+                 f": Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
+             )
 
-     logging.debug(
-         f"[CORE]: {pipeline.name!r} : {on.cronjob} : "
-         f"{next_running_time:%Y-%m-%d %H:%M:%S}"
-     )
-     heappush(task.running[pipeline.name], next_running_time)
+             # NOTE: Add this next running datetime that is not in the period
+             #   to the queue, and remove it from running.
+             self.running[pipeline.name].remove(next_time)
+             heappush(self.queue[pipeline.name], next_time)
 
-     # TODO: event should set on this step for release next pipeline task?
+             time.sleep(0.2)
+             return
 
-     if get_diff_sec(next_running_time, tz=cron_tz) > 55:
-         logging.debug(
+         logger.debug(
              f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
-             f"Does not closely >> {next_running_time:%Y-%m-%d %H:%M:%S}"
+             f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
          )
 
-         # NOTE: Add this next running datetime to queue
-         heappush(task.queue[pipeline.name], next_running_time)
-         task.running[pipeline.name].remove(next_running_time)
+         # NOTE: Release when the time is nearly at the schedule time.
+         while (duration := get_diff_sec(next_time, tz=tz)) > (15 + 5):
+             logger.debug(
+                 f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} "
+                 f": Sleep until: {duration}"
+             )
+             time.sleep(15)
+
          time.sleep(0.5)
-         return
 
-     logging.debug(
-         f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
-         f"Closely to run >> {next_running_time:%Y-%m-%d %H:%M:%S}"
-     )
+         # NOTE: Release parameters that are used to render params if they
+         #   have templating values.
+         release_params: DictData = {
+             "release": {
+                 "logical_date": next_time,
+             },
+         }
 
-     # NOTE: Release when the time is nearly to schedule time.
-     while (duration := get_diff_sec(next_running_time, tz=tz)) > (15 + 5):
-         logging.debug(
-             f"({pipeline.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
-             f"Sleep until: {duration}"
+         # WARNING: Re-create the pipeline object with a new running pipeline
+         #   ID.
+         runner: Pipeline = pipeline.get_running_id(run_id=pipeline.new_run_id)
+         rs: Result = runner.execute(
+             params=param2template(self.params, release_params),
+         )
+         logger.debug(
+             f"({runner.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
+             f"End release - {next_time:%Y-%m-%d %H:%M:%S}"
          )
-         time.sleep(15)
-
-     time.sleep(0.5)
-
-     # NOTE: Release parameter that use to change if params has
-     # templating.
-     release_params: DictData = {
-         "release": {
-             "logical_date": next_running_time,
-         },
-     }
-
-     # WARNING: Re-create pipeline object that use new running pipeline
-     # ID.
-     runner: Pipeline = pipeline.get_running_id(run_id=pipeline.new_run_id)
-     rs: Result = runner.execute(
-         # FIXME: replace fix parameters on this execution process.
-         params=param2template(
-             {"asat-dt": "${{ release.logical_date }}"}, release_params
-         ),
-     )
-     logging.debug(
-         f"({runner.run_id}) [CORE]: {pipeline.name!r} : {on.cronjob} : "
-         f"End release"
-     )
 
-     del runner
+         del runner
+
+         # NOTE: Set parent ID on this result.
+         rs.set_parent_run_id(pipeline.run_id)
+
+         # NOTE: Save the result with the log object.
+         rs_log: Log = log.model_validate(
+             {
+                 "name": pipeline.name,
+                 "on": str(on.cronjob),
+                 "release": next_time,
+                 "context": rs.context,
+                 "parent_run_id": rs.run_id,
+                 "run_id": rs.run_id,
+             }
+         )
+         rs_log.save(excluded=None)
 
-     # NOTE: remove this release date from running
-     task.running[pipeline.name].remove(next_running_time)
+         # NOTE: Remove this release date from running.
+         self.running[pipeline.name].remove(next_time)
 
-     # IMPORTANT:
-     # Add the next running datetime to pipeline queue
-     finish_time: datetime = datetime.now(tz=cron_tz).replace(
-         second=0, microsecond=0
-     )
-     future_running_time: datetime = gen.next
-     while (
-         future_running_time in task.running[pipeline.name]
-         or future_running_time in task.queue[pipeline.name]
-         or future_running_time < finish_time
-     ):
+         # IMPORTANT:
+         #   Add the next running datetime to the pipeline queue.
+         finish_time: datetime = datetime.now(tz=cron_tz).replace(
+             second=0, microsecond=0
+         )
          future_running_time: datetime = gen.next
+         while (
+             future_running_time in self.running[pipeline.name]
+             or future_running_time in self.queue[pipeline.name]
+             or future_running_time < finish_time
+         ):
+             future_running_time: datetime = gen.next
 
-     heappush(task.queue[pipeline.name], future_running_time)
-
-     # NOTE: Set parent ID on this result.
-     rs.set_parent_run_id(pipeline.run_id)
+         heappush(self.queue[pipeline.name], future_running_time)
+         logger.debug(f"[CORE]: {'-' * 100}")
 
-     # NOTE: Save result to log object saving.
-     rs_log: Log = log.model_validate(
-         {
-             "name": pipeline.name,
-             "on": str(on.cronjob),
-             "release": next_running_time,
-             "context": rs.context,
-             "parent_run_id": rs.run_id,
-             "run_id": rs.run_id,
-         }
-     )
-     rs_log.save()
 
-     logging.debug(f"[CORE]: {rs}")
+ def queue2str(queue: list[datetime]) -> Iterator[str]:
+     return (f"{q:%Y-%m-%d %H:%M:%S}" for q in queue)
 
 
  @catch_exceptions(cancel_on_failure=True)
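The core of `PipelineTask.release` above is a timing gate: a next run more than 55 seconds away is pushed back onto the queue, while a closer one is slept toward in 15-second steps until roughly 20 seconds remain, then fired. A standalone sketch of that gate, with illustrative numbers and a simplified stand-in for `get_diff_sec`:

```python
# Standalone sketch of the release timing gate; not the package's own code.
from datetime import datetime, timedelta, timezone


def seconds_until(next_time: datetime) -> float:
    # Simplified stand-in for get_diff_sec(next_time, tz=...).
    return (next_time - datetime.now(tz=timezone.utc)).total_seconds()


def should_release_this_cycle(next_time: datetime) -> bool:
    # Mirrors the `> 55` guard: runs further out than ~55 seconds are
    # re-queued instead of being released in the current minute.
    return seconds_until(next_time) <= 55


now = datetime.now(tz=timezone.utc)
print(should_release_this_cycle(now + timedelta(seconds=30)))  # True
print(should_release_this_cycle(now + timedelta(minutes=5)))   # False
```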
@@ -208,14 +370,20 @@ def workflow_task(
      the threading in background.
 
      This workflow task will start every minute at :02 second.
+
+     :param pipeline_tasks: A list of PipelineTask models.
+     :param stop: A datetime value that is used to stop the running schedule.
+     :param threads: A mapping of thread name and Thread object.
+     :rtype: CancelJob | None
      """
+     tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
      start_date: datetime = datetime.now(tz=tz)
-     start_date_minute = start_date.replace(second=0, microsecond=0)
+     start_date_minute: datetime = start_date.replace(second=0, microsecond=0)
 
      if start_date > stop:
-         logging.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
+         logger.info("[WORKFLOW]: Stop this schedule with datetime stopper.")
          while len(threads) > 0:
-             logging.warning(
+             logger.warning(
                  "[WORKFLOW]: Waiting pipeline release thread that still "
                  "running in background."
              )
@@ -238,7 +406,7 @@ def workflow_task(
      for task in pipeline_tasks:
 
          # NOTE: Get incoming datetime queue.
-         logging.debug(
+         logger.debug(
              f"[WORKFLOW]: Current queue: {task.pipeline.name!r} : "
              f"{list(queue2str(task.queue[task.pipeline.name]))}"
          )
@@ -255,15 +423,14 @@ def workflow_task(
              task.on.next(current_running_time)
              != task.queue[task.pipeline.name][0]
          ):
-             logging.debug(
+             logger.debug(
                  f"[WORKFLOW]: Skip schedule "
                  f"{current_running_time:%Y-%m-%d %H:%M:%S} "
                  f"for : {task.pipeline.name!r} : {task.on.cronjob}"
              )
              continue
          elif len(task.queue[task.pipeline.name]) == 0:
-             # TODO: Should auto add new queue?
-             logging.warning(
+             logger.warning(
                  f"[WORKFLOW]: Queue is empty for : {task.pipeline.name!r} : "
                  f"{task.on.cronjob}"
              )
@@ -272,13 +439,14 @@ def workflow_task(
          # NOTE: Remove this datetime from queue.
          task.queue[task.pipeline.name].pop(0)
 
+         # NOTE: Create a thread name that is able to be tracked with the
+         #   observe schedule job.
          thread_name: str = (
              f"{task.pipeline.name}|{str(task.on.cronjob)}|"
              f"{current_running_time:%Y%m%d%H%M}"
          )
          pipe_thread: Thread = Thread(
-             target=pipeline_release,
-             args=(task,),
+             target=task.release,
              name=thread_name,
              daemon=True,
          )
@@ -289,7 +457,7 @@ def workflow_task(
 
          delay()
 
-     logging.debug(f"[WORKFLOW]: {'=' * 100}")
+     logger.debug(f"[WORKFLOW]: {'=' * 100}")
 
 
  def workflow_long_running_task(threads: dict[str, Thread]) -> None:
@@ -297,8 +465,11 @@ def workflow_long_running_task(threads: dict[str, Thread]) -> None:
      control.
 
      :param threads: A mapping of Thread object and its name.
+     :rtype: None
      """
-     logging.debug("[MONITOR]: Start checking long running pipeline release.")
+     logger.debug(
+         "[MONITOR]: Start checking long running pipeline release task."
+     )
      snapshot_threads = list(threads.keys())
      for t_name in snapshot_threads:
 
@@ -308,16 +479,18 @@ def workflow_long_running_task(threads: dict[str, Thread]) -> None:
 
 
  def workflow_control(
-     pipelines: list[str],
-     until: datetime | None = None,
+     schedules: list[str],
+     stop: datetime | None = None,
      externals: DictData | None = None,
  ) -> list[str]:
      """Workflow scheduler control.
 
-     :param pipelines: A list of pipeline names that want to schedule running.
-     :param until:
+     :param schedules: A list of schedule names that want to schedule running.
+     :param stop: A datetime value that is used to stop the running schedule.
      :param externals: An external parameters that pass to Loader.
+     :rtype: list[str]
      """
+     tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
      schedule: Scheduler = Scheduler()
      start_date: datetime = datetime.now(tz=tz)
 
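Note that the module-level `tz` constant removed in the import hunk is now re-resolved inside each entry point, so the zone follows the WORKFLOW_CORE_TIMEZONE environment variable at call time rather than at import time. A small sketch of that pattern, with an assumed zone value:

```python
import os
from zoneinfo import ZoneInfo

# Hypothetical value; any IANA zone name works, and "UTC" is the fallback.
os.environ["WORKFLOW_CORE_TIMEZONE"] = "Asia/Bangkok"
tz = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
print(tz)  # Asia/Bangkok
```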
@@ -333,35 +506,28 @@ def workflow_control(
          second=0, microsecond=0
      )
 
-     # NOTE: Create pair of pipeline and on.
+     # NOTE: Create pair of pipeline and on from schedule model.
      pipeline_tasks: list[PipelineTask] = []
-
-     for name in pipelines:
-         pipeline: Pipeline = Pipeline.from_loader(name, externals=externals)
-
-         # NOTE: Create default list of release datetime.
-         wf_queue[name]: list[datetime] = []
-         wf_running[name]: list[datetime] = []
-
-         for on in pipeline.on:
-
-             on_gen = on.generate(start_date_waiting)
-             next_running_date = on_gen.next
-             while next_running_date in wf_queue[name]:
-                 next_running_date = on_gen.next
-
-             heappush(wf_queue[name], next_running_date)
-             pipeline_tasks.append(
-                 PipelineTask(
-                     pipeline=pipeline, on=on, queue=wf_queue, running=wf_running
-                 ),
-             )
+     for name in schedules:
+         sch: Schedule = Schedule.from_loader(name, externals=externals)
+         pipeline_tasks.extend(
+             sch.tasks(start_date_waiting, wf_queue, wf_running, externals)
+         )
 
      # NOTE: This schedule job will start every minute at :02 seconds.
      schedule.every(1).minutes.at(":02").do(
          workflow_task,
          pipeline_tasks=pipeline_tasks,
-         stop=until or (start_date + timedelta(minutes=5, seconds=20)),
+         stop=stop
+         or (
+             start_date
+             + timedelta(
+                 **json.loads(
+                     os.getenv("WORKFLOW_APP_STOP_BOUNDARY_DELTA")
+                     or '{"minutes": 5, "seconds": 20}'
+                 )
+             )
+         ),
          threads=thread_releases,
      ).tag("control")
 
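The hard-coded five-minute stop boundary is now configurable: WORKFLOW_APP_STOP_BOUNDARY_DELTA holds a JSON object of `timedelta` keyword arguments, with `'{"minutes": 5, "seconds": 20}'` as the fallback. A sketch of the parsing, using an assumed ten-minute override:

```python
import json
import os
from datetime import timedelta

# Assumed override value for illustration; the fallback string below
# matches the default shown in the diff.
os.environ["WORKFLOW_APP_STOP_BOUNDARY_DELTA"] = '{"minutes": 10}'
boundary = timedelta(
    **json.loads(
        os.getenv("WORKFLOW_APP_STOP_BOUNDARY_DELTA")
        or '{"minutes": 5, "seconds": 20}'
    )
)
print(boundary)  # 0:10:00
```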
@@ -372,37 +538,42 @@ def workflow_control(
      ).tag("monitor")
 
      # NOTE: Start running schedule
-     logging.info(f"[WORKFLOW]: Start schedule: {pipelines}")
+     logger.info(f"[WORKFLOW]: Start schedule: {schedules}")
      while True:
          schedule.run_pending()
          time.sleep(1)
          if not schedule.get_jobs("control"):
              schedule.clear("monitor")
-             logging.warning(
+             logger.warning(
                  f"[WORKFLOW]: Pipeline release thread: {thread_releases}"
              )
-             logging.warning("[WORKFLOW]: Does not have any schedule jobs !!!")
+             logger.warning("[WORKFLOW]: Does not have any schedule jobs !!!")
              break
 
-     logging.warning(f"Queue: {[wf_queue[wf] for wf in wf_queue]}")
-     logging.warning(f"Running: {[wf_running[wf] for wf in wf_running]}")
-     return pipelines
+     logger.warning(
+         f"Queue: {[list(queue2str(wf_queue[wf])) for wf in wf_queue]}"
+     )
+     logger.warning(
+         f"Running: {[list(queue2str(wf_running[wf])) for wf in wf_running]}"
+     )
+     return schedules
 
 
  def workflow(
-     until: datetime | None = None,
+     stop: datetime | None = None,
      externals: DictData | None = None,
      excluded: list[str] | None = None,
- ):
+ ) -> list[str]:
      """Workflow application that runs a multiprocessing schedule over chunks
      of pipelines that exist in the config path.
 
-     :param until:
+     :param stop: A datetime value that is used to stop the running schedule.
      :param excluded:
      :param externals:
+     :rtype: list[str]
 
      This function will get all pipelines that include an on value that was
-     created in config path and chuck it with WORKFLOW_APP_PIPELINE_PER_PROCESS
+     created in config path and chunk them with WORKFLOW_APP_SCHEDULE_PER_PROCESS
      value to multiprocess executor pool.
 
      The current workflow logic:
@@ -417,36 +588,33 @@ def workflow(
          pipeline task 02 02
      ==> ...
      """
-     excluded: list = excluded or []
+     excluded: list[str] = excluded or []
 
-     with ProcessPoolExecutor(max_workers=2) as executor:
+     with ProcessPoolExecutor(
+         max_workers=int(os.getenv("WORKFLOW_APP_PROCESS_WORKER") or "2"),
+     ) as executor:
          futures: list[Future] = [
              executor.submit(
                  workflow_control,
-                 pipelines=[load[0] for load in loader],
-                 until=until,
+                 schedules=[load[0] for load in loader],
+                 stop=stop,
                  externals=(externals or {}),
              )
              for loader in batch(
-                 # Loader.find(Pipeline, include=["on"], excluded=excluded),
-                 [
-                     ("pipe-scheduling", None),
-                     # ("pipe-scheduling-minute", None),
-                 ],
-                 n=1,
+                 Loader.finds(Schedule, excluded=excluded),
+                 n=int(os.getenv("WORKFLOW_APP_SCHEDULE_PER_PROCESS") or "100"),
              )
          ]
 
          results: list[str] = []
          for future in as_completed(futures):
              if err := future.exception():
-                 logging.error(str(err))
+                 logger.error(str(err))
                  raise WorkflowException(str(err)) from err
              results.extend(future.result(timeout=1))
          return results
 
 
  if __name__ == "__main__":
-     # TODO: Define input arguments that want to manage this application.
      workflow_rs: list[str] = workflow()
-     logging.info(f"Application run success: {workflow_rs}")
+     logger.info(f"Application run success: {workflow_rs}")
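Two more knobs land in `workflow()` itself: WORKFLOW_APP_PROCESS_WORKER sizes the process pool (default 2) and WORKFLOW_APP_SCHEDULE_PER_PROCESS sets how many Schedule configs each worker receives per batch (default 100). A hypothetical launch, again assuming the `ddeutil.workflow.scheduler` module path:

```python
# Hypothetical launch script; the env values here are illustrative
# overrides, not the package defaults.
import os

os.environ["WORKFLOW_APP_PROCESS_WORKER"] = "4"
os.environ["WORKFLOW_APP_SCHEDULE_PER_PROCESS"] = "50"

from ddeutil.workflow.scheduler import workflow  # assumed module path

results = workflow()
print(results)  # names of the Schedule configs that finished running
```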