ddeutil-workflow 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/log.py CHANGED
@@ -7,20 +7,18 @@ from __future__ import annotations
 
 import json
 import logging
-import os
 from abc import ABC, abstractmethod
 from datetime import datetime
 from functools import lru_cache
 from pathlib import Path
 from typing import ClassVar, Optional, Union
 
-from ddeutil.core import str2bool
 from pydantic import BaseModel, Field
 from pydantic.functional_validators import model_validator
 from typing_extensions import Self
 
 from .__types import DictData
-from .utils import load_config
+from .conf import config, load_config
 
 
 @lru_cache
@@ -42,8 +40,7 @@ def get_logger(name: str):
     stream.setFormatter(formatter)
     logger.addHandler(stream)
 
-    debug: bool = str2bool(os.getenv("WORKFLOW_LOG_DEBUG_MODE", "true"))
-    logger.setLevel(logging.DEBUG if debug else logging.INFO)
+    logger.setLevel(logging.DEBUG if config.debug else logging.INFO)
     return logger
 
 
@@ -72,7 +69,7 @@ class BaseLog(BaseModel, ABC):
 
         :rtype: Self
         """
-        if str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+        if config.enable_write_log:
             self.do_before()
         return self
 
@@ -141,7 +138,7 @@ class FileLog(BaseLog):
         future.
         """
         # NOTE: Check environ variable was set for real writing.
-        if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+        if not config.enable_write_log:
            return False
 
        # NOTE: create pointer path that use the same logic of pointer method.
@@ -171,7 +168,7 @@ class FileLog(BaseLog):
         :rtype: Self
         """
         # NOTE: Check environ variable was set for real writing.
-        if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+        if not config.enable_write_log:
             return self
 
         log_file: Path = self.pointer() / f"{self.run_id}.log"
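Note on the hunks above: every scattered `os.getenv(...)` / `str2bool(...)` read in log.py is replaced by attributes on a shared `config` object from the new `conf` module. Below is a minimal sketch of that pattern, reusing the environment variable names and defaults visible in the removed lines; the attribute names match what the new code reads, but this `Config` class is illustrative, not the package's actual `ddeutil.workflow.conf.Config`.

```python
# Illustrative sketch only: a centralized settings object replacing the
# per-call os.getenv / str2bool reads removed in this diff. The real
# ddeutil.workflow.conf.Config may expose more options.
import os
from zoneinfo import ZoneInfo


def str2bool(value: str) -> bool:
    # Stand-in for ddeutil.core.str2bool, which the old code imported.
    return value.strip().lower() in ("1", "true", "yes", "on")


class Config:
    @property
    def debug(self) -> bool:
        return str2bool(os.getenv("WORKFLOW_LOG_DEBUG_MODE", "true"))

    @property
    def enable_write_log(self) -> bool:
        return str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false"))

    @property
    def tz(self) -> ZoneInfo:
        return ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))


config = Config()
```

Centralizing the reads this way is what lets the logging, API, and scheduler modules in the remaining hunks drop their own `os` and `zoneinfo` imports.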
ddeutil/workflow/on.py CHANGED
@@ -15,8 +15,8 @@ from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
 from .__types import DictData, DictStr, TupleStr
+from .conf import Loader
 from .cron import WEEKDAYS, CronJob, CronJobYear, CronRunner
-from .utils import Loader
 
 __all__: TupleStr = (
     "On",
@@ -6,14 +6,13 @@
 from __future__ import annotations
 
 import asyncio
-import os
 from asyncio import ensure_future
 from datetime import datetime
 from functools import wraps
-from zoneinfo import ZoneInfo
 
 from starlette.concurrency import run_in_threadpool
 
+from .conf import config
 from .cron import CronJob
 from .log import get_logger
 
@@ -24,9 +23,7 @@ def get_cronjob_delta(cron: str) -> float:
     """This function returns the time delta between now and the next cron
     execution time.
     """
-    now: datetime = datetime.now(
-        tz=ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
-    )
+    now: datetime = datetime.now(tz=config.tz)
     cron = CronJob(cron)
     return (cron.schedule(now).next - now).total_seconds()
 
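The same consolidation shows up here: `get_cronjob_delta` now takes its timezone-aware "now" from `config.tz` instead of reading `WORKFLOW_CORE_TIMEZONE` inline. A hedged sketch of the computation follows, using only the `CronJob(...).schedule(now).next` calls visible in this hunk; exact signatures in the released package may differ.

```python
# Sketch of the delta computation above: seconds from a timezone-aware
# "now" until the next cron occurrence. ZoneInfo("UTC") stands in for
# config.tz; CronJob is the class imported from .cron in this hunk.
from datetime import datetime
from zoneinfo import ZoneInfo

from ddeutil.workflow.cron import CronJob

now: datetime = datetime.now(tz=ZoneInfo("UTC"))
runner = CronJob("*/5 * * * *").schedule(now)
print((runner.next - now).total_seconds())  # at most 300 seconds away
```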
ddeutil/workflow/route.py CHANGED
@@ -6,10 +6,8 @@
 from __future__ import annotations
 
 import copy
-import os
 from datetime import datetime, timedelta
 from typing import Any
-from zoneinfo import ZoneInfo
 
 from fastapi import APIRouter, HTTPException, Request
 from fastapi import status as st
@@ -18,9 +16,10 @@ from pydantic import BaseModel
 
 from . import Workflow
 from .__types import DictData
+from .conf import Loader, config
 from .log import get_logger
 from .scheduler import Schedule
-from .utils import Loader, Result
+from .utils import Result
 
 logger = get_logger("ddeutil.workflow")
 workflow = APIRouter(
@@ -87,12 +86,7 @@ async def execute_workflow(name: str, payload: ExecutePayload) -> DictData:
     # NOTE: Start execute manually
     rs: Result = wf.execute(params=payload.params)
 
-    return rs.model_dump(
-        by_alias=True,
-        exclude_none=True,
-        exclude_unset=True,
-        exclude_defaults=True,
-    )
+    return dict(rs)
 
 
 @workflow.get("/{name}/logs")
@@ -172,8 +166,7 @@ async def add_deploy_scheduler(request: Request, name: str):
 
     request.state.scheduler.append(name)
 
-    tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
-    start_date: datetime = datetime.now(tz=tz)
+    start_date: datetime = datetime.now(tz=config.tz)
     start_date_waiting: datetime = (start_date + timedelta(minutes=1)).replace(
         second=0, microsecond=0
     )
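Worth noting on the `execute_workflow` change above: the filtered `model_dump(...)` call becomes a plain `dict(rs)`. Assuming `Result` is still a Pydantic model, the two are not equivalent; `dict()` iterates the top-level fields and leaves nested models unserialized, while `model_dump` converts recursively and honors the `by_alias` / `exclude_*` options. A small stand-alone illustration (these classes are stand-ins, not the package's own):

```python
from pydantic import BaseModel


class Inner(BaseModel):
    value: int = 1


class Result(BaseModel):  # illustrative stand-in for the package's Result
    status: int = 0
    context: Inner = Inner()


rs = Result()
print(dict(rs))         # {'status': 0, 'context': Inner(value=1)}  (shallow)
print(rs.model_dump())  # {'status': 0, 'context': {'value': 1}}    (recursive)
```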
@@ -3,13 +3,26 @@
 # Licensed under the MIT License. See LICENSE in the project root for
 # license information.
 # ------------------------------------------------------------------------------
+"""
+The main schedule running is ``workflow_runner`` function that trigger the
+multiprocess of ``workflow_control`` function for listing schedules on the
+config by ``Loader.finds(Schedule)``.
+
+The ``workflow_control`` is the scheduler function that release 2 schedule
+functions; ``workflow_task``, and ``workflow_monitor``.
+
+``workflow_control`` --- Every minute at :02 --> ``workflow_task``
+                     --- Every 5 minutes --> ``workflow_monitor``
+
+The ``workflow_task`` will run ``task.release`` method in threading object
+for multithreading strategy. This ``release`` method will run only one crontab
+value with the on field.
+"""
 from __future__ import annotations
 
 import copy
 import inspect
-import json
 import logging
-import os
 import time
 from concurrent.futures import (
     Future,
@@ -43,14 +56,13 @@ except ImportError:
     CancelJob = None
 
 from .__types import DictData, TupleStr
-from .conf import config
+from .conf import Loader, config
 from .cron import CronRunner
 from .exceptions import JobException, WorkflowException
 from .job import Job
 from .log import FileLog, Log, get_logger
 from .on import On
 from .utils import (
-    Loader,
     Param,
     Result,
     batch,
@@ -75,7 +87,7 @@ __all__: TupleStr = (
     "Schedule",
     "ScheduleWorkflow",
     "workflow_task",
-    "workflow_long_running_task",
+    "workflow_monitor",
     "workflow_control",
     "workflow_runner",
 )
@@ -184,7 +196,7 @@ class Workflow(BaseModel):
         return data
 
     @model_validator(mode="before")
-    def __prepare_params(cls, values: DictData) -> DictData:
+    def __prepare_model_before__(cls, values: DictData) -> DictData:
         """Prepare the params key."""
         # NOTE: Prepare params type if it passing with only type value.
         if params := values.pop("params", {}):
@@ -199,9 +211,10 @@ class Workflow(BaseModel):
         return values
 
     @field_validator("desc", mode="after")
-    def ___prepare_desc(cls, value: str) -> str:
+    def __dedent_desc__(cls, value: str) -> str:
         """Prepare description string that was created on a template.
 
+        :param value: A description string value that want to dedent.
         :rtype: str
         """
         return dedent(value)
@@ -458,8 +471,10 @@ class Workflow(BaseModel):
         queue: list[datetime] = []
         results: list[Result] = []
 
-        worker: int = int(os.getenv("WORKFLOW_CORE_MAX_NUM_POKING") or "4")
-        with ThreadPoolExecutor(max_workers=worker) as executor:
+        with ThreadPoolExecutor(
+            max_workers=config.max_poking_pool_worker,
+            thread_name_prefix="wf_poking_",
+        ) as executor:
             futures: list[Future] = []
             for on in self.on:
                 futures.append(
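The poking hunk above replaces the `WORKFLOW_CORE_MAX_NUM_POKING` lookup with `config.max_poking_pool_worker` and gives the pool threads a `wf_poking_` name prefix. Below is a generic sketch of that bounded fan-out pattern; `poke_one` and the `ons` list are illustrative stand-ins, not the package's API.

```python
from concurrent.futures import Future, ThreadPoolExecutor, as_completed


def poke_one(on_name: str) -> str:
    # Stand-in for whatever each submitted callable does per "on" schedule.
    return f"poked {on_name}"


ons = ["every_5_minute", "every_hour"]
with ThreadPoolExecutor(
    max_workers=4,  # the diff reads this from config.max_poking_pool_worker
    thread_name_prefix="wf_poking_",
) as executor:
    futures: list[Future] = [executor.submit(poke_one, on) for on in ons]
    results = [future.result() for future in as_completed(futures)]

print(results)
```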
@@ -513,7 +528,6 @@ class Workflow(BaseModel):
                 f"workflow."
             )
 
-        context: DictData = {}
         logger.info(f"({self.run_id}) [WORKFLOW]: Start execute: {job_id!r}")
 
         # IMPORTANT:
@@ -523,7 +537,7 @@ class Workflow(BaseModel):
             job: Job = self.jobs[job_id].get_running_id(self.run_id)
             job.set_outputs(
                 job.execute(params=params).context,
-                to=context,
+                to=params,
             )
         except JobException as err:
             logger.error(
@@ -536,7 +550,7 @@ class Workflow(BaseModel):
         else:
             raise NotImplementedError() from None
 
-        return Result(status=0, context=context)
+        return Result(status=0, context=params)
 
     def execute(
         self,
@@ -587,8 +601,14 @@ class Workflow(BaseModel):
         for job_id in self.jobs:
             jq.put(job_id)
 
-        # NOTE: Create result context that will pass this context to any
-        # execution dependency.
+        # NOTE: Create data context that will pass to any job executions
+        # on this workflow.
+        #
+        # {
+        #     'params': <input-params>,
+        #     'jobs': {},
+        # }
+        #
         context: DictData = self.parameterize(params)
         status: int = 0
         try:
@@ -657,15 +677,23 @@ class Workflow(BaseModel):
             job: Job = self.jobs[job_id]
 
             if any(need not in context["jobs"] for need in job.needs):
+                job_queue.task_done()
                 job_queue.put(job_id)
                 time.sleep(0.25)
                 continue
 
+            # NOTE: Start workflow job execution with deep copy context data
+            # before release.
+            #
+            # {
+            #     'params': <input-params>,
+            #     'jobs': {},
+            # }
             futures.append(
                 executor.submit(
                     self.execute_job,
                     job_id,
-                    params=copy.deepcopy(context),
+                    params=context,
                 ),
             )
 
@@ -677,14 +705,13 @@
 
             for future in as_completed(futures, timeout=1800):
                 if err := future.exception():
-                    logger.error(f"{err}")
+                    logger.error(f"({self.run_id}) [CORE]: {err}")
                     raise WorkflowException(f"{err}")
                 try:
-                    # NOTE: Update job result to workflow result.
-                    context["jobs"].update(future.result(timeout=60).context)
+                    future.result(timeout=60)
                 except TimeoutError as err:
                     raise WorkflowException(
-                        "Get result from future was timeout"
+                        "Timeout when getting result from future"
                     ) from err
 
         if not_time_out_flag:
@@ -731,18 +758,21 @@
             job_id: str = job_queue.get()
             job: Job = self.jobs[job_id]
 
-            # NOTE:
+            # NOTE: Waiting dependency job run successful before release.
             if any(need not in context["jobs"] for need in job.needs):
+                job_queue.task_done()
                 job_queue.put(job_id)
-                time.sleep(0.25)
+                time.sleep(0.05)
                 continue
 
-            # NOTE: Start workflow job execution.
-            job_rs = self.execute_job(
-                job_id=job_id,
-                params=copy.deepcopy(context),
-            )
-            context["jobs"].update(job_rs.context)
+            # NOTE: Start workflow job execution with deep copy context data
+            # before release.
+            #
+            # {
+            #     'params': <input-params>,
+            #     'jobs': {},
+            # }
+            self.execute_job(job_id=job_id, params=context)
 
             # NOTE: Mark this job queue done.
             job_queue.task_done()
@@ -780,7 +810,7 @@ class ScheduleWorkflow(BaseModel):
     )
 
     @model_validator(mode="before")
-    def __prepare_values(cls, values: DictData) -> DictData:
+    def __prepare_before__(cls, values: DictData) -> DictData:
         """Prepare incoming values before validating with model fields.
 
         :rtype: DictData
@@ -918,9 +948,11 @@ class Schedule(BaseModel):
         return workflow_tasks
 
 
-def catch_exceptions(
-    cancel_on_failure: bool = False,
-) -> Callable[P, Optional[CancelJob]]:
+ReturnCancelJob = Callable[P, Optional[CancelJob]]
+DecoratorCancelJob = Callable[[ReturnCancelJob], ReturnCancelJob]
+
+
+def catch_exceptions(cancel_on_failure: bool = False) -> DecoratorCancelJob:
     """Catch exception error from scheduler job that running with schedule
     package and return CancelJob if this function raise an error.
 
@@ -929,9 +961,7 @@ def catch_exceptions(
     :rtype: Callable[P, Optional[CancelJob]]
     """
 
-    def decorator(
-        func: Callable[P, Optional[CancelJob]],
-    ) -> Callable[P, Optional[CancelJob]]:
+    def decorator(func: ReturnCancelJob) -> ReturnCancelJob:
         try:
             # NOTE: Check the function that want to handle is method or not.
             if inspect.ismethod(func):
@@ -966,8 +996,8 @@ class WorkflowTaskData:
     workflow: Workflow
     on: On
     params: DictData = field(compare=False, hash=False)
-    queue: list[datetime] = field(compare=False, hash=False)
-    running: list[datetime] = field(compare=False, hash=False)
+    queue: dict[str, list[datetime]] = field(compare=False, hash=False)
+    running: dict[str, list[datetime]] = field(compare=False, hash=False)
 
     @catch_exceptions(cancel_on_failure=True)
     def release(
@@ -1047,8 +1077,9 @@ class WorkflowTaskData:
             },
         }
 
-        # WARNING: Re-create workflow object that use new running workflow
-        # ID.
+        # WARNING:
+        # Re-create workflow object that use new running workflow ID.
+        #
         runner: Workflow = wf.get_running_id(run_id=wf.new_run_id)
         rs: Result = runner.execute(
             params=param2template(self.params, release_params),
@@ -1101,6 +1132,7 @@ class WorkflowTaskData:
                 self.workflow.name == other.workflow.name
                 and self.on.cronjob == other.on.cronjob
             )
+        return NotImplemented
 
 
 @catch_exceptions(cancel_on_failure=True)
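The `return NotImplemented` added to `WorkflowTaskData.__eq__` above follows the usual comparison protocol: when the other operand is an unrelated type, returning `NotImplemented` lets Python try the reflected comparison and fall back to identity instead of silently treating the pair as unequal. A minimal stand-alone illustration (the `Task` class is hypothetical):

```python
class Task:
    def __init__(self, name: str) -> None:
        self.name = name

    def __eq__(self, other: object) -> bool:
        if isinstance(other, Task):
            return self.name == other.name
        # Signal "I do not know how to compare" rather than returning False.
        return NotImplemented


print(Task("a") == Task("a"))  # True
print(Task("a") == "a")        # False, resolved after both sides decline
```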
@@ -1112,10 +1144,10 @@ def workflow_task(
     """Workflow task generator that create release pair of workflow and on to
     the threading in background.
 
-    This workflow task will start every minute at :02 second.
+    This workflow task will start every minute at ':02' second.
 
     :param workflow_tasks:
-    :param stop:
+    :param stop: A stop datetime object that force stop running scheduler.
     :param threads:
     :rtype: CancelJob | None
     """
@@ -1130,7 +1162,7 @@ def workflow_task(
             "running in background."
         )
         time.sleep(15)
-        workflow_long_running_task(threads)
+        workflow_monitor(threads)
        return CancelJob
 
    # IMPORTANT:
@@ -1202,7 +1234,7 @@
     logger.debug(f"[WORKFLOW]: {'=' * 100}")
 
 
-def workflow_long_running_task(threads: dict[str, Thread]) -> None:
+def workflow_monitor(threads: dict[str, Thread]) -> None:
     """Workflow schedule for monitoring long running thread from the schedule
     control.
 
@@ -1260,30 +1292,29 @@ def workflow_control(
         sch: Schedule = Schedule.from_loader(name, externals=externals)
         workflow_tasks.extend(
             sch.tasks(
-                start_date_waiting, wf_queue, wf_running, externals=externals
+                start_date_waiting,
+                queue=wf_queue,
+                running=wf_running,
+                externals=externals,
             ),
         )
 
     # NOTE: This schedule job will start every minute at :02 seconds.
-    schedule.every(1).minutes.at(":02").do(
-        workflow_task,
-        workflow_tasks=workflow_tasks,
-        stop=stop
-        or (
-            start_date
-            + timedelta(
-                **json.loads(
-                    os.getenv("WORKFLOW_APP_STOP_BOUNDARY_DELTA")
-                    or '{"minutes": 5, "seconds": 20}'
-                )
-            )
-        ),
-        threads=thread_releases,
-    ).tag("control")
+    (
+        schedule.every(1)
+        .minutes.at(":02")
+        .do(
+            workflow_task,
+            workflow_tasks=workflow_tasks,
+            stop=(stop or (start_date + config.stop_boundary_delta)),
+            threads=thread_releases,
+        )
+        .tag("control")
+    )
 
     # NOTE: Checking zombie task with schedule job will start every 5 minute.
     schedule.every(5).minutes.at(":10").do(
-        workflow_long_running_task,
+        workflow_monitor,
         threads=thread_releases,
     ).tag("monitor")
 
@@ -1317,14 +1348,16 @@ def workflow_runner(
     """Workflow application that running multiprocessing schedule with chunk of
     workflows that exists in config path.
 
-    :param stop:
+    :param stop: A stop datetime object that force stop running scheduler.
     :param excluded:
     :param externals:
+
     :rtype: list[str]
 
     This function will get all workflows that include on value that was
-    created in config path and chuck it with WORKFLOW_APP_SCHEDULE_PER_PROCESS
-    value to multiprocess executor pool.
+    created in config path and chuck it with application config variable
+    ``WORKFLOW_APP_MAX_SCHEDULE_PER_PROCESS`` env var to multiprocess executor
+    pool.
 
     The current workflow logic that split to process will be below diagram:
 
@@ -1341,7 +1374,7 @@ def workflow_runner(
     excluded: list[str] = excluded or []
 
     with ProcessPoolExecutor(
-        max_workers=int(os.getenv("WORKFLOW_APP_PROCESS_WORKER") or "2"),
+        max_workers=config.max_schedule_process,
     ) as executor:
         futures: list[Future] = [
             executor.submit(
@@ -1352,7 +1385,7 @@
             )
             for loader in batch(
                 Loader.finds(Schedule, excluded=excluded),
-                n=int(os.getenv("WORKFLOW_APP_SCHEDULE_PER_PROCESS") or "100"),
+                n=config.max_schedule_per_process,
             )
         ]
 
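Taken together, the new module docstring and the reformatted `schedule.every(...)` chains describe the control-loop layout: one release job every minute at ':02' and one monitor job every 5 minutes. Below is a minimal sketch of that layout with the `schedule` package; the job bodies are placeholders, and the stop-boundary handling of the real runner (`stop or start_date + config.stop_boundary_delta`) is reduced to a comment.

```python
import time

import schedule


def workflow_task() -> None:
    print("release workflow tasks that are due")


def workflow_monitor() -> None:
    print("check long-running release threads")


schedule.every(1).minutes.at(":02").do(workflow_task).tag("control")
schedule.every(5).minutes.at(":10").do(workflow_monitor).tag("monitor")

# The real workflow_control keeps polling until its stop boundary passes,
# then clears the "control" jobs; this demo simply polls forever.
while True:
    schedule.run_pending()
    time.sleep(1)
```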