ddeutil-workflow 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/log.py CHANGED
@@ -6,32 +6,54 @@
 from __future__ import annotations
 
 import json
+import logging
 import os
-import re
 from abc import ABC, abstractmethod
 from datetime import datetime
-from heapq import heappop, heappush
+from functools import lru_cache
 from pathlib import Path
 from typing import Optional, Union
 
 from ddeutil.core import str2bool
 from pydantic import BaseModel, Field
 from pydantic.functional_validators import model_validator
+from typing_extensions import Self
 
 from .__types import DictData
 from .utils import config
 
 
+@lru_cache
+def get_logger(name: str):
+    """Return logger with an input module name."""
+    logger = logging.getLogger(name)
+    formatter = logging.Formatter(
+        fmt=(
+            "%(asctime)s.%(msecs)03d (%(name)-10s, %(process)-5d, "
+            "%(thread)-5d) [%(levelname)-7s] %(message)-120s "
+            "(%(filename)s:%(lineno)s)"
+        ),
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+    stream = logging.StreamHandler()
+    stream.setFormatter(formatter)
+    logger.addHandler(stream)
+
+    debug: bool = str2bool(os.getenv("WORKFLOW_LOG_DEBUG_MODE", "true"))
+    logger.setLevel(logging.DEBUG if debug else logging.INFO)
+    return logger
+
+
 class BaseLog(BaseModel, ABC):
-    """Base Log Pydantic Model"""
+    """Base Log Pydantic Model abstraction that implements only model fields."""
 
-    name: str = Field(description="A pipeline name.")
+    name: str = Field(description="A workflow name.")
     on: str = Field(description="A cronjob string of this pipeline schedule.")
     release: datetime = Field(description="A release datetime.")
     context: DictData = Field(
         default_factory=dict,
         description=(
-            "A context data that receive from a pipeline execution result.",
+            "Context data received from a workflow execution result.",
        ),
    )
     parent_run_id: Optional[str] = Field(default=None)
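
The new get_logger helper builds a formatted stream logger once per module name; because of lru_cache, repeated calls with the same name return the already-configured logger instead of stacking duplicate handlers. A minimal usage sketch:

from ddeutil.workflow.log import get_logger

# First call attaches the handler and sets the level; later calls with the
# same name hit the lru_cache and reuse the configured logger.
logger = get_logger("ddeutil.workflow")
logger.info("workflow release started")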
@@ -46,17 +68,16 @@ class BaseLog(BaseModel, ABC):
 
     def do_before(self) -> None:
         """Do something before finishing initialization of the log model."""
-        return
 
     @abstractmethod
-    def save(self) -> None:
+    def save(self, excluded: list[str] | None) -> None:
         """Save logging"""
         raise NotImplementedError("Log should implement ``save`` method.")
 
 
 class FileLog(BaseLog):
     """File Log Pydantic Model that is used to save log data from a result of
-    pipeline execution. It inherit from BaseLog model that implement the
+    workflow execution. It inherits from the BaseLog model and implements the
     ``self.save`` method for file.
     """
 
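Since save is abstract and now carries the excluded parameter, every backend has to accept it. A hypothetical in-memory backend, sketched only to show the new contract (MemoryLog and its store are illustrative, not part of the package):

from typing import ClassVar

from ddeutil.workflow.log import BaseLog


class MemoryLog(BaseLog):
    """Hypothetical backend that collects dumped logs in memory."""

    store: ClassVar[list[dict]] = []

    def save(self, excluded: list[str] | None) -> None:
        # Same contract as FileLog: forward the excluded keys to model_dump.
        MemoryLog.store.append(self.model_dump(exclude=excluded))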
@@ -65,43 +86,28 @@ class FileLog(BaseLog):
         self.pointer().mkdir(parents=True, exist_ok=True)
 
     @classmethod
-    def latest_point(
-        cls,
-        name: str,
-        *,
-        queue: list[datetime] | None = None,
-    ) -> datetime | None:
-        """Return latest point that exist in current logging pointer keeping.
-
-        :param name: A pipeline name
-        :param queue: A release queue.
-        """
-        keeping: Path = config().engine.paths.root / f"./logs/pipeline={name}/"
-        if not keeping.exists():
-            return None
-
-        keeping_files: list[int] = [
-            int(found.stem)
-            for found in keeping.glob("*")
-            if found.is_dir() and re.match(r"\d{14}", found.stem)
-        ]
-
-        latest = max(keeping_files or [None])
+    def find_logs(cls, name: str):
+        pointer: Path = config().engine.paths.root / f"./logs/workflow={name}"
+        for file in pointer.glob("./release=*/*.log"):
+            with file.open(mode="r", encoding="utf-8") as f:
+                yield json.load(f)
 
-        if not queue:
-            if latest is None:
-                return None
-            return datetime.strptime(str(latest), "%Y%m%d%H%M%S")
-
-        latest_queue: datetime = max(queue)
-
-        if latest is None:
-            return latest_queue
-
-        latest_dt: datetime = datetime.strptime(
-            str(latest), "%Y%m%d%H%M%S"
-        ).replace(tzinfo=latest_queue.tzinfo)
-        return max(latest_dt, latest_queue)
+    @classmethod
+    def find_log(cls, name: str, release: datetime | None = None):
+        if release is not None:
+            pointer: Path = (
+                config().engine.paths.root
+                / f"./logs/workflow={name}/release={release:%Y%m%d%H%M%S}"
+            )
+            if not pointer.exists():
+                raise FileNotFoundError(
+                    f"Pointer: ./logs/workflow={name}/"
+                    f"release={release:%Y%m%d%H%M%S} does not exist."
+                )
+            return cls.model_validate(
+                obj=json.loads(pointer.read_text(encoding="utf-8"))
+            )
+        raise NotImplementedError("Finding the latest log is not implemented yet.")
 
     @classmethod
     def is_pointed(
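
find_logs lazily yields every saved release as parsed JSON, while find_log loads one release back into a validated model. A sketch, with an illustrative workflow name and release value:

from datetime import datetime

from ddeutil.workflow.log import FileLog

# Stream every *.log file under ./logs/workflow=wf-example/release=*/.
for data in FileLog.find_logs(name="wf-example"):
    print(data["release"], data.get("run_id"))

# Load one release; raises FileNotFoundError if that pointer directory is
# missing, and NotImplementedError when release is omitted.
log = FileLog.find_log(name="wf-example", release=datetime(2024, 1, 1))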
@@ -111,63 +117,64 @@ class FileLog(BaseLog):
         *,
         queue: list[datetime] | None = None,
     ) -> bool:
-        """Check this log already point.
+        """Check whether this log was already pointed in the destination.
 
-        :param name: A pipeline name.
+        :param name: A workflow name.
         :param release: A release datetime.
         :param queue: A list of queued release datetimes that will run in the
             future.
         """
+        # NOTE: Check environ variable was set for real writing.
         if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
             return False
 
         # NOTE: create pointer path that uses the same logic as the pointer method.
         pointer: Path = (
             config().engine.paths.root
-            / f"./logs/pipeline={name}/release={release:%Y%m%d%H%M%S}"
+            / f"./logs/workflow={name}/release={release:%Y%m%d%H%M%S}"
         )
 
-        if queue is None:
+        if not queue:
             return pointer.exists()
-
-        if pointer.exists() and not queue:
-            return True
-
-        if queue:
-            latest: datetime = heappop(queue)
-            heappush(queue, latest)
-            if release == latest:
-                return True
-
-        return False
+        return pointer.exists() or (release in queue)
 
     def pointer(self) -> Path:
-        """Return release directory path that was generated from model data."""
+        """Return release directory path that was generated from model data.
+
+        :rtype: Path
+        """
         return (
             config().engine.paths.root
-            / f"./logs/pipeline={self.name}/release={self.release:%Y%m%d%H%M%S}"
+            / f"./logs/workflow={self.name}/release={self.release:%Y%m%d%H%M%S}"
         )
 
-    def save(self) -> None:
-        """Save logging data that receive a context data from a pipeline
+    def save(self, excluded: list[str] | None) -> Self:
+        """Save logging data that receives context data from a workflow
         execution result.
+
+        :param excluded: A list of key names to exclude when calling the
+            model_dump method.
+        :rtype: Self
         """
+        # NOTE: Check environ variable was set for real writing.
         if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
-            return
+            return self
 
         log_file: Path = self.pointer() / f"{self.run_id}.log"
         log_file.write_text(
             json.dumps(
-                self.model_dump(),
+                self.model_dump(exclude=excluded),
                 default=str,
+                indent=2,
             ),
             encoding="utf-8",
         )
+        return self
 
 
 class SQLiteLog(BaseLog):
 
-    def save(self) -> None:
+    def save(self, excluded: list[str] | None) -> None:
         raise NotImplementedError("SQLiteLog is not implemented yet.")
 
 
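Taken together, FileLog.save now writes pretty-printed JSON, honors an exclusion list, and returns the model so calls can be chained; nothing is written unless WORKFLOW_LOG_ENABLE_WRITE is truthy. A write-path sketch with illustrative field values:

import os
from datetime import datetime

from ddeutil.workflow.log import FileLog

os.environ["WORKFLOW_LOG_ENABLE_WRITE"] = "true"

log = FileLog(
    name="wf-example",              # illustrative workflow name
    on="*/5 * * * *",
    release=datetime(2024, 1, 1),
    context={"jobs": {}},
    run_id="demo-run-id",           # illustrative run id
)
# Writes ./logs/workflow=wf-example/release=20240101000000/demo-run-id.log
# under the configured root, dropping the excluded keys from the dump.
saved = log.save(excluded=["parent_run_id"])
assert saved is log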
ddeutil/workflow/on.py CHANGED
@@ -10,12 +10,13 @@ from typing import Annotated, Literal
 from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
 
 from pydantic import BaseModel, ConfigDict, Field
+from pydantic.functional_serializers import field_serializer
 from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
 from .__types import DictData, DictStr, TupleStr
 from .cron import WEEKDAYS, CronJob, CronJobYear, CronRunner
-from .loader import Loader
+from .utils import Loader
 
 __all__: TupleStr = (
     "On",
@@ -101,7 +102,7 @@ class On(BaseModel):
         passing["cronjob"] = interval2crontab(
             **{v: value[v] for v in value if v in ("interval", "day", "time")}
         )
-        return cls(extras=externals, **passing)
+        return cls(extras=externals | passing.pop("extras", {}), **passing)
 
     @classmethod
     def from_loader(
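
The switch from extras=externals to externals | passing.pop("extras", {}) merges extras declared in the loaded config with caller-supplied externals; with Python's dict union, keys from the right-hand operand win. A small illustration of that precedence (values are made up):

externals = {"tz": "UTC", "owner": "core"}
passing = {"extras": {"owner": "data-team"}, "cronjob": "0 0 * * *"}

merged = externals | passing.pop("extras", {})
assert merged == {"tz": "UTC", "owner": "data-team"}  # right side wins
assert "extras" not in passing  # popped, so it is not passed to cls() twice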
@@ -116,6 +117,7 @@
         :param externals: An external parameter that will be kept in extras.
         """
         loader: Loader = Loader(name, externals=externals)
+
         # NOTE: Validate that the config type matches the current model.
         if loader.type != cls:
             raise ValueError(f"Type {loader.type} does not match with {cls}")
@@ -131,7 +133,7 @@ class On(BaseModel):
                         if v in ("interval", "day", "time")
                     }
                 ),
-                extras=externals,
+                extras=externals | loader_data.pop("extras", {}),
                 **loader_data,
             )
         )
@@ -140,7 +142,7 @@ class On(BaseModel):
         return cls.model_validate(
             obj=dict(
                 cronjob=loader_data.pop("cronjob"),
-                extras=externals,
+                extras=externals | loader_data.pop("extras", {}),
                 **loader_data,
             )
         )
@@ -166,6 +168,10 @@ class On(BaseModel):
         """Prepare the crontab value so that it can be received as a string type."""
         return CronJob(value) if isinstance(value, str) else value
 
+    @field_serializer("cronjob")
+    def __serialize_cronjob(self, value: CronJob) -> str:
+        return str(value)
+
     def generate(self, start: str | datetime) -> CronRunner:
         """Return Cron runner object."""
         if not isinstance(start, datetime):
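
With the new field_serializer, model_dump and model_dump_json render the cronjob field as its crontab string rather than a CronJob object, which makes On models round-trippable through JSON. A sketch, assuming an On can be validated directly from a crontab string with default extras:

from ddeutil.workflow.on import On

on = On.model_validate({"cronjob": "*/5 * * * *"})
data = on.model_dump()
# __serialize_cronjob converts the CronJob back into a plain string.
assert isinstance(data["cronjob"], str)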
@@ -3,33 +3,63 @@
 # Licensed under the MIT License.
 # This code refs from: https://github.com/priyanshu-panwar/fastapi-utilities
 # ------------------------------------------------------------------------------
+from __future__ import annotations
+
 import asyncio
-import logging
 import os
 from asyncio import ensure_future
 from datetime import datetime
 from functools import wraps
 from zoneinfo import ZoneInfo
 
-from croniter import croniter
 from starlette.concurrency import run_in_threadpool
 
+from .cron import CronJob
+from .log import get_logger
+
+logger = get_logger("ddeutil.workflow")
 
-def get_cron_delta(cron: str):
+
+def get_cronjob_delta(cron: str) -> float:
     """This function returns the time delta between now and the next cron
     execution time.
     """
     now: datetime = datetime.now(
         tz=ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
     )
-    cron = croniter(cron, now)
-    return (cron.get_next(datetime) - now).total_seconds()
+    cron = CronJob(cron)
+    return (cron.schedule(now).next - now).total_seconds()
+
+
+def cron_valid(cron: str):
+    try:
+        CronJob(cron)
+    except Exception as err:
+        raise ValueError(f"Crontab value is not valid: {cron}") from err
+
+
+async def run_func(
+    is_coroutine,
+    func,
+    *args,
+    raise_exceptions: bool = False,
+    **kwargs,
+):
+    try:
+        if is_coroutine:
+            await func(*args, **kwargs)
+        else:
+            await run_in_threadpool(func, *args, **kwargs)
+    except Exception as e:
+        logger.exception(e)
+        if raise_exceptions:
+            raise e
 
 
 def repeat_at(
     *,
     cron: str,
-    logger: logging.Logger = None,
+    delay: float = 0,
     raise_exceptions: bool = False,
     max_repetitions: int = None,
 ):
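
get_cronjob_delta and cron_valid now route through the package's own CronJob instead of croniter. A usage sketch; the import path is an assumption, since the diff does not show this file's name:

# NOTE: module path is assumed; the file name is not shown in the diff.
from ddeutil.workflow.repeat import cron_valid, get_cronjob_delta

cron_valid("0 0 * * *")                 # passes silently when the crontab parses
delta = get_cronjob_delta("0 0 * * *")  # seconds until the next midnight
print(f"next run in {delta:.0f}s")

try:
    cron_valid("not-a-cron")
except ValueError as err:
    print(err)  # "Crontab value is not valid: not-a-cron"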
@@ -38,40 +68,38 @@ def repeat_at(
 
     :param cron: str
         Cron-style string for periodic execution, e.g. '0 0 * * *' every midnight
-    :param logger: logging.Logger (default None)
-        Logger object to log exceptions
+    :param delay: Seconds added to the computed cron delta before each run.
     :param raise_exceptions: bool (default False)
         Whether to raise exceptions or log them
     :param max_repetitions: int (default None)
         Maximum number of times to repeat the function. If None, repeat
         indefinitely.
-
     """
+    if max_repetitions and max_repetitions <= 0:
+        raise ValueError(
+            "max_repetitions should be more than zero if it is set"
+        )
 
     def decorator(func):
-        is_coroutine = asyncio.iscoroutinefunction(func)
+        is_coroutine: bool = asyncio.iscoroutinefunction(func)
 
         @wraps(func)
         def wrapper(*_args, **_kwargs):
-            repetitions = 0
-            if not croniter.is_valid(cron):
-                raise ValueError("Invalid cron expression")
+            repetitions: int = 0
+            cron_valid(cron)
 
             async def loop(*args, **kwargs):
                 nonlocal repetitions
                 while max_repetitions is None or repetitions < max_repetitions:
-                    try:
-                        sleep_time = get_cron_delta(cron)
-                        await asyncio.sleep(sleep_time)
-                        if is_coroutine:
-                            await func(*args, **kwargs)
-                        else:
-                            await run_in_threadpool(func, *args, **kwargs)
-                    except Exception as e:
-                        if logger:
-                            logger.exception(e)
-                        if raise_exceptions:
-                            raise e
+                    sleep_time = get_cronjob_delta(cron) + delay
+                    await asyncio.sleep(sleep_time)
+                    await run_func(
+                        is_coroutine,
+                        func,
+                        *args,
+                        raise_exceptions=raise_exceptions,
+                        **kwargs,
+                    )
                     repetitions += 1
 
             ensure_future(loop(*_args, **_kwargs))
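
Callers of repeat_at no longer pass a logger (failures go through run_func to the module logger), and the new delay parameter pushes each wake-up slightly past the exact cron boundary. A decorating sketch, reusing the assumed module path:

from ddeutil.workflow.repeat import repeat_at  # assumed module path

@repeat_at(cron="0 0 * * *", delay=2, raise_exceptions=False)
def nightly_cleanup():
    # Fires about 2 seconds after every midnight; exceptions are logged
    # by run_func instead of raised because raise_exceptions is False.
    print("cleanup done")

# Calling the wrapped function schedules the loop with ensure_future,
# so an asyncio event loop must already be running at call time.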
@@ -85,7 +113,6 @@ def repeat_every(
     *,
     seconds: float,
     wait_first: bool = False,
-    logger: logging.Logger = None,
     raise_exceptions: bool = False,
     max_repetitions: int = None,
 ):
@@ -97,17 +124,19 @@
     :param wait_first: bool (default False)
         Whether to wait `seconds` seconds before executing the function for the
         first time.
-    :param logger: logging.Logger (default None)
-        The logger to use for logging exceptions.
     :param raise_exceptions: bool (default False)
         Whether to raise exceptions instead of logging them.
     :param max_repetitions: int (default None)
         The maximum number of times to repeat the function. If None, the
         function will repeat indefinitely.
     """
+    if max_repetitions and max_repetitions <= 0:
+        raise ValueError(
+            "max_repetitions should be more than zero if it is set"
+        )
 
     def decorator(func):
-        is_coroutine = asyncio.iscoroutinefunction(func)
+        is_coroutine: bool = asyncio.iscoroutinefunction(func)
 
         @wraps(func)
         async def wrapper(*_args, **_kwargs):
@@ -115,19 +144,19 @@ def repeat_every(
 
         async def loop(*args, **kwargs):
             nonlocal repetitions
+
             if wait_first:
                 await asyncio.sleep(seconds)
+
             while max_repetitions is None or repetitions < max_repetitions:
-                try:
-                    if is_coroutine:
-                        await func(*args, **kwargs)
-                    else:
-                        await run_in_threadpool(func, *args, **kwargs)
-                except Exception as e:
-                    if logger is not None:
-                        logger.exception(e)
-                    if raise_exceptions:
-                        raise e
+                await run_func(
+                    is_coroutine,
+                    func,
+                    *args,
+                    raise_exceptions=raise_exceptions,
+                    **kwargs,
+                )
+
                 repetitions += 1
                 await asyncio.sleep(seconds)
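
repeat_every follows the same pattern on a fixed interval and adds the same guard on max_repetitions. A sketch of running a bounded heartbeat under a plain asyncio loop (the wiring here is illustrative):

import asyncio

from ddeutil.workflow.repeat import repeat_every  # assumed module path

@repeat_every(seconds=60, wait_first=True, max_repetitions=10)
async def heartbeat():
    # Waits 60s first, then runs at most 10 times, one minute apart.
    print("still alive")

async def main():
    await heartbeat()          # async wrapper schedules the loop task
    await asyncio.sleep(180)   # keep the event loop alive for a few runs

asyncio.run(main())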