ddeutil-workflow 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,4 +21,7 @@ class JobException(WorkflowException): ...
  class PipelineException(WorkflowException): ...


+ class PipelineFailException(WorkflowException): ...
+
+
  class ParamValueException(WorkflowException): ...
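
The new PipelineFailException lets callers separate a pipeline that failed during execution from other workflow errors. A minimal handling sketch; the run_pipeline entry point is illustrative, not taken from this diff:

    try:
        run_pipeline()  # hypothetical caller-side entry point
    except PipelineFailException:
        # React to a pipeline-level failure, e.g. retry or mark the release.
        ...
    except WorkflowException:
        # PipelineFailException subclasses WorkflowException, so this branch
        # still catches every other workflow error.
        ...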
ddeutil/workflow/log.py CHANGED
@@ -5,75 +5,180 @@
  # ------------------------------------------------------------------------------
  from __future__ import annotations

+ import json
  import logging
+ import os
+ from abc import ABC, abstractmethod
  from datetime import datetime
  from functools import lru_cache
- from typing import Union
+ from pathlib import Path
+ from typing import Optional, Union

+ from ddeutil.core import str2bool
  from pydantic import BaseModel, Field
- from rich.console import Console
- from rich.logging import RichHandler
+ from pydantic.functional_validators import model_validator
+ from typing_extensions import Self

  from .__types import DictData
-
- console = Console(color_system="256", width=200, style="blue")
+ from .utils import config


  @lru_cache
- def get_logger(module_name):
-     logger = logging.getLogger(module_name)
-     handler = RichHandler(
-         rich_tracebacks=True, console=console, tracebacks_show_locals=True
-     )
-     handler.setFormatter(
-         logging.Formatter(
-             "[ %(threadName)s:%(funcName)s:%(process)d ] - %(message)s"
-         )
+ def get_logger(name: str):
+     """Return a logger for the input module name."""
+     logger = logging.getLogger(name)
+     formatter = logging.Formatter(
+         fmt=(
+             "%(asctime)s.%(msecs)03d (%(name)-10s, %(process)-5d, "
+             "%(thread)-5d) [%(levelname)-7s] %(message)-120s "
+             "(%(filename)s:%(lineno)s)"
+         ),
+         datefmt="%Y-%m-%d %H:%M:%S",
      )
-     logger.addHandler(handler)
-     logger.setLevel(logging.DEBUG)
+     stream = logging.StreamHandler()
+     stream.setFormatter(formatter)
+     logger.addHandler(stream)
+
+     debug: bool = str2bool(os.getenv("WORKFLOW_LOG_DEBUG_MODE", "true"))
+     logger.setLevel(logging.DEBUG if debug else logging.INFO)
      return logger


- class BaseLog(BaseModel):
-     """Base logging model."""
+ class BaseLog(BaseModel, ABC):
+     """Base Log Pydantic model abstraction that implements only model fields."""

-     parent_id: str
-     id: str
-     input: DictData
-     output: DictData
-     update_time: datetime = Field(default_factory=datetime.now)
+     name: str = Field(description="A pipeline name.")
+     on: str = Field(description="A cronjob string of this pipeline schedule.")
+     release: datetime = Field(description="A release datetime.")
+     context: DictData = Field(
+         default_factory=dict,
+         description=(
+             "A context data that receives from a pipeline execution result."
+         ),
+     )
+     parent_run_id: Optional[str] = Field(default=None)
+     run_id: str
+     update: datetime = Field(default_factory=datetime.now)
+
+     @model_validator(mode="after")
+     def __model_action(self):
+         if str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+             self.do_before()
+         return self
+
+     def do_before(self) -> None:
+         """Hook that runs before the log model finishes initializing."""
+
+     @abstractmethod
+     def save(self, excluded: list[str] | None) -> None:
+         """Save logging."""
+         raise NotImplementedError("Log should implement ``save`` method.")
+
+
+ class FileLog(BaseLog):
+     """File Log Pydantic model used to save log data from the result of a
+     pipeline execution. It inherits from the BaseLog model and implements
+     the ``self.save`` method for files.
+     """
+
+     def do_before(self) -> None:
+         """Create the release directory before saving the log file."""
+         self.pointer().mkdir(parents=True, exist_ok=True)
+
+     @classmethod
+     def find_logs(cls, name: str):
+         pointer: Path = config().engine.paths.root / f"./logs/pipeline={name}"
+         for file in pointer.glob("./release=*/*.log"):
+             with file.open(mode="r", encoding="utf-8") as f:
+                 yield json.load(f)
+
+     @classmethod
+     def find_log(cls, name: str, release: datetime | None = None):
+         if release is not None:
+             pointer: Path = (
+                 config().engine.paths.root
+                 / f"./logs/pipeline={name}/release={release:%Y%m%d%H%M%S}"
+             )
+             if not pointer.exists():
+                 raise FileNotFoundError(
+                     f"Pointer: ./logs/pipeline={name}/"
+                     f"release={release:%Y%m%d%H%M%S} does not found."
+                 )
+             return cls.model_validate(
+                 obj=json.loads(pointer.read_text(encoding="utf-8"))
+             )
+         raise NotImplementedError("Find latest log does not implement yet.")
+
+     @classmethod
+     def is_pointed(
+         cls,
+         name: str,
+         release: datetime,
+         *,
+         queue: list[datetime] | None = None,
+     ) -> bool:
+         """Check whether this log was already pointed in the destination.
+
+         :param name: A pipeline name.
+         :param release: A release datetime.
+         :param queue: A list of datetimes already queued to run in the
+             future.
+         """
+         # NOTE: Check the environment variable was set for real writing.
+         if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+             return False
+
+         # NOTE: Create a pointer path using the same logic as the pointer method.
+         pointer: Path = (
+             config().engine.paths.root
+             / f"./logs/pipeline={name}/release={release:%Y%m%d%H%M%S}"
+         )
+
+         if not queue:
+             return pointer.exists()
+         return pointer.exists() or (release in queue)

+     def pointer(self) -> Path:
+         """Return the release directory path generated from model data.

- class StageLog(BaseLog): ...
+         :rtype: Path
+         """
+         return (
+             config().engine.paths.root
+             / f"./logs/pipeline={self.name}/release={self.release:%Y%m%d%H%M%S}"
+         )

+     def save(self, excluded: list[str] | None) -> Self:
+         """Save logging data received as context data from a pipeline
+         execution result.
+
+         :param excluded: An excluded list of key names to pass to the
+             model_dump method.
+         :rtype: Self
+         """
+         # NOTE: Check the environment variable was set for real writing.
+         if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+             return self
+
+         log_file: Path = self.pointer() / f"{self.run_id}.log"
+         log_file.write_text(
+             json.dumps(
+                 self.model_dump(exclude=excluded),
+                 default=str,
+                 indent=2,
+             ),
+             encoding="utf-8",
+         )
+         return self

- class JobLog(BaseLog): ...

+ class SQLiteLog(BaseLog):

- class PipelineLog(BaseLog): ...
+     def save(self, excluded: list[str] | None) -> None:
+         raise NotImplementedError("SQLiteLog does not implement yet.")


  Log = Union[
-     StageLog,
-     JobLog,
-     PipelineLog,
+     FileLog,
+     SQLiteLog,
  ]
-
-
- def push_log_memory(log: DictData):
-     """Push message log to globals log queue."""
-     print(log)
-
-
- LOGS_REGISTRY = {
-     "memory": push_log_memory,
- }
-
-
- def push_log(log: DictData, mode: str = "memory"):
-     return LOGS_REGISTRY[mode](log)
-
-
- def save_log():
-     """Save log that push to queue to target saving"""
ddeutil/workflow/on.py CHANGED
@@ -10,22 +10,18 @@ from typing import Annotated, Literal
  from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

  from pydantic import BaseModel, ConfigDict, Field
+ from pydantic.functional_serializers import field_serializer
  from pydantic.functional_validators import field_validator, model_validator
  from typing_extensions import Self

- try:
-     from .__types import DictData, DictStr
-     from .loader import Loader
-     from .scheduler import WEEKDAYS, CronJob, CronJobYear, CronRunner
- except ImportError:
-     from ddeutil.workflow.__types import DictData, DictStr
-     from ddeutil.workflow.loader import Loader
-     from ddeutil.workflow.scheduler import (
-         WEEKDAYS,
-         CronJob,
-         CronJobYear,
-         CronRunner,
-     )
+ from .__types import DictData, DictStr, TupleStr
+ from .cron import WEEKDAYS, CronJob, CronJobYear, CronRunner
+ from .utils import Loader
+
+ __all__: TupleStr = (
+     "On",
+     "interval2crontab",
+ )


  def interval2crontab(
@@ -106,7 +102,7 @@ class On(BaseModel):
          passing["cronjob"] = interval2crontab(
              **{v: value[v] for v in value if v in ("interval", "day", "time")}
          )
-         return cls(extras=externals, **passing)
+         return cls(extras=externals | passing.pop("extras", {}), **passing)

      @classmethod
      def from_loader(
@@ -121,6 +117,7 @@ class On(BaseModel):
          :param externals: A extras external parameter that will keep in extras.
          """
          loader: Loader = Loader(name, externals=externals)
+
          # NOTE: Validate the config type match with current connection model
          if loader.type != cls:
              raise ValueError(f"Type {loader.type} does not match with {cls}")
@@ -136,7 +133,7 @@ class On(BaseModel):
                          if v in ("interval", "day", "time")
                      }
                  ),
-                 extras=externals,
+                 extras=externals | loader_data.pop("extras", {}),
                  **loader_data,
              )
          )
@@ -145,19 +142,19 @@
          return cls.model_validate(
              obj=dict(
                  cronjob=loader_data.pop("cronjob"),
-                 extras=externals,
+                 extras=externals | loader_data.pop("extras", {}),
                  **loader_data,
              )
          )

      @model_validator(mode="before")
-     def __prepare_values(cls, values):
+     def __prepare_values(cls, values: DictData) -> DictData:
          if tz := values.pop("tz", None):
              values["timezone"] = tz
          return values

      @field_validator("tz")
-     def __validate_tz(cls, value: str):
+     def __validate_tz(cls, value: str) -> str:
          """Validate timezone value that able to initialize with ZoneInfo after
          it passing to this model in before mode."""
          try:
@@ -171,12 +168,24 @@
          """Prepare crontab value that able to receive with string type."""
          return CronJob(value) if isinstance(value, str) else value

+     @field_serializer("cronjob")
+     def __serialize_cronjob(self, value: CronJob) -> str:
+         return str(value)
+
      def generate(self, start: str | datetime) -> CronRunner:
          """Return Cron runner object."""
          if not isinstance(start, datetime):
              start: datetime = datetime.fromisoformat(start)
          return self.cronjob.schedule(date=start, tz=self.tz)

+     def next(self, start: str | datetime) -> datetime:
+         """Return the next datetime from the Cron runner object, starting
+         from the given input date.
+         """
+         if not isinstance(start, datetime):
+             start: datetime = datetime.fromisoformat(start)
+         return self.cronjob.schedule(date=start, tz=self.tz).next
+

  class AwsOn(On):
      """Implement On AWS Schedule for AWS Service like AWS Glue."""