ddeutil-workflow 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@
  # ------------------------------------------------------------------------------
  from __future__ import annotations

+ from collections.abc import Iterator
  from functools import cached_property
  from typing import TypeVar

@@ -35,18 +36,65 @@ class SimLoad:
          self,
          name: str,
          params: ConfParams,
-         externals: DictData,
+         externals: DictData | None = None,
      ) -> None:
          self.data: DictData = {}
          for file in PathSearch(params.engine.paths.conf).files:
-             if any(file.suffix.endswith(s) for s in ("yml", "yaml")) and (
+             if any(file.suffix.endswith(s) for s in (".yml", ".yaml")) and (
                  data := YamlFlResolve(file).read().get(name, {})
              ):
                  self.data = data
          if not self.data:
              raise ValueError(f"Config {name!r} was not found on conf path")
+
+         # TODO: Validate the version of the template data; if the template
+         #   version changes, it should raise an error prompting a package
+         #   upgrade.
+         # ---
+         # <pipeline-name>:
+         #   version: 1
+         #   type: pipeline.Pipeline
+         #
          self.conf_params: ConfParams = params
-         self.externals: DictData = externals
+         self.externals: DictData = externals or {}
+         self.data.update(self.externals)
+
+     @classmethod
+     def find(
+         cls,
+         obj: object,
+         params: ConfParams,
+         *,
+         include: list[str] | None = None,
+         exclude: list[str] | None = None,
+     ) -> Iterator[tuple[str, DictData]]:
+         """Find all objects in the conf path that match the given type."""
+         exclude: list[str] = exclude or []
+         for file in PathSearch(params.engine.paths.conf).files:
+             if any(file.suffix.endswith(s) for s in (".yml", ".yaml")) and (
+                 values := YamlFlResolve(file).read()
+             ):
+                 for key, data in values.items():
+                     if key in exclude:
+                         continue
+                     if (
+                         (t := data.get("type"))
+                         and issubclass(cls.get_type(t, params), obj)
+                         and all(i in data for i in (include or data.keys()))
+                     ):
+                         yield key, data
+
+     @classmethod
+     def get_type(cls, t: str, params: ConfParams) -> AnyModelType:
+         try:
+             # NOTE: Automatically add the module prefix if it is not set.
+             return import_string(f"ddeutil.workflow.{t}")
+         except ModuleNotFoundError:
+             for registry in params.engine.registry:
+                 try:
+                     return import_string(f"{registry}.{t}")
+                 except ModuleNotFoundError:
+                     continue
+             return import_string(f"{t}")

      @cached_property
      def type(self) -> AnyModelType:
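
The new get_type classmethod centralizes type-string resolution: it tries the
ddeutil.workflow namespace first, then every entry in params.engine.registry,
and finally the raw import path. A minimal sketch of calling it directly,
assuming the default engine config and the pipeline.Pipeline type string from
the TODO example above (both are illustrative):

    from ddeutil.workflow.loader import SimLoad
    from ddeutil.workflow.utils import config

    # Tries ddeutil.workflow.pipeline.Pipeline first, then
    # <registry>.pipeline.Pipeline for each configured registry,
    # and finally pipeline.Pipeline as a raw import path.
    model_cls = SimLoad.get_type("pipeline.Pipeline", config())
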
@@ -57,16 +105,7 @@ class SimLoad:
          raise ValueError(
              f"the 'type' value: {_typ} does not exist in config data."
          )
-         try:
-             # NOTE: Auto adding module prefix if it does not set
-             return import_string(f"ddeutil.workflow.{_typ}")
-         except ModuleNotFoundError:
-             for registry in self.conf_params.engine.registry:
-                 try:
-                     return import_string(f"{registry}.{_typ}")
-                 except ModuleNotFoundError:
-                     continue
-             return import_string(f"{_typ}")
+         return self.get_type(_typ, self.conf_params)


  class Loader(SimLoad):
@@ -76,5 +115,18 @@ class Loader(SimLoad):
      :param externals: External parameters.
      """

+     @classmethod
+     def find(
+         cls,
+         obj: object,
+         *,
+         include: list[str] | None = None,
+         exclude: list[str] | None = None,
+         **kwargs,
+     ) -> Iterator[tuple[str, DictData]]:
+         return super().find(
+             obj=obj, params=config(), include=include, exclude=exclude
+         )
+
      def __init__(self, name: str, externals: DictData) -> None:
          super().__init__(name, config(), externals)
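
The find classmethod and its Loader override let callers enumerate every
config entry whose type resolves to a given model class without naming each
entry up front. A minimal usage sketch, assuming a "pipe-hello" entry exists
in the conf path and that its type string resolves to
ddeutil.workflow.pipeline.Pipeline (both names are illustrative):

    from ddeutil.workflow.loader import Loader
    from ddeutil.workflow.pipeline import Pipeline

    # Load one named config; externals now defaults to an empty dict in SimLoad.
    loader = Loader("pipe-hello", externals={})
    assert loader.type is Pipeline

    # Enumerate every config whose type resolves to Pipeline (or a subclass).
    for name, data in Loader.find(Pipeline):
        print(name, data.get("type"))
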
ddeutil/workflow/log.py CHANGED
@@ -5,75 +5,173 @@
  # ------------------------------------------------------------------------------
  from __future__ import annotations

- import logging
+ import json
+ import os
+ import re
+ from abc import ABC, abstractmethod
  from datetime import datetime
- from functools import lru_cache
- from typing import Union
+ from heapq import heappop, heappush
+ from pathlib import Path
+ from typing import Optional, Union

+ from ddeutil.core import str2bool
  from pydantic import BaseModel, Field
- from rich.console import Console
- from rich.logging import RichHandler
+ from pydantic.functional_validators import model_validator

  from .__types import DictData
+ from .utils import config

- console = Console(color_system="256", width=200, style="blue")

+ class BaseLog(BaseModel, ABC):
+     """Base Log Pydantic Model with an abstract ``save`` method."""

- @lru_cache
- def get_logger(module_name):
-     logger = logging.getLogger(module_name)
-     handler = RichHandler(
-         rich_tracebacks=True, console=console, tracebacks_show_locals=True
+     name: str = Field(description="A pipeline name.")
+     on: str = Field(description="A cronjob string of this pipeline schedule.")
+     release: datetime = Field(description="A release datetime.")
+     context: DictData = Field(
+         default_factory=dict,
+         description=(
+             "A context data that is received from a pipeline execution result."
+         ),
      )
-     handler.setFormatter(
-         logging.Formatter(
-             "[ %(threadName)s:%(funcName)s:%(process)d ] - %(message)s"
+     parent_run_id: Optional[str] = Field(default=None)
+     run_id: str
+     update: datetime = Field(default_factory=datetime.now)
+
+     @model_validator(mode="after")
+     def __model_action(self):
+         if str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+             self.do_before()
+         return self
+
+     def do_before(self) -> None:
+         """Do anything before the initial log model finishes building."""
+         return
+
+     @abstractmethod
+     def save(self) -> None:
+         """Save logging data."""
+         raise NotImplementedError("Log should implement the ``save`` method.")
+
+
+ class FileLog(BaseLog):
+     """File Log Pydantic Model that is used to save log data from a result
+     of pipeline execution. It inherits from the BaseLog model and implements
+     the ``save`` method for files.
+     """
+
+     def do_before(self) -> None:
+         """Create the release directory before saving the log file."""
+         self.pointer().mkdir(parents=True, exist_ok=True)
+
+     @classmethod
+     def latest_point(
+         cls,
+         name: str,
+         *,
+         queue: list[datetime] | None = None,
+     ) -> datetime | None:
+         """Return the latest release point that exists in the current
+         logging pointer keeping path.
+
+         :param name: A pipeline name.
+         :param queue: A release queue.
+         """
+         keeping: Path = config().engine.paths.root / f"./logs/pipeline={name}/"
+         if not keeping.exists():
+             return None
+
+         keeping_files: list[int] = [
+             int(found.stem)
+             for found in keeping.glob("*")
+             if found.is_dir() and re.match(r"\d{14}", found.stem)
+         ]
+
+         latest = max(keeping_files or [None])
+
+         if not queue:
+             if latest is None:
+                 return None
+             return datetime.strptime(str(latest), "%Y%m%d%H%M%S")
+
+         latest_queue: datetime = max(queue)
+
+         if latest is None:
+             return latest_queue
+
+         latest_dt: datetime = datetime.strptime(
+             str(latest), "%Y%m%d%H%M%S"
+         ).replace(tzinfo=latest_queue.tzinfo)
+         return max(latest_dt, latest_queue)
+
+     @classmethod
+     def is_pointed(
+         cls,
+         name: str,
+         release: datetime,
+         *,
+         queue: list[datetime] | None = None,
+     ) -> bool:
+         """Check whether this log release has already been pointed.
+
+         :param name: A pipeline name.
+         :param release: A release datetime.
+         :param queue: A list of queued datetimes that will run in the
+             future.
+         """
+         if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+             return False
+
+         # NOTE: Create the pointer path using the same logic as the
+         #   pointer method.
+         pointer: Path = (
+             config().engine.paths.root
+             / f"./logs/pipeline={name}/release={release:%Y%m%d%H%M%S}"
          )
-     )
-     logger.addHandler(handler)
-     logger.setLevel(logging.DEBUG)
-     return logger

+         if queue is None:
+             return pointer.exists()

- class BaseLog(BaseModel):
-     """Base logging model."""
+         if pointer.exists() and not queue:
+             return True

-     parent_id: str
-     id: str
-     input: DictData
-     output: DictData
-     update_time: datetime = Field(default_factory=datetime.now)
+         if queue:
+             latest: datetime = heappop(queue)
+             heappush(queue, latest)
+             if release == latest:
+                 return True

+         return False

- class StageLog(BaseLog): ...
+     def pointer(self) -> Path:
+         """Return the release directory path generated from model data."""
+         return (
+             config().engine.paths.root
+             / f"./logs/pipeline={self.name}/release={self.release:%Y%m%d%H%M%S}"
+         )

+     def save(self) -> None:
+         """Save logging data that receives a context data from a pipeline
+         execution result.
+         """
+         if not str2bool(os.getenv("WORKFLOW_LOG_ENABLE_WRITE", "false")):
+             return
+
+         log_file: Path = self.pointer() / f"{self.run_id}.log"
+         log_file.write_text(
+             json.dumps(
+                 self.model_dump(),
+                 default=str,
+             ),
+             encoding="utf-8",
+         )

- class JobLog(BaseLog): ...

+ class SQLiteLog(BaseLog):

- class PipelineLog(BaseLog): ...
+     def save(self) -> None:
+         raise NotImplementedError("SQLiteLog is not implemented yet.")


  Log = Union[
-     StageLog,
-     JobLog,
-     PipelineLog,
+     FileLog,
+     SQLiteLog,
  ]
-
-
- def push_log_memory(log: DictData):
-     """Push message log to globals log queue."""
-     print(log)
-
-
- LOGS_REGISTRY = {
-     "memory": push_log_memory,
- }
-
-
- def push_log(log: DictData, mode: str = "memory"):
-     return LOGS_REGISTRY[mode](log)
-
-
- def save_log():
-     """Save log that push to queue to target saving"""
ddeutil/workflow/on.py CHANGED
@@ -13,19 +13,14 @@ from pydantic import BaseModel, ConfigDict, Field
  from pydantic.functional_validators import field_validator, model_validator
  from typing_extensions import Self

- try:
-     from .__types import DictData, DictStr
-     from .loader import Loader
-     from .scheduler import WEEKDAYS, CronJob, CronJobYear, CronRunner
- except ImportError:
-     from ddeutil.workflow.__types import DictData, DictStr
-     from ddeutil.workflow.loader import Loader
-     from ddeutil.workflow.scheduler import (
-         WEEKDAYS,
-         CronJob,
-         CronJobYear,
-         CronRunner,
-     )
+ from .__types import DictData, DictStr, TupleStr
+ from .cron import WEEKDAYS, CronJob, CronJobYear, CronRunner
+ from .loader import Loader
+
+ __all__: TupleStr = (
+     "On",
+     "interval2crontab",
+ )


  def interval2crontab(
@@ -151,13 +146,13 @@ class On(BaseModel):
      )

      @model_validator(mode="before")
-     def __prepare_values(cls, values):
+     def __prepare_values(cls, values: DictData) -> DictData:
          if tz := values.pop("tz", None):
              values["timezone"] = tz
          return values

      @field_validator("tz")
-     def __validate_tz(cls, value: str):
+     def __validate_tz(cls, value: str) -> str:
          """Validate that the timezone value is able to initialize ZoneInfo
          after it passes to this model in before mode."""
          try:
@@ -177,6 +172,14 @@ class On(BaseModel):
              start: datetime = datetime.fromisoformat(start)
          return self.cronjob.schedule(date=start, tz=self.tz)

+     def next(self, start: str | datetime) -> datetime:
+         """Return the next datetime from the Cron runner object, starting
+         from the given input date.
+         """
+         if not isinstance(start, datetime):
+             start: datetime = datetime.fromisoformat(start)
+         return self.cronjob.schedule(date=start, tz=self.tz).next
+

  class AwsOn(On):
      """Implement On AWS Schedule for AWS Service like AWS Glue."""