ddeutil-workflow 0.0.4__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/on.py ADDED
@@ -0,0 +1,143 @@
+ # ------------------------------------------------------------------------------
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+ # Licensed under the MIT License. See LICENSE in the project root for
+ # license information.
+ # ------------------------------------------------------------------------------
+ from __future__ import annotations
+
+ from datetime import datetime
+ from typing import Annotated, Literal
+ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
+
+ from pydantic import BaseModel, ConfigDict, Field
+ from pydantic.functional_validators import field_validator
+ from typing_extensions import Self
+
+ try:
+     from .__schedule import WEEKDAYS
+     from .__types import DictData, DictStr
+     from .loader import CronJob, CronRunner, Loader
+ except ImportError:
+     from ddeutil.workflow.__scheduler import WEEKDAYS, CronJob, CronRunner
+     from ddeutil.workflow.__types import DictData, DictStr
+     from ddeutil.workflow.loader import Loader
+
+
+ def interval2crontab(
+     interval: Literal["daily", "weekly", "monthly"],
+     day: str = "monday",
+     time: str = "00:00",
+ ) -> str:
+     """Return the crontab string generated from the given interval values.
+
+     :param interval: An interval value that is one of 'daily', 'weekly', or
+         'monthly'.
+     :param day: A day-of-week value that is used when the interval is weekly.
+     :param time: A time value that is passed in '%H:%M' format.
+
+     Examples:
+         >>> interval2crontab(interval='daily', time='01:30')
+         '1 30 * * *'
+         >>> interval2crontab(interval='weekly', day='friday', time='18:30')
+         '18 30 * * 5'
+         >>> interval2crontab(interval='monthly', time='00:00')
+         '0 0 1 * *'
+     """
+     h, m = tuple(
+         i.lstrip("0") if i != "00" else "0" for i in time.split(":", maxsplit=1)
+     )
+     return (
+         f"{h} {m} {'1' if interval == 'monthly' else '*'} * "
+         f"{WEEKDAYS[day[:3].title()] if interval == 'weekly' else '*'}"
+     )
+
+
+ class Schedule(BaseModel):
+     """Schedule Model
+
+     See Also:
+         * ``generate()`` is the main use case of this schedule object.
+     """
+
+     model_config = ConfigDict(arbitrary_types_allowed=True)
+
+     # NOTE: These are the fields of the base schedule.
+     cronjob: Annotated[CronJob, Field(description="Cron job of this schedule")]
+     tz: Annotated[str, Field(description="A timezone string value")] = "Etc/UTC"
+     extras: Annotated[
+         DictData,
+         Field(
+             default_factory=dict,
+             description="An extras mapping of parameters",
+         ),
+     ]
+
+     @classmethod
+     def from_value(cls, value: DictStr, externals: DictData) -> Self:
+         """Constructor from values that generates the crontab by function
+         before creating the schedule model.
+
+         :param value: A mapping value that generates the crontab.
+         :param externals: An extra external parameter that is kept in extras.
+         """
+         passing: DictStr = {}
+         if "timezone" in value:
+             passing["tz"] = value.pop("timezone")
+         passing["cronjob"] = interval2crontab(
+             **{v: value[v] for v in value if v in ("interval", "day", "time")}
+         )
+         return cls(extras=externals, **passing)
+
+     @classmethod
+     def from_loader(
+         cls,
+         name: str,
+         externals: DictData,
+     ) -> Self:
+         """Constructor from a config name that uses the loader object to get
+         the data.
+
+         :param name: A name of the config to get from the loader.
+         :param externals: An extra external parameter that is kept in extras.
+         """
+         loader: Loader = Loader(name, externals=externals)
+         # NOTE: Validate that the config type matches the current model.
+         if loader.type != cls:
+             raise ValueError(f"Type {loader.type} does not match with {cls}")
+
+         if "interval" in loader.data:
+             return cls.from_value(loader.data, externals=externals)
+         if "cronjob" not in loader.data:
+             raise ValueError("Config does not set ``cronjob`` value")
+         if "timezone" in loader.data:
+             return cls(
+                 cronjob=loader.data["cronjob"],
+                 tz=loader.data["timezone"],
+                 extras=externals,
+             )
+         return cls(cronjob=loader.data["cronjob"], extras=externals)
+
+     @field_validator("tz")
+     def __validate_tz(cls, value: str):
+         """Validate that the timezone value is able to initialize ZoneInfo
+         after it is passed to this model in before mode."""
+         try:
+             _ = ZoneInfo(value)
+             return value
+         except ZoneInfoNotFoundError as err:
+             raise ValueError(f"Invalid timezone: {value}") from err
+
+     @field_validator("cronjob", mode="before")
+     def __prepare_cronjob(cls, value: str | CronJob) -> CronJob:
+         """Prepare the crontab value so that it can be received as a string."""
+         return CronJob(value) if isinstance(value, str) else value
+
+     def generate(self, start: str | datetime) -> CronRunner:
+         """Return a CronRunner object."""
+         if not isinstance(start, datetime):
+             start: datetime = datetime.fromisoformat(start)
+         return self.cronjob.schedule(date=(start.astimezone(ZoneInfo(self.tz))))
+
+
+ class AwsSchedule(Schedule):
+     """Implement Schedule for AWS Service."""
@@ -20,7 +20,6 @@ from queue import Queue
  from subprocess import CompletedProcess
  from typing import Any, Callable, Optional, Union

- import msgspec as spec
  from pydantic import BaseModel, Field
  from pydantic.functional_validators import model_validator
  from typing_extensions import Self
@@ -29,11 +28,14 @@ from .__regex import RegexConf
  from .__types import DictData, DictStr
  from .exceptions import TaskException
  from .loader import Loader, map_params
- from .utils import Params, make_exec, make_registry
+ from .utils import Params, TaskSearch, make_exec, make_registry


  class BaseStage(BaseModel, ABC):
-     """Base Stage Model that keep only id and name fields."""
+     """Base Stage Model that keeps only the id and name fields as stage
+     metadata. If you want to implement any custom stage, you can subclass
+     this class and implement only the ``self.execute()`` method.
+     """

      id: Optional[str] = Field(
          default=None,
@@ -49,13 +51,20 @@ class BaseStage(BaseModel, ABC):
      @abstractmethod
      def execute(self, params: DictData) -> DictData:
          """Execute abstraction method where the sub-model class performs its
          action.
+         This is the important method that makes this class able to be a stage.

          :param params: A parameter data that want to use in this execution.
+         :rtype: DictData
          """
          raise NotImplementedError("Stage should implement ``execute`` method.")

      def set_outputs(self, rs: DictData, params: DictData) -> DictData:
-         """Set an outputs from execution process to an input params."""
+         """Set outputs from the execution process onto the input params.
+
+         :param rs: A result data to extract to an output key.
+         :param params: A context data to which the output result is added.
+         :rtype: DictData
+         """
          if self.id is None:
              return params
@@ -67,36 +76,51 @@ class BaseStage(BaseModel, ABC):


  class EmptyStage(BaseStage):
-     """Empty stage that is doing nothing and logging the name of stage only."""
+     """Empty stage that does nothing (its context equals an empty stage) and
+     only logs the stage name to stdout.
+     """

      def execute(self, params: DictData) -> DictData:
-         """Execute for the Empty stage that do only logging out."""
-         logging.info(f"Execute: {self.name!r}")
+         """Execution method for the Empty stage that only logs to stdout.
+
+         :param params: A context data to add the output result to; this
+             stage does not pass any output.
+         """
+         logging.info(f"[STAGE]: Empty-Execute: {self.name!r}")
          return params


  class ShellStage(BaseStage):
-     """Shell statement stage."""
+     """Shell stage that executes a bash script on the current OS. That means
+     if your current OS is Windows, it will run bash in the WSL.
+     """

-     shell: str
-     env: DictStr = Field(default_factory=dict)
+     shell: str = Field(description="A shell statement to execute.")
+     env: DictStr = Field(
+         default_factory=dict,
+         description=(
+             "An environment variable mapping to set before executing "
+             "this shell statement."
+         ),
+     )

-     @staticmethod
      @contextlib.contextmanager
-     def __prepare_shell(shell: str):
+     def __prepare_shell(self):
          """Return context of the prepared shell statement to execute. This
          step will write the `.sh` file before giving the file name to the
          context. After that, it will automatically delete this file.
-
-         :param shell: A shell statement that want to prepare.
          """
          f_name: str = f"{uuid.uuid4()}.sh"
          f_shebang: str = "bash" if sys.platform.startswith("win") else "sh"
          with open(f"./{f_name}", mode="w", newline="\n") as f:
              f.write(f"#!/bin/{f_shebang}\n")

+             for k in self.env:
+                 f.write(f"{k}='{self.env[k]}';\n")
+
              # NOTE: make sure that shell script file does not have `\r` char.
-             f.write(shell.replace("\r\n", "\n"))
+             f.write(self.shell.replace("\r\n", "\n"))

          make_exec(f"./{f_name}")

@@ -126,10 +150,8 @@ class ShellStage(BaseStage):
          """Execute the Shell & Powershell statement with the Python built-in
          ``subprocess`` package.
          """
-         with self.__prepare_shell(self.shell) as sh:
-             with open(sh[-1]) as f:
-                 logging.debug(f.read())
-             logging.info(f"Shell-Execute: {sh}")
+         with self.__prepare_shell() as sh:
+             logging.info(f"[STAGE]: Shell-Execute: {sh}")
              rs: CompletedProcess = subprocess.run(
                  sh,
                  shell=False,
@@ -137,10 +159,13 @@
                  text=True,
              )
          if rs.returncode > 0:
-             logging.error(f"{rs.stderr}\nRunning Statement:\n---\n{self.shell}")
-             raise TaskException(
-                 f"{rs.stderr}\nRunning Statement:\n---\n{self.shell}"
+             err: str = (
+                 rs.stderr.encode("utf-8").decode("utf-16")
+                 if "\\x00" in rs.stderr
+                 else rs.stderr
              )
+             logging.error(f"{err}\nRunning Statement:\n---\n{self.shell}")
+             raise TaskException(f"{err}\nRunning Statement:\n---\n{self.shell}")
          self.set_outputs(rs, params)
          return params

@@ -161,7 +186,12 @@ class PyStage(BaseStage):
          return rs

      def set_outputs(self, rs: DictData, params: DictData) -> DictData:
-         """Set outputs to params"""
+         """Set outputs from the execution process onto the input params.
+
+         :param rs: A result data to extract to an output key.
+         :param params: A context data to which the output result is added.
+         :rtype: DictData
+         """
          # NOTE: skip setting outputs of stage execution when id is not set.
          if self.id is None:
              return params
@@ -201,18 +231,6 @@
          return params | {k: _globals[k] for k in params if k in _globals}


- class TaskSearch(spec.Struct, kw_only=True, tag="task"):
-     """Task Search Struct that use the `msgspec` for the best performance."""
-
-     path: str
-     func: str
-     tag: str
-
-     def to_dict(self) -> DictData:
-         """Return dict data from struct fields."""
-         return {f: getattr(self, f) for f in self.__struct_fields__}
-
-
  class TaskStage(BaseStage):
      """Task executor stage that runs the Python function."""

@@ -309,11 +327,14 @@ class Strategy(BaseModel):


  class Job(BaseModel):
-     """Job Model"""
+     """Job Model that is able to call a group of stages."""

      runs_on: Optional[str] = Field(default=None)
      stages: list[Stage] = Field(default_factory=list)
-     needs: list[str] = Field(default_factory=list)
+     needs: list[str] = Field(
+         default_factory=list,
+         description="A list of job IDs that should run before this job.",
+     )
      strategy: Strategy = Field(default_factory=Strategy)

      @model_validator(mode="before")
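
For illustration, a hypothetical ``jobs`` mapping in which ``transform`` declares a dependency on ``extract`` through the new ``needs`` field (the job and stage names are invented):

jobs = {
    "extract": Job(stages=[EmptyStage(name="pull-source")]),
    "transform": Job(
        needs=["extract"],  # run only after the "extract" job has finished
        stages=[EmptyStage(name="reshape")],
    ),
}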
@@ -388,11 +409,14 @@ class Pipeline(BaseModel):
      coding line to execute it.
      """

+     desc: Optional[str] = Field(default=None)
      params: dict[str, Params] = Field(default_factory=dict)
+     on: dict[str, DictStr] = Field(default_factory=dict)
      jobs: dict[str, Job]

      @model_validator(mode="before")
      def __prepare_params(cls, values: DictData) -> DictData:
+         # NOTE: Prepare the params type if it is passed with only a type value.
          if params := values.pop("params", {}):
              values["params"] = {
                  p: (
@@ -408,8 +432,9 @@
      def from_loader(
          cls,
          name: str,
-         externals: Optional[DictData] = None,
+         externals: DictData | None = None,
      ) -> Self:
+         """Create a Pipeline instance from the Loader object."""
          loader: Loader = Loader(name, externals=(externals or {}))
          if "jobs" not in loader.data:
              raise ValueError("Config does not set ``jobs`` value")
@@ -444,8 +469,10 @@ class Pipeline(BaseModel):
          included in the pipeline.

          :param params: An input parameters that use on pipeline execution.
-         :param time_out: A time out second value for limit time of this
-             execution.
+         :param time_out: A timeout in seconds that limits the running time
+             of this pipeline execution.
+
+         ---

          See Also:

@@ -469,6 +496,7 @@
          if any(p not in params for p in self.params if self.params[p].required):
              raise ValueError("Required parameter does not pass")

+         # NOTE: Map the type of each param before adding it to the params variable.
          params: DictData = {
              "params": (
                  params
@@ -481,6 +509,8 @@ class Pipeline(BaseModel):
              "jobs": {},
          }

+         # NOTE: Create a job queue that keeps the jobs that should run after
+         #   their dependency conditions are met.
          jq = Queue()
          for job_id in self.jobs:
              jq.put(job_id)
@@ -507,6 +537,7 @@
              #
              if any(params["jobs"].get(need) for need in job.needs):
                  jq.put(job_id)
+
              job.execute(params=params)
              params["jobs"][job_id] = {
                  "stages": params.pop("stages", {}),
@@ -3,4 +3,4 @@
  # Licensed under the MIT License. See LICENSE in the project root for
  # license information.
  # ------------------------------------------------------------------------------
- from ._polars import *
+ from .dummy import *
@@ -0,0 +1,52 @@
+ # ------------------------------------------------------------------------------
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+ # Licensed under the MIT License. See LICENSE in the project root for
+ # license information.
+ # ------------------------------------------------------------------------------
+ from __future__ import annotations
+
+ from typing import Any
+
+ from ddeutil.workflow.utils import tag
+
+
+ @tag("polars-dir", name="el-csv-to-parquet")
+ def dummy_task_1(
+     source: str,
+     sink: str,
+     conversion: dict[str, Any] | None = None,
+ ) -> dict[str, int]:
+     """Extract and load data from a CSV file to a Parquet file.
+
+     :param source:
+     :param sink:
+     :param conversion:
+     """
+     print("Start EL for CSV to Parquet with Polars Engine")
+     print("---")
+     print(f"Reading data from {source}")
+
+     conversion: dict[str, Any] = conversion or {}
+     if conversion:
+         print("Start Schema Conversion ...")
+
+     print(f"Writing data to {sink}")
+     return {"records": 1}
+
+
+ @tag("polars-dir-scan", name="el-csv-to-parquet")
+ def dummy_task_2(
+     source: str,
+     sink: str,
+     conversion: dict[str, Any] | None = None,
+ ) -> dict[str, int]:
+     print("Start EL for CSV to Parquet with Polars Engine")
+     print("---")
+     print(f"Reading data from {source}")
+
+     conversion: dict[str, Any] = conversion or {}
+     if conversion:
+         print("Start Schema Conversion ...")
+
+     print(f"Writing data to {sink}")
+     return {"records": 1}
ddeutil/workflow/utils.py CHANGED
@@ -14,12 +14,15 @@ from importlib import import_module
  from pathlib import Path
  from typing import Any, Callable, Literal, Optional, Protocol, Union

+ import msgspec as spec
  from ddeutil.core import lazy
  from ddeutil.io.models.lineage import dt_now
  from pydantic import BaseModel, Field
  from pydantic.functional_validators import model_validator
  from typing_extensions import Self

+ from .__types import DictData
+

  class TagFunc(Protocol):
      """Tag Function Protocol"""
@@ -30,16 +33,16 @@ class TagFunc(Protocol):
      def __call__(self, *args, **kwargs): ...


- def tag(tag_value: str, name: str | None = None):
+ def tag(value: str, name: str | None = None):
      """Tag decorator function that sets the ``tag`` and ``name`` attributes
      on a function for making the registries variable.

-     :param: tag_value: A tag value for make different use-case of a function.
+     :param value: A tag value for making a different use-case of a function.
      :param name: A name that is kept in the registries.
      """

-     def func_internal(func: TagFunc):
-         func.tag = tag_value
+     def func_internal(func: callable) -> TagFunc:
+         func.tag = value
          func.name = name or func.__name__.replace("_", "-")

          @wraps(func)
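
A quick sketch of the decorator's effect; the attribute names come from ``func_internal`` above (``functools.wraps`` copies them onto the wrapper), while the demo function itself is invented:

@tag("demo", name="say-hello")
def say_hello():
    print("hello")

assert say_hello.tag == "demo"
assert say_hello.name == "say-hello"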
@@ -52,7 +55,10 @@ def tag(tag_value: str, name: str | None = None):


  def make_registry(module: str) -> dict[str, dict[str, Callable[[], TagFunc]]]:
-     """Return registries of all functions that able to called with task."""
+     """Return registries of all functions that are able to be called as
+     tasks.
+
+     :param module: A module prefix from which to import the registry.
+     """
      rs: dict[str, dict[str, Callable[[], Callable]]] = {}
      for fstr, func in inspect.getmembers(
          import_module(module), inspect.isfunction
@@ -183,5 +189,20 @@ Params = Union[


  def make_exec(path: str | Path):
+     """Change the mode of a file to make it executable."""
      f: Path = Path(path) if isinstance(path, str) else path
      f.chmod(f.stat().st_mode | stat.S_IEXEC)
+
+
+ class TaskSearch(spec.Struct, kw_only=True, tag="task"):
+     """Task Search Struct that uses `msgspec` for the best data
+     serialization performance.
+     """
+
+     path: str
+     func: str
+     tag: str
+
+     def to_dict(self) -> DictData:
+         """Return dict data from the struct fields."""
+         return {f: getattr(self, f) for f in self.__struct_fields__}
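
Since ``TaskSearch`` moved here from the stage module, a small construction sketch (the field values are invented):

task = TaskSearch(
    path="ddeutil.workflow.tasks",  # hypothetical module path for the task
    func="el-csv-to-parquet",
    tag="polars-dir",
)
print(task.to_dict())
# {'path': 'ddeutil.workflow.tasks', 'func': 'el-csv-to-parquet', 'tag': 'polars-dir'}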