ddeutil-workflow 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/on.py ADDED
@@ -0,0 +1,143 @@
1
+ # ------------------------------------------------------------------------------
2
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
3
+ # Licensed under the MIT License. See LICENSE in the project root for
4
+ # license information.
5
+ # ------------------------------------------------------------------------------
6
+ from __future__ import annotations
7
+
8
+ from datetime import datetime
9
+ from typing import Annotated, Literal
10
+ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
11
+
12
+ from pydantic import BaseModel, ConfigDict, Field
13
+ from pydantic.functional_validators import field_validator
14
+ from typing_extensions import Self
15
+
16
+ try:
17
+ from .__schedule import WEEKDAYS
18
+ from .__types import DictData, DictStr
19
+ from .loader import CronJob, CronRunner, Loader
20
+ except ImportError:
21
+ from ddeutil.workflow.__scheduler import WEEKDAYS, CronJob, CronRunner
22
+ from ddeutil.workflow.__types import DictData, DictStr
23
+ from ddeutil.workflow.loader import Loader
24
+
25
+
26
def interval2crontab(
    interval: Literal["daily", "weekly", "monthly"],
    day: str = "monday",
    time: str = "00:00",
) -> str:
    """Return the crontab string that was generated from specific values.

    The result uses the standard five-field crontab layout, which is
    ``minute hour day-of-month month day-of-week``.

    :param interval: An interval value that is one of 'daily', 'weekly', or
        'monthly'.
    :param day: A day-of-week name such as 'monday'. Only its first three
        letters (title-cased) are looked up in ``WEEKDAYS``, and only when
        the interval is 'weekly'.
    :param time: A time value in the '%H:%M' format. Zero padding is
        optional, so '1:30' and '01:30' are equivalent.
    :raises ValueError: If ``time`` is not two integers separated by a colon.
    :rtype: str

    Examples:
        >>> interval2crontab(interval='daily', time='01:30')
        '30 1 * * *'
        >>> interval2crontab(interval='weekly', day='friday', time='18:30')
        '30 18 * * 5'
        >>> interval2crontab(interval='monthly', time='00:00')
        '0 0 1 * *'
    """
    # NOTE: ``int()`` strips zero padding safely. Stripping with
    #   ``lstrip("0")`` turns an unpadded "0" into an empty cron field.
    hour, minute = (int(part) for part in time.split(":", maxsplit=1))

    day_of_month: str = "1" if interval == "monthly" else "*"
    # NOTE: ``WEEKDAYS`` is only consulted for the weekly interval.
    day_of_week = WEEKDAYS[day[:3].title()] if interval == "weekly" else "*"

    # NOTE: crontab expects the minute field before the hour field.
    return f"{minute} {hour} {day_of_month} * {day_of_week}"
53
+
54
+
55
class Schedule(BaseModel):
    """Schedule model that pairs a cron job with a timezone name.

    Instances can be built directly, from an interval mapping with
    ``from_value()``, or from a named config with ``from_loader()``.

    See Also:
        * ``generate()`` is the main usecase of this schedule object.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # NOTE: This is fields of the base schedule.
    cronjob: Annotated[CronJob, Field(description="Cron job of this schedule")]
    tz: Annotated[str, Field(description="A timezone string value")] = "Etc/UTC"
    extras: Annotated[
        DictData,
        Field(
            default_factory=dict,
            description="An extras mapping parameters",
        ),
    ]

    @classmethod
    def from_value(cls, value: DictStr, externals: DictData) -> Self:
        """Constructor from values that will generate crontab by function.

        The ``value`` mapping is only read; it is never modified, so the
        caller can keep reusing it (``from_loader`` passes loader data here).

        :param value: A mapping that may contain the ``interval``, ``day``,
            ``time``, and ``timezone`` keys. The first three are forwarded to
            ``interval2crontab()``; ``timezone`` becomes the ``tz`` field.
        :param externals: An extras external parameter that will keep in
            extras.
        :rtype: Self
        """
        passing: DictStr = {}
        if "timezone" in value:
            # NOTE: read instead of ``pop`` so the caller's mapping stays
            #   untouched.
            passing["tz"] = value["timezone"]
        passing["cronjob"] = interval2crontab(
            **{k: value[k] for k in ("interval", "day", "time") if k in value}
        )
        return cls(extras=externals, **passing)

    @classmethod
    def from_loader(
        cls,
        name: str,
        externals: DictData,
    ) -> Self:
        """Constructor from the name of config that will use loader object for
        getting the data.

        :param name: A name of config that will getting from loader.
        :param externals: An extras external parameter that will keep in
            extras.
        :raises ValueError: If the loaded config type is not this class, or
            the config has neither an ``interval`` nor a ``cronjob`` key.
        :rtype: Self
        """
        loader: Loader = Loader(name, externals=externals)

        # NOTE: Validate the config type match with current connection model
        if loader.type != cls:
            raise ValueError(f"Type {loader.type} does not match with {cls}")

        # NOTE: an interval config is converted to a crontab first.
        if "interval" in loader.data:
            return cls.from_value(loader.data, externals=externals)

        if "cronjob" not in loader.data:
            raise ValueError("Config does not set ``cronjob`` value")

        # NOTE: only pass ``tz`` when the config sets it, so the field default
        #   applies otherwise.
        tz_kwargs: DictStr = (
            {"tz": loader.data["timezone"]} if "timezone" in loader.data else {}
        )
        return cls(cronjob=loader.data["cronjob"], extras=externals, **tz_kwargs)

    @field_validator("tz")
    def __validate_tz(cls, value: str):
        """Validate that the timezone value can initialize a ``ZoneInfo``.

        :param value: A timezone key, e.g. 'Etc/UTC'.
        :raises ValueError: If the key is not found by ``ZoneInfo``.
        """
        try:
            _ = ZoneInfo(value)
            return value
        except ZoneInfoNotFoundError as err:
            raise ValueError(f"Invalid timezone: {value}") from err

    @field_validator("cronjob", mode="before")
    def __prepare_cronjob(cls, value: str | CronJob) -> CronJob:
        """Prepare crontab value that able to receive with string type.

        :param value: A crontab string, or an already-built ``CronJob`` that
            is passed through unchanged.
        """
        return CronJob(value) if isinstance(value, str) else value

    def generate(self, start: str | datetime) -> CronRunner:
        """Return Cron runner object.

        :param start: A start datetime, or an ISO-format string that is parsed
            with ``datetime.fromisoformat()``. The value is converted to this
            schedule's timezone; ``astimezone()`` presumes a naive datetime is
            in the system local timezone.
        :rtype: CronRunner
        """
        if not isinstance(start, datetime):
            start = datetime.fromisoformat(start)
        return self.cronjob.schedule(date=start.astimezone(ZoneInfo(self.tz)))
140
+
141
+
142
class AwsSchedule(Schedule):
    """Schedule implementation for AWS services.

    This subclass declares no fields or methods of its own; everything is
    inherited from ``Schedule``.
    """
@@ -5,18 +5,21 @@
5
5
  # ------------------------------------------------------------------------------
6
6
  from __future__ import annotations
7
7
 
8
+ import contextlib
8
9
  import inspect
9
10
  import itertools
10
11
  import logging
11
12
  import subprocess
13
+ import sys
12
14
  import time
15
+ import uuid
13
16
  from abc import ABC, abstractmethod
14
17
  from inspect import Parameter
18
+ from pathlib import Path
15
19
  from queue import Queue
16
20
  from subprocess import CompletedProcess
17
21
  from typing import Any, Callable, Optional, Union
18
22
 
19
- import msgspec as spec
20
23
  from pydantic import BaseModel, Field
21
24
  from pydantic.functional_validators import model_validator
22
25
  from typing_extensions import Self
@@ -25,11 +28,14 @@ from .__regex import RegexConf
25
28
  from .__types import DictData, DictStr
26
29
  from .exceptions import TaskException
27
30
  from .loader import Loader, map_params
28
- from .utils import Params, make_registry
31
+ from .utils import Params, TaskSearch, make_exec, make_registry
29
32
 
30
33
 
31
34
  class BaseStage(BaseModel, ABC):
32
- """Base Stage Model that keep only id and name fields."""
35
+ """Base Stage Model that keep only id and name fields for the stage
36
+ metadata. If you want to implement any custom stage, you can use this class
37
+ to parent and implement ``self.execute()`` method only.
38
+ """
33
39
 
34
40
  id: Optional[str] = Field(
35
41
  default=None,
@@ -45,13 +51,20 @@ class BaseStage(BaseModel, ABC):
45
51
  @abstractmethod
46
52
  def execute(self, params: DictData) -> DictData:
47
53
  """Execute abstraction method that action something by sub-model class.
54
+ This is important method that make this class is able to be the stage.
48
55
 
49
56
  :param params: A parameter data that want to use in this execution.
57
+ :rtype: DictData
50
58
  """
51
59
  raise NotImplementedError("Stage should implement ``execute`` method.")
52
60
 
53
61
  def set_outputs(self, rs: DictData, params: DictData) -> DictData:
54
- """Set an outputs from execution process to an input params."""
62
+ """Set an outputs from execution process to an input params.
63
+
64
+ :param rs: A result data that want to extract to an output key.
65
+ :param params: A context data that want to add output result.
66
+ :rtype: DictData
67
+ """
55
68
  if self.id is None:
56
69
  return params
57
70
 
@@ -63,24 +76,57 @@ class BaseStage(BaseModel, ABC):
63
76
 
64
77
 
65
78
class EmptyStage(BaseStage):
    """Stage that performs no work: it only logs its own name and hands the
    context back unchanged.
    """

    def execute(self, params: DictData) -> DictData:
        """Log the stage name at INFO level and return ``params`` as-is.

        :param params: A context data of the current execution. This stage
            does not add any output to it.
        :rtype: DictData
        """
        message: str = f"[STAGE]: Empty-Execute: {self.name!r}"
        logging.info(message)
        return params
72
92
 
73
93
 
74
94
  class ShellStage(BaseStage):
75
- """Shell statement stage."""
95
+ """Shell stage that execute bash script on the current OS. That mean if your
96
+ current OS is Windows, it will running bash in the WSL.
97
+ """
76
98
 
77
- shell: str
78
- env: DictStr = Field(default_factory=dict)
99
+ shell: str = Field(description="A shell statement that want to execute.")
100
+ env: DictStr = Field(
101
+ default_factory=dict,
102
+ description=(
103
+ "An environment variable mapping that want to set before execute "
104
+ "this shell statement."
105
+ ),
106
+ )
79
107
 
80
- @staticmethod
81
- def __prepare_shell(shell: str):
82
- """Prepare shell statement string that include newline"""
83
- return shell.replace("\n", ";")
108
@contextlib.contextmanager
def __prepare_shell(self):
    """Return context of prepared shell statement that want to execute.

    This step writes a uniquely named ``.sh`` file into the current working
    directory, marks it executable, and yields the command list
    ``[<shell>, <file name>]`` for ``subprocess.run``. The file is always
    removed when the context exits, even if writing the script or the body
    of the ``with`` block raises.

    The script starts with a shebang line, followed by one ``KEY=value;``
    assignment per entry of ``self.env``, then the ``self.shell`` statement.

    :rtype: Iterator[list[str]]
    """
    # NOTE: local import keeps this method self-contained; ``shlex.quote``
    #   escapes values so a quote character cannot break the script.
    import shlex

    f_name: str = f"{uuid.uuid4()}.sh"
    f_shebang: str = "bash" if sys.platform.startswith("win") else "sh"
    try:
        with open(f"./{f_name}", mode="w", newline="\n") as f:
            f.write(f"#!/bin/{f_shebang}\n")

            for k, v in self.env.items():
                f.write(f"{k}={shlex.quote(v)};\n")

            # NOTE: make sure that shell script file does not have `\r` char.
            f.write(self.shell.replace("\r\n", "\n"))

        make_exec(f"./{f_name}")

        yield [f_shebang, f_name]
    finally:
        # NOTE: ``missing_ok`` covers the case where opening the file failed
        #   before it was created.
        Path(f_name).unlink(missing_ok=True)
84
130
 
85
131
  def set_outputs(self, rs: CompletedProcess, params: DictData) -> DictData:
86
132
  """Set outputs to params"""
@@ -95,8 +141,7 @@ class ShellStage(BaseStage):
95
141
  # NOTE: The output will filter unnecessary keys from ``_locals``.
96
142
  "outputs": {
97
143
  "return_code": rs.returncode,
98
- "stdout": rs.stdout,
99
- "stderr": rs.stderr,
144
+ "stdout": rs.stdout.rstrip("\n"),
100
145
  },
101
146
  }
102
147
  return params
@@ -105,19 +150,22 @@ class ShellStage(BaseStage):
105
150
  """Execute the Shell & Powershell statement with the Python build-in
106
151
  ``subprocess`` package.
107
152
  """
108
- rs: CompletedProcess = subprocess.run(
109
- self.__prepare_shell(self.shell),
110
- capture_output=True,
111
- text=True,
112
- shell=True,
113
- )
153
+ with self.__prepare_shell() as sh:
154
+ logging.info(f"[STAGE]: Shell-Execute: {sh}")
155
+ rs: CompletedProcess = subprocess.run(
156
+ sh,
157
+ shell=False,
158
+ capture_output=True,
159
+ text=True,
160
+ )
114
161
  if rs.returncode > 0:
115
- print(f"{rs.stderr}\nRunning Statement:\n---\n{self.shell}")
116
- # FIXME: raise err for this execution.
117
- # raise TaskException(
118
- # f"{rs.stderr}\nRunning Statement:\n---\n"
119
- # f"{self.shell}"
120
- # )
162
+ err: str = (
163
+ rs.stderr.encode("utf-8").decode("utf-16")
164
+ if "\\x00" in rs.stderr
165
+ else rs.stderr
166
+ )
167
+ logging.error(f"{err}\nRunning Statement:\n---\n{self.shell}")
168
+ raise TaskException(f"{err}\nRunning Statement:\n---\n{self.shell}")
121
169
  self.set_outputs(rs, params)
122
170
  return params
123
171
 
@@ -138,7 +186,12 @@ class PyStage(BaseStage):
138
186
  return rs
139
187
 
140
188
  def set_outputs(self, rs: DictData, params: DictData) -> DictData:
141
- """Set outputs to params"""
189
+ """Set an outputs from execution process to an input params.
190
+
191
+ :param rs: A result data that want to extract to an output key.
192
+ :param params: A context data that want to add output result.
193
+ :rtype: DictData
194
+ """
142
195
  # NOTE: skipping set outputs of stage execution when id does not set.
143
196
  if self.id is None:
144
197
  return params
@@ -178,18 +231,6 @@ class PyStage(BaseStage):
178
231
  return params | {k: _globals[k] for k in params if k in _globals}
179
232
 
180
233
 
181
- class TaskSearch(spec.Struct, kw_only=True, tag="task"):
182
- """Task Search Struct that use the `msgspec` for the best performance."""
183
-
184
- path: str
185
- func: str
186
- tag: str
187
-
188
- def to_dict(self) -> DictData:
189
- """Return dict data from struct fields."""
190
- return {f: getattr(self, f) for f in self.__struct_fields__}
191
-
192
-
193
234
  class TaskStage(BaseStage):
194
235
  """Task executor stage that running the Python function."""
195
236
 
@@ -286,11 +327,14 @@ class Strategy(BaseModel):
286
327
 
287
328
 
288
329
  class Job(BaseModel):
289
- """Job Model"""
330
+ """Job Model that is able to call a group of stages."""
290
331
 
291
332
  runs_on: Optional[str] = Field(default=None)
292
333
  stages: list[Stage] = Field(default_factory=list)
293
- needs: list[str] = Field(default_factory=list)
334
+ needs: list[str] = Field(
335
+ default_factory=list,
336
+ description="A list of the job ID that want to run before this job.",
337
+ )
294
338
  strategy: Strategy = Field(default_factory=Strategy)
295
339
 
296
340
  @model_validator(mode="before")
@@ -365,11 +409,14 @@ class Pipeline(BaseModel):
365
409
  coding line to execute it.
366
410
  """
367
411
 
412
+ desc: Optional[str] = Field(default=None)
368
413
  params: dict[str, Params] = Field(default_factory=dict)
414
+ on: dict[str, DictStr] = Field(default_factory=dict)
369
415
  jobs: dict[str, Job]
370
416
 
371
417
  @model_validator(mode="before")
372
418
  def __prepare_params(cls, values: DictData) -> DictData:
419
+ # NOTE: Prepare params type if it passing with only type value.
373
420
  if params := values.pop("params", {}):
374
421
  values["params"] = {
375
422
  p: (
@@ -385,8 +432,9 @@ class Pipeline(BaseModel):
385
432
  def from_loader(
386
433
  cls,
387
434
  name: str,
388
- externals: Optional[DictData] = None,
435
+ externals: DictData | None = None,
389
436
  ) -> Self:
437
+ """Create Pipeline instance from the Loader object."""
390
438
  loader: Loader = Loader(name, externals=(externals or {}))
391
439
  if "jobs" not in loader.data:
392
440
  raise ValueError("Config does not set ``jobs`` value")
@@ -421,8 +469,10 @@ class Pipeline(BaseModel):
421
469
  included in the pipeline.
422
470
 
423
471
  :param params: An input parameters that use on pipeline execution.
424
- :param time_out: A time out second value for limit time of this
425
- execution.
472
+ :param time_out: A time out in second unit that use for limit time of
473
+ this pipeline execution.
474
+
475
+ ---
426
476
 
427
477
  See Also:
428
478
 
@@ -446,6 +496,7 @@ class Pipeline(BaseModel):
446
496
  if any(p not in params for p in self.params if self.params[p].required):
447
497
  raise ValueError("Required parameter does not pass")
448
498
 
499
+ # NOTE: mapping type of param before adding it to params variable.
449
500
  params: DictData = {
450
501
  "params": (
451
502
  params
@@ -458,6 +509,8 @@ class Pipeline(BaseModel):
458
509
  "jobs": {},
459
510
  }
460
511
 
512
+ # NOTE: create a job queue that keep the job that want to running after
513
+ # it dependency condition.
461
514
  jq = Queue()
462
515
  for job_id in self.jobs:
463
516
  jq.put(job_id)
@@ -472,6 +525,7 @@ class Pipeline(BaseModel):
472
525
  job_id: str = jq.get()
473
526
  logging.info(f"[PIPELINE]: Start execute the job: {job_id!r}")
474
527
  job: Job = self.jobs[job_id]
528
+
475
529
  # TODO: Condition on ``needs`` of this job was set. It should create
476
530
  # multithreading process on this step.
477
531
  # But, I don't know how to handle changes params between each job
@@ -480,8 +534,10 @@ class Pipeline(BaseModel):
480
534
  # >>> import multiprocessing
481
535
  # >>> with multiprocessing.Pool(processes=3) as pool:
482
536
  # ... results = pool.starmap(merge_names, ('', '', ...))
537
+ #
483
538
  if any(params["jobs"].get(need) for need in job.needs):
484
539
  jq.put(job_id)
540
+
485
541
  job.execute(params=params)
486
542
  params["jobs"][job_id] = {
487
543
  "stages": params.pop("stages", {}),
@@ -3,4 +3,4 @@
3
3
  # Licensed under the MIT License. See LICENSE in the project root for
4
4
  # license information.
5
5
  # ------------------------------------------------------------------------------
6
- from ._polars import *
6
+ from .dummy import *
@@ -0,0 +1,52 @@
1
+ # ------------------------------------------------------------------------------
2
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
3
+ # Licensed under the MIT License. See LICENSE in the project root for
4
+ # license information.
5
+ # ------------------------------------------------------------------------------
6
+ from __future__ import annotations
7
+
8
+ from typing import Any
9
+
10
+ from ddeutil.workflow.utils import tag
11
+
12
+
13
@tag("polars-dir", name="el-csv-to-parquet")
def dummy_task_1(
    source: str,
    sink: str,
    conversion: dict[str, Any] | None = None,
) -> dict[str, int]:
    """Simulate an extract-load run from a CSV source to a Parquet sink.

    Nothing is read or written here; the function only prints its progress
    and returns a fixed record count.

    :param source: A source location that is echoed in the read message.
    :param sink: A sink location that is echoed in the write message.
    :param conversion: An optional schema conversion mapping. The conversion
        message is printed only when it is non-empty.
    :rtype: dict[str, int]
    """
    print("Start EL for CSV to Parquet with Polars Engine", "---", sep="\n")
    print(f"Reading data from {source}")

    # NOTE: ``None`` and an empty mapping both mean "no conversion".
    if conversion:
        print("Start Schema Conversion ...")

    print(f"Writing data to {sink}")
    return {"records": 1}
35
+
36
+
37
@tag("polars-dir-scan", name="el-csv-to-parquet")
def dummy_task_2(
    source: str,
    sink: str,
    conversion: dict[str, Any] | None = None,
) -> dict[str, int]:
    """Simulate the scan variant of the CSV to Parquet extract-load task.

    It only prints progress messages and returns a fixed record count; no
    data is touched.

    :param source: A source location that is echoed in the read message.
    :param sink: A sink location that is echoed in the write message.
    :param conversion: An optional schema conversion mapping. The conversion
        message is printed only when it is non-empty.
    :rtype: dict[str, int]
    """
    banner: tuple[str, str] = (
        "Start EL for CSV to Parquet with Polars Engine",
        "---",
    )
    print(*banner, sep="\n")
    print(f"Reading data from {source}")

    # NOTE: a missing or empty mapping skips the conversion message.
    has_conversion: bool = bool(conversion)
    if has_conversion:
        print("Start Schema Conversion ...")

    print(f"Writing data to {sink}")
    return {"records": 1}
ddeutil/workflow/utils.py CHANGED
@@ -6,18 +6,23 @@
6
6
  from __future__ import annotations
7
7
 
8
8
  import inspect
9
+ import stat
9
10
  from abc import ABC, abstractmethod
10
11
  from datetime import date, datetime
11
12
  from functools import wraps
12
13
  from importlib import import_module
14
+ from pathlib import Path
13
15
  from typing import Any, Callable, Literal, Optional, Protocol, Union
14
16
 
17
+ import msgspec as spec
15
18
  from ddeutil.core import lazy
16
19
  from ddeutil.io.models.lineage import dt_now
17
20
  from pydantic import BaseModel, Field
18
21
  from pydantic.functional_validators import model_validator
19
22
  from typing_extensions import Self
20
23
 
24
+ from .__types import DictData
25
+
21
26
 
22
27
  class TagFunc(Protocol):
23
28
  """Tag Function Protocol"""
@@ -28,16 +33,16 @@ class TagFunc(Protocol):
28
33
  def __call__(self, *args, **kwargs): ...
29
34
 
30
35
 
31
- def tag(tag_value: str, name: str | None = None):
36
+ def tag(value: str, name: str | None = None):
32
37
  """Tag decorator function that set function attributes, ``tag`` and ``name``
33
38
  for making registries variable.
34
39
 
35
- :param: tag_value: A tag value for make different use-case of a function.
40
+ :param: value: A tag value for make different use-case of a function.
36
41
  :param: name: A name that keeping in registries.
37
42
  """
38
43
 
39
- def func_internal(func: TagFunc):
40
- func.tag = tag_value
44
+ def func_internal(func: callable) -> TagFunc:
45
+ func.tag = value
41
46
  func.name = name or func.__name__.replace("_", "-")
42
47
 
43
48
  @wraps(func)
@@ -50,7 +55,10 @@ def tag(tag_value: str, name: str | None = None):
50
55
 
51
56
 
52
57
  def make_registry(module: str) -> dict[str, dict[str, Callable[[], TagFunc]]]:
53
- """Return registries of all functions that able to called with task."""
58
+ """Return registries of all functions that able to called with task.
59
+
60
+ :param module: A module prefix that want to import registry.
61
+ """
54
62
  rs: dict[str, dict[str, Callable[[], Callable]]] = {}
55
63
  for fstr, func in inspect.getmembers(
56
64
  import_module(module), inspect.isfunction
@@ -178,3 +186,23 @@ Params = Union[
178
186
  DatetimeParams,
179
187
  StrParams,
180
188
  ]
189
+
190
+
191
def make_exec(path: str | Path) -> None:
    """Add the owner-execute permission bit to an existing file.

    All other permission bits of the file are kept as they are.

    :param path: A file location, as a string or a ``Path`` object.
    """
    target: Path = Path(path)
    current_mode: int = target.stat().st_mode
    target.chmod(current_mode | stat.S_IEXEC)
195
+
196
+
197
class TaskSearch(spec.Struct, kw_only=True, tag="task"):
    """Task search struct built on ``msgspec`` for fast data serialization.

    The struct is keyword-only and tagged as ``"task"``. It carries three
    string fields: ``path``, ``func``, and ``tag``.
    """

    path: str
    func: str
    tag: str

    def to_dict(self) -> DictData:
        """Return a mapping of every struct field name to its current value.

        :rtype: DictData
        """
        data: DictData = {}
        for field_name in self.__struct_fields__:
            data[field_name] = getattr(self, field_name)
        return data