ddeutil-workflow 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,87 @@
1
+ # ------------------------------------------------------------------------------
2
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
3
+ # Licensed under the MIT License. See LICENSE in the project root for
4
+ # license information.
5
+ # ------------------------------------------------------------------------------
6
+ from __future__ import annotations
7
+
8
+ from datetime import datetime
9
+ from typing import Annotated
10
+ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
11
+
12
+ from ddeutil.io import Params
13
+ from ddeutil.workflow.vendors.__schedule import CronJob, CronRunner
14
+ from pydantic import BaseModel, ConfigDict, Field
15
+ from pydantic.functional_validators import field_validator
16
+ from typing_extensions import Self
17
+
18
+ from .__types import DictData
19
+ from .exceptions import ScdlArgumentError
20
+ from .loader import SimLoad
21
+
22
+
23
class BaseScdl(BaseModel):
    """Base Scdl (Schedule) Model.

    Couples a ``CronJob`` with a timezone name and an extras mapping, and
    produces a ``CronRunner`` for a concrete start time via :meth:`generate`.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # NOTE: This is fields
    cronjob: Annotated[CronJob, Field(description="Cron job of this schedule")]
    # NOTE(review): the previous default was "utc", which is not a valid key
    # in the IANA tz database on case-sensitive installs (``ZoneInfo("utc")``
    # raises ZoneInfoNotFoundError); pydantic does not validate default
    # values, so ``generate()`` would crash with the default. "UTC" is the
    # canonical key.
    tz: Annotated[str, Field(description="Timezone")] = "UTC"
    extras: Annotated[
        DictData,
        Field(default_factory=dict, description="Extras mapping of parameters"),
    ]

    @classmethod
    def from_loader(
        cls,
        name: str,
        params: Params,
        externals: DictData,
    ) -> Self:
        """Construct a schedule from a loader config.

        :param name: config name passed to ``SimLoad``.
        :param params: loader parameters.
        :param externals: external values, stored on ``extras``.
        :raises ScdlArgumentError: if the loaded config has no ``cronjob``.
        """
        loader: SimLoad = SimLoad(name, params=params, externals=externals)
        if "cronjob" not in loader.data:
            raise ScdlArgumentError(
                "cronjob", "Config does not set ``cronjob``"
            )
        return cls(cronjob=loader.data["cronjob"], extras=externals)

    @field_validator("tz")
    def __validate_tz(cls, value: str):
        """Reject timezone names that the zoneinfo database does not know."""
        try:
            _ = ZoneInfo(value)
            return value
        except ZoneInfoNotFoundError as err:
            raise ValueError(f"Invalid timezone: {value}") from err

    @field_validator("cronjob", mode="before")
    def __prepare_cronjob(cls, value: str | CronJob) -> CronJob:
        """Coerce a cron expression string to ``CronJob`` before validation."""
        return CronJob(value) if isinstance(value, str) else value

    def generate(self, start: str | datetime) -> CronRunner:
        """Return Cron runner object.

        :param start: start datetime, or an ISO-format datetime string.
        :return: a ``CronRunner`` scheduled from ``start`` localized to
            ``self.tz``.
        """
        if not isinstance(start, datetime):
            start: datetime = datetime.fromisoformat(start)
        return self.cronjob.schedule(date=(start.astimezone(ZoneInfo(self.tz))))
67
+
68
+
69
class Scdl(BaseScdl):
    """Concrete Scdl (Schedule) model.

    Behavior is inherited unchanged from ``BaseScdl``; the main entry point
    for callers is the inherited ``generate()`` method, which builds a
    ``CronRunner`` from a start datetime.
    """
75
+
76
+
77
class ScdlBkk(Scdl):
    """Asia/Bangkok Scdl (Schedule) timezone model.

    Identical to ``Scdl`` except that the ``tz`` field defaults to
    ``"Asia/Bangkok"``, so generated cron runners are localized to the
    Bangkok timezone rather than the base default.
    """

    tz: Annotated[str, Field(description="Timezone")] = "Asia/Bangkok"
84
+
85
+
86
class AwsScdl(BaseScdl):
    """Schedule model intended for AWS services.

    No AWS-specific behavior is defined here yet; everything is inherited
    from ``BaseScdl``.
    """
@@ -0,0 +1,10 @@
1
+ from typing import Any
2
+
3
+ from ddeutil.core import lazy
4
+
5
# Engine-specific lazy import hooks for the "el-csv-to-parquet" task.
_EL_CSV_TO_PARQUET: dict[str, Any] = {
    "polars": lazy("ddeutil.workflow.tasks._polars.csv_to_parquet"),
    "polars-dir": lazy("ddeutil.workflow.tasks._polars.csv_to_parquet_dir"),
}

# Task registry: maps a task name to its available engine implementations.
registries: dict[str, Any] = {
    "el-csv-to-parquet": _EL_CSV_TO_PARQUET,
}
@@ -0,0 +1,41 @@
1
+ # ------------------------------------------------------------------------------
2
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
3
+ # Licensed under the MIT License. See LICENSE in the project root for
4
+ # license information.
5
+ # ------------------------------------------------------------------------------
6
+ from __future__ import annotations
7
+
8
+ from typing import Any
9
+ from uuid import uuid4
10
+
11
+ import polars as pl
12
+ import pyarrow.parquet as pq
13
+ from ddeutil.workflow.dataset import PolarsCsv, PolarsParq
14
+
15
+
16
def csv_to_parquet_dir(
    source: str,
    sink: str,
    conversion: dict[str, Any] | None = None,
):
    """EL task: load a CSV dataset with Polars and write it out to a
    Parquet dataset directory via pyarrow.

    :param source: loader config name of the source CSV dataset.
    :param sink: loader config name of the Parquet sink dataset.
    :param conversion: optional schema-conversion mapping (currently a stub,
        see NOTE below).
    :return: mapping with the record count under ``"records"``.
    """
    print("Start EL for CSV to Parquet with Polars Engine")
    print("---")
    # STEP 01: Read the source data to Polars.
    src_dataset: PolarsCsv = PolarsCsv.from_loader(name=source, externals={})
    src_df = src_dataset.load()
    print(src_df)

    # STEP 02: Schema conversion on Polars DataFrame.
    # NOTE(review): the conversion mapping is announced but never applied to
    # ``src_df`` — this branch only prints. TODO: implement the cast/rename
    # logic or remove the parameter.
    conversion: dict[str, Any] = conversion or {}
    if conversion:
        print("Start Schema Conversion ...")

    # STEP 03: Write data to parquet file format.
    # NOTE: ``sink`` is rebound here from the config name (str) to the loaded
    # PolarsParq dataset object.
    sink = PolarsParq.from_loader(name=sink, externals={})
    pq.write_to_dataset(
        table=src_df.to_arrow(),
        root_path=f"{sink.conn.endpoint}/{sink.object}",
        compression="snappy",
        basename_template=f"{sink.object}-{uuid4().hex}-{{i}}.snappy.parquet",
    )
    return {"records": src_df.select(pl.len()).item()}
File without changes
@@ -0,0 +1,33 @@
1
+ import datetime as dt
2
+ from typing import Any
3
+
4
+
5
def datetime(value: Any) -> dt.datetime:
    """Coerce *value* to a ``datetime.datetime``.

    Accepted inputs: a datetime (returned unchanged), a date (midnight of
    that day), ``None`` (current UTC time), or an ISO-format string.

    :raises ValueError: for any other input type.
    """
    if isinstance(value, dt.datetime):
        return value
    if isinstance(value, dt.date):
        return dt.datetime(value.year, value.month, value.day)
    if value is None:
        return dt.datetime.now(dt.timezone.utc)
    if isinstance(value, str):
        return dt.datetime.fromisoformat(value)
    raise ValueError(
        f"Value that want to convert to datetime does not support for "
        f"type: {type(value)}"
    )
18
+
19
+
20
def string(value: Any) -> str:
    """Coerce any value to its ``str`` representation."""
    return str(value)
22
+
23
+
24
def integer(value: Any) -> int:
    """Coerce *value* to ``int``.

    Non-int values go through ``int(str(value))``, so floats and
    non-numeric strings are rejected rather than truncated.

    :raises ValueError: when the value cannot be converted to an integer.
    """
    if not isinstance(value, int):
        try:
            return int(str(value))
        # NOTE: ``int("abc")`` raises ValueError, not TypeError; the
        # original only caught TypeError, so invalid strings escaped with
        # the builtin message instead of this descriptive one.
        except (TypeError, ValueError) as err:
            raise ValueError(
                f"Value that want to convert to integer does not support for "
                f"type: {type(value)}"
            ) from err
    return value
@@ -0,0 +1,2 @@
1
def conn(value):
    """Pass-through converter for connection values: no coercion applied."""
    return value
@@ -0,0 +1,333 @@
1
+ """
2
+ Reference:
3
+ * https://github.com/LarsHill/metadict
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import contextlib
9
+ import copy
10
+ import keyword
11
+ import re
12
+ import warnings
13
+ from collections.abc import (
14
+ Iterable,
15
+ Iterator,
16
+ KeysView,
17
+ Mapping,
18
+ MutableMapping,
19
+ )
20
+ from re import Pattern
21
+ from typing import (
22
+ Any,
23
+ Optional,
24
+ TypeVar,
25
+ )
26
+
27
+ from typing_extensions import Self
28
+
29
+
30
def _warning(
    message,
    category=UserWarning,
    filename="",
    lineno=-1,
    file=None,
    line="",
):
    """Replacement for ``warnings.showwarning`` that prints the warning
    category and message only, suppressing the call-site line information
    the default implementation includes.
    """
    record = warnings.WarningMessage(
        message, category, filename, lineno, file, line
    )
    print(f"{record.category.__name__}: {record.message}")
45
+
46
+
47
# Route all warnings through the patched printer above so warnings emitted
# by this module (e.g. protected-key warnings in MetaDict.__setitem__) are
# shown without call-site context.
warnings.showwarning = _warning

# Generic key/value type variables for the MetaDict mapping below.
KT = TypeVar("KT")
VT = TypeVar("VT")

# NOTE: regex to enforce python variable/attribute syntax
ALLOWED_VAR_SYNTAX: Pattern = re.compile(r"[a-zA-Z_]\w*")
54
+
55
+
56
def complies_variable_syntax(name: Any) -> bool:
    """Check whether *name* is a string that is a valid, non-keyword Python
    identifier (i.e. usable for attribute dot-notation access).

    :param name: candidate key of any type.
    :return: True only for identifier-like, non-keyword strings.
    """
    # ``str.isidentifier()`` correctly rejects the empty string; the previous
    # regex-join approach returned True for "". It also accepts Unicode
    # identifiers, matching Python's own attribute-name rules.
    return (
        isinstance(name, str)
        and name.isidentifier()
        and not keyword.iskeyword(name)
    )
64
+
65
+
66
class MetaDict(MutableMapping[KT, VT], dict):
    """Class that extends `dict` to access and assign keys via attribute dot
    notation.

    Examples:
        >>> d = MetaDict({'foo': {'bar': [{'a': 1}, {'a': 2}]}})
        >>> d.foo.bar[1].a
        2
        >>> d["foo"]["bar"][1]["a"]
        2
        >>> d.bar = 'demo'
        >>> d.bar
        'demo'

    `MetaDict` inherits from MutableMapping to avoid overwriting all `dict`
    methods. In addition, it inherits from `dict` to pass the quite common
    `isinstance(obj, dict)` check.

    Also, inheriting from `dict` enables json encoding/decoding without a
    custom encoder.
    """

    def __init__(self, *args, nested_assign: bool = False, **kwargs) -> None:
        # NOTE: check that 'nested_assign' is of type bool
        if not isinstance(nested_assign, bool):
            raise TypeError(
                "Keyword argument 'nested_assign' must be an instance of "
                "type 'bool'"
            )

        # NOTE: init internal attributes and data store. Written straight
        # into ``self.__dict__`` to bypass the overridden ``__setattr__``,
        # which would otherwise route these into the data store itself.
        self.__dict__["_data"]: dict[KT, VT] = {}
        self.__dict__["_nested_assign"] = nested_assign
        # ``_parent``/``_key`` link an auto-created nested MetaDict back to
        # its parent (see ``__missing__`` / ``__setitem__``).
        self.__dict__["_parent"] = kwargs.pop("_parent", None)
        self.__dict__["_key"] = kwargs.pop("_key", None)

        # update state of data store
        self.update(*args, **kwargs)

        # call `dict` constructor with stored data to enable object encoding
        # (e.g. `json.dumps()`) that relies on `dict`
        dict.__init__(self, self._data)

    def __len__(self) -> int:
        return len(self._data)

    def __iter__(self) -> Iterator[KT]:
        return iter(self._data)

    def __setitem__(self, key: KT, value: VT) -> None:
        # show a warning if the assigned key or attribute is used internally
        # (e.g `items`, `keys`, etc.)
        try:
            self.__getattribute__(key)
            key_is_protected = True
        except (AttributeError, TypeError):
            key_is_protected = False
        if key_is_protected:
            warnings.warn(
                f"'{self.__class__.__name__}' object uses '{key}' internally. "
                f"'{key}' can only be accessed via `obj['{key}']`.",
                stacklevel=2,
            )

        # set key recursively
        self._data[key] = self._from_object(value)

        # update parent when nested keys or attributes are assigned.
        # NOTE: ``_parent`` is pop'ed (not read), so the write-back into the
        # parent chain created by ``__missing__`` happens only once, on the
        # first real assignment.
        parent = self.__dict__.pop("_parent", None)
        key = self.__dict__.get("_key", None)
        if parent is not None:
            parent[key] = self._data

    def __getitem__(self, key: KT) -> VT:
        try:
            value = self._data[key]
        except KeyError:
            # With nested assignment enabled, a missing key yields an empty
            # child MetaDict linked back to this one instead of raising.
            if self.nested_assign:
                return self.__missing__(key)
            raise

        return value

    def __missing__(self, key: KT) -> Self:
        # Auto-created child remembers its parent and key so a later
        # assignment can be propagated upward (see ``__setitem__``).
        return self.__class__(
            _parent=self, _key=key, nested_assign=self._nested_assign
        )

    def __delitem__(self, key: KT) -> None:
        del self._data[key]

    def __setattr__(self, attr: str, val: VT) -> None:
        # Attribute assignment is item assignment.
        self[attr] = val

    def __getattr__(self, key: KT) -> VT:
        # Attribute access falls back to item access; translate KeyError to
        # AttributeError to keep getattr()/hasattr() semantics.
        try:
            return self[key]
        except KeyError:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{key}'"
            ) from None

    def __delattr__(self, key: KT) -> None:
        try:
            del self[key]
        except KeyError:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{key}'"
            ) from None

    def __str__(self) -> str:
        return str(self._data)

    def __repr__(self) -> str:
        return repr(self._data)

    @staticmethod
    def repack_args(cls: type, state: dict) -> MetaDict:
        """Repack and rename keyword arguments stored in state before feeding
        to class constructor
        """
        _data = state.pop("_data")
        _nested_assign = state.pop("_nested_assign")
        return cls(_data, nested_assign=_nested_assign, **state)

    def __reduce__(self) -> tuple:
        """Return state information for pickling."""
        # Pickle via repack_args so the internal state keys are mapped back
        # onto constructor arguments on load.
        return MetaDict.repack_args, (self.__class__, self.__dict__)

    def __dir__(self) -> Iterable[str]:
        """Extend dir list with accessible dict keys (enables autocompletion
        when using dot notation)
        """
        dict_keys = [
            key for key in self._data.keys() if complies_variable_syntax(key)
        ]
        return dir(type(self)) + dict_keys

    def copy(self) -> Self:
        return self.__copy__()

    def __copy__(self) -> Self:
        # Shallow-copies every internal attribute (including _data) without
        # running __init__.
        cls = self.__class__
        result = cls.__new__(cls)
        result.__dict__.update(
            {k: copy.copy(v) for k, v in self.__dict__.items()}
        )
        return result

    @classmethod
    def fromkeys(
        cls,
        iterable: Iterable[KT],
        value: Optional[VT] = None,
    ) -> Self:
        """Constructor MetaDict form keys iterator.

        Examples:
            >>> def iter_keys() -> Iterable[str]:
            ...     for i in range(3):
            ...         yield f"k{i}"
            >>> MetaDict.fromkeys(iterable=iter_keys())
            {'k0': None, 'k1': None, 'k2': None}
        """
        return cls({key: value for key in iterable})

    def to_dict(self) -> dict:
        return MetaDict._to_object(self._data)

    @staticmethod
    def _to_object(obj: Any) -> Any:
        """Recursively converts all nested MetaDicts to dicts."""

        if isinstance(obj, (list, tuple, set)):
            # Only rebuild the container if it actually holds a Mapping
            # somewhere inside; otherwise return it untouched.
            if MetaDict._contains_mapping(obj):
                value = type(obj)(MetaDict._to_object(x) for x in obj)
            else:
                value = obj
        elif isinstance(obj, Mapping):
            value = {k: MetaDict._to_object(v) for k, v in obj.items()}
        else:
            value = obj

        return value

    def _from_object(self, obj: Any) -> Any:
        """Recursively converts all nested dicts to MetaDicts."""

        if isinstance(obj, (list, tuple, set)):
            if MetaDict._contains_mapping(obj):
                value = type(obj)(self._from_object(x) for x in obj)
            else:
                value = obj
        elif isinstance(obj, MetaDict):
            # Already converted; keep as-is.
            value = obj
        elif isinstance(obj, Mapping):
            value = self.__class__(
                {k: self._from_object(v) for k, v in obj.items()},
                nested_assign=self._nested_assign,
            )
        else:
            value = obj

        return value

    def _set_nested_assignment(self, val: bool):
        # Propagate the flag to this instance and every nested MetaDict,
        # including those held inside list/tuple/set values.
        self.__dict__["_nested_assign"] = val
        for value in self.values():
            if isinstance(value, (list, tuple, set)):
                for elem in value:
                    if isinstance(elem, MetaDict):
                        elem._set_nested_assignment(val)
            elif isinstance(value, MetaDict):
                value._set_nested_assignment(val)

    def enable_nested_assignment(self):
        self._set_nested_assignment(True)

    def disable_nested_assignment(self):
        self._set_nested_assignment(False)

    @contextlib.contextmanager
    def enabling_nested_assignment(self):
        """Context manager which temporarily enables nested key/attribute
        assignment.
        """
        # Restore the previous state on exit only if we actually changed it.
        nested_assign = self.nested_assign
        if not nested_assign:
            self.enable_nested_assignment()
        try:
            yield self
        finally:
            if not nested_assign:
                self.disable_nested_assignment()

    @property
    def nested_assign(self):
        # Read-only view of the nested-assignment flag.
        return self._nested_assign

    @staticmethod
    def _contains_mapping(
        iterable: Iterable, ignore: Optional[type] = None
    ) -> bool:
        """Recursively checks whether an Iterable contains an instance of
        Mapping.
        """
        for x in iterable:
            if isinstance(x, Mapping):
                if ignore is None or not isinstance(x, ignore):
                    return True
            elif isinstance(x, (list, set, tuple)):
                return MetaDict._contains_mapping(x, ignore)
        return False

    # NOTE: Add the following inherited methods from collections.abc.Mapping
    # directly to make pycharm happy to checking.
    # (removing an annoying warning for dict unpacking)
    def __contains__(self, key):
        try:
            self[key]
        except KeyError:
            return False
        else:
            return True

    def keys(self):
        """D.keys() -> a set-like object providing a view on D's keys"""
        return KeysView(self)
File without changes