ddeutil-workflow 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/utils.py CHANGED
@@ -6,22 +6,138 @@
6
6
  from __future__ import annotations
7
7
 
8
8
  import inspect
9
+ import logging
10
+ import os
9
11
  import stat
10
12
  from abc import ABC, abstractmethod
13
+ from ast import Call, Constant, Expr, Module, Name, parse
14
+ from collections.abc import Iterator
15
+ from dataclasses import dataclass, field
11
16
  from datetime import date, datetime
12
17
  from functools import wraps
18
+ from hashlib import md5
13
19
  from importlib import import_module
20
+ from itertools import product
14
21
  from pathlib import Path
15
22
  from typing import Any, Callable, Literal, Optional, Protocol, Union
23
+ from zoneinfo import ZoneInfo
16
24
 
17
- import msgspec as spec
18
- from ddeutil.core import lazy
25
+ from ddeutil.core import getdot, hasdot, import_string, lazy
26
+ from ddeutil.io import PathData, search_env_replace
19
27
  from ddeutil.io.models.lineage import dt_now
20
- from pydantic import BaseModel, Field
28
+ from pydantic import BaseModel, ConfigDict, Field
21
29
  from pydantic.functional_validators import model_validator
22
30
  from typing_extensions import Self
23
31
 
24
- from .__types import DictData
32
+ from .__types import DictData, Matrix, Re
33
+ from .exceptions import ParamValueException, UtilException
34
+
35
+
36
def get_diff_sec(dt: datetime, tz: ZoneInfo | None = None) -> int:
    """Return the rounded number of seconds between ``dt`` and the current
    datetime in the given timezone.

    :param dt: A datetime value that is compared with the current datetime.
    :param tz: A timezone of the current datetime. ``UTC`` is used when this
        value is not supplied.
    :rtype: int
    """
    current: datetime = datetime.now(tz=(tz or ZoneInfo("UTC")))
    seconds: float = (dt - current).total_seconds()
    return round(seconds)
43
+
44
+
45
class Engine(BaseModel):
    """Engine Model that keeps the paths and the registry module names."""

    paths: PathData = Field(default_factory=PathData)
    registry: list[str] = Field(
        default_factory=lambda: ["ddeutil.workflow"],
    )
    # NOTE: ``default_factory`` is required here; passing the lambda to
    #   ``default`` would make the lambda object itself the default value.
    registry_filter: list[str] = Field(
        default_factory=lambda: ["ddeutil.workflow.utils"],
    )

    @model_validator(mode="before")
    @classmethod
    def __prepare_registry(cls, values: DictData) -> DictData:
        """Prepare registry values that are passed with string type. It
        converts a non-empty string to a list that contains this string.

        :param values: A raw input data before the model validation.
        :rtype: DictData
        """
        # NOTE: a before-validator can receive a non-mapping input; leave it
        #   untouched and let the model validation handle it.
        if not isinstance(values, dict):
            return values

        for key in ("registry", "registry_filter"):
            raw: Any = values.get(key)
            if raw and isinstance(raw, str):
                values[key] = [raw]
        return values
68
+
69
+
70
class CoreConf(BaseModel):
    """Core Config Model that keeps the timezone value."""

    # NOTE(review): arbitrary types are allowed on this model, presumably
    #   because of the ``ZoneInfo`` typed field below -- confirm.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # NOTE: the default timezone is UTC.
    tz: ZoneInfo = Field(
        default_factory=lambda: ZoneInfo("UTC"),
    )
76
+
77
+
78
class ConfParams(BaseModel):
    """Params Model that groups the engine and the core config values."""

    # NOTE: both fields build their default from the model class itself.
    engine: Engine = Field(
        description="A engine mapping values.",
        default_factory=Engine,
    )
    core: CoreConf = Field(
        description="A core config value",
        default_factory=CoreConf,
    )
89
+
90
+
91
def config() -> ConfParams:
    """Build the config params from environment variables.

    The variables that are read by this function:

    * ``WORKFLOW_ROOT_PATH``: a root path, ``.`` by default.
    * ``WORKFLOW_CORE_REGISTRY``: comma separated registry module names.
    * ``WORKFLOW_CORE_REGISTRY_FILTER``: comma separated filter module names.
    * ``WORKFLOW_CORE_PATH_CONF``: a conf path that is joined after the root
      path. The conf path is ``None`` when this variable is not set.

    :rtype: ConfParams
    """
    root_path: str = os.getenv("WORKFLOW_ROOT_PATH", ".")

    regis_env = os.getenv("WORKFLOW_CORE_REGISTRY")
    regis: list[str] = (
        [r.strip() for r in regis_env.split(",")]
        if regis_env
        else ["ddeutil.workflow"]
    )

    regis_filter_env = os.getenv("WORKFLOW_CORE_REGISTRY_FILTER")
    regis_filter: list[str] = (
        [r.strip() for r in regis_filter_env.split(",")]
        if regis_filter_env
        else ["ddeutil.workflow.utils"]
    )

    conf_env = os.getenv("WORKFLOW_CORE_PATH_CONF")
    conf_path = f"{root_path}/{conf_env}" if conf_env else None

    engine: dict[str, Any] = {
        "registry": regis,
        "registry_filter": regis_filter,
        "paths": {"root": root_path, "conf": conf_path},
    }
    return ConfParams.model_validate(obj={"engine": engine})
120
+
121
+
122
def gen_id(value: Any, *, sensitive: bool = True, unique: bool = False) -> str:
    """Generate a running ID for tracking. The ID is the hex digest of the
    `md5` function.

    :param value: A value that is hashed. A non-string value is converted
        with ``str`` first.
    :param sensitive: A case-sensitive flag. The value is lower-cased before
        hashing when this flag is false.
    :param unique: A flag that appends the current datetime (timezone from
        the ``WORKFLOW_CORE_TIMEZONE`` env variable, ``UTC`` by default) to
        the value before hashing.
    :rtype: str
    """
    text: str = value if isinstance(value, str) else str(value)

    # NOTE: the timezone is resolved on every call, even when ``unique`` is
    #   not set.
    tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))

    if not sensitive:
        text = text.lower()

    suffix: str = f"{datetime.now(tz=tz):%Y%m%d%H%M%S%f}" if unique else ""
    return md5(f"{text}{suffix}".encode()).hexdigest()
25
141
 
26
142
 
27
143
  class TagFunc(Protocol):
@@ -33,20 +149,22 @@ class TagFunc(Protocol):
33
149
  def __call__(self, *args, **kwargs): ...
34
150
 
35
151
 
36
def tag(
    name: str, alias: str | None = None
) -> Callable[[Callable[..., Any]], TagFunc]:
    """Tag decorator function that set function attributes, ``tag`` and ``name``
    for making registries variable.

    :param name: A tag value for make different use-case of a function.
    :param alias: A alias function name that keeping in registries. If this
        value does not supply, it will use original function name from
        ``__name__`` with underscores replaced by dashes.
    :returns: A decorator that returns the wrapped function.
    """

    def func_internal(func: Callable[..., Any]) -> TagFunc:
        # NOTE: the attributes are set on the original function; ``wraps``
        #   copies them to the wrapper function.
        func.tag = name
        func.name = alias or func.__name__.replace("_", "-")

        @wraps(func)
        def wrapped(*args, **kwargs):
            # NOTE: Able to do anything before calling hook function.
            return func(*args, **kwargs)

        return wrapped

    return func_internal
55
173
 
56
174
 
57
Registry = dict[str, Callable[[], TagFunc]]


def make_registry(submodule: str) -> dict[str, Registry]:
    """Return registries of all tagged functions that are found in the
    ``submodule`` of every registry module from the config.

    The result maps a function register name to a mapping of its tag and a
    lazy import of that function.

    :param submodule: A submodule name that is joined after each registry
        module name before importing.
    :raises ValueError: If the same tag is found twice under the same
        register name.
    :rtype: dict[str, Registry]
    """
    registries: dict[str, Registry] = {}
    for module in config().engine.registry:
        # NOTE: a registry module that does not have this submodule is
        #   skipped.
        try:
            importer = import_module(f"{module}.{submodule}")
        except ModuleNotFoundError:
            continue

        for fstr, func in inspect.getmembers(importer, inspect.isfunction):
            # NOTE: only functions that were decorated by ``utils.tag`` have
            #   the ``tag`` attribute.
            if not hasattr(func, "tag"):
                continue

            # NOTE: a new register name starts with an empty tag mapping.
            tags: Registry = registries.setdefault(func.name, {})
            if func.tag in tags:
                raise ValueError(
                    f"The tag {func.tag!r} already exists on "
                    f"{module}.{submodule}, you should change this tag name or "
                    f"change it func name."
                )
            tags[func.tag] = lazy(f"{module}.{submodule}.{fstr}")

    return registries
80
212
 
81
213
 
82
- class BaseParams(BaseModel, ABC):
214
+ class BaseParam(BaseModel, ABC):
83
215
  """Base Parameter that use to make Params Model."""
84
216
 
85
217
  desc: Optional[str] = None
@@ -88,32 +220,32 @@ class BaseParams(BaseModel, ABC):
88
220
 
89
221
  @abstractmethod
90
222
  def receive(self, value: Optional[Any] = None) -> Any:
91
- raise ValueError(
223
+ raise NotImplementedError(
92
224
  "Receive value and validate typing before return valid value."
93
225
  )
94
226
 
95
227
 
96
class DefaultParam(BaseParam):
    """Default Parameter that validates a default value exists when the
    parameter is not required.
    """

    default: Optional[str] = None

    @abstractmethod
    def receive(self, value: Optional[Any] = None) -> Any:
        """Receive a value; a subclass must implement this method."""
        raise NotImplementedError(
            "Receive value and validate typing before return valid value."
        )

    @model_validator(mode="after")
    def check_default(self) -> Self:
        """Check that a non-required parameter has a default value.

        :raises ParamValueException: If this parameter is not required and
            the default value is ``None``.
        """
        if self.required or self.default is not None:
            return self
        raise ParamValueException(
            "Default should set when this parameter does not required."
        )
114
246
 
115
247
 
116
- class DatetimeParams(DefaultParams):
248
+ class DatetimeParam(DefaultParam):
117
249
  """Datetime parameter."""
118
250
 
119
251
  type: Literal["datetime"] = "datetime"
@@ -121,6 +253,7 @@ class DatetimeParams(DefaultParams):
121
253
  default: datetime = Field(default_factory=dt_now)
122
254
 
123
255
  def receive(self, value: str | datetime | date | None = None) -> datetime:
256
+ """Receive value that match with datetime."""
124
257
  if value is None:
125
258
  return self.default
126
259
 
@@ -129,44 +262,48 @@ class DatetimeParams(DefaultParams):
129
262
  elif isinstance(value, date):
130
263
  return datetime(value.year, value.month, value.day)
131
264
  elif not isinstance(value, str):
132
- raise ValueError(
265
+ raise ParamValueException(
133
266
  f"Value that want to convert to datetime does not support for "
134
267
  f"type: {type(value)}"
135
268
  )
136
269
  return datetime.fromisoformat(value)
137
270
 
138
271
 
139
class StrParam(DefaultParam):
    """String parameter."""

    type: Literal["str"] = "str"

    def receive(self, value: Optional[str] = None) -> str | None:
        """Receive a value and return it as ``str``. The default value is
        returned when the value is ``None``.
        """
        return self.default if value is None else str(value)
148
282
 
149
283
 
150
class IntParam(DefaultParam):
    """Integer parameter."""

    type: Literal["int"] = "int"

    def receive(self, value: Optional[int] = None) -> int | None:
        """Receive a value and return it as ``int``. The default value is
        returned when the value is ``None``.

        :param value: A value that is converted via ``int(str(value))`` when
            it is not an ``int`` instance already.
        :raises ParamValueException: If the value cannot be converted to an
            integer.
        """
        if value is None:
            return self.default
        if isinstance(value, int):
            return value

        try:
            return int(str(value))
        except (TypeError, ValueError) as err:
            # NOTE: ``int`` raises ``ValueError`` for a non-numeric string,
            #   so it must be caught together with ``TypeError``.
            raise ParamValueException(
                f"Value that want to convert to integer does not support "
                f"for type: {type(value)}"
            ) from err
167
302
 
168
303
 
169
- class ChoiceParams(BaseParams):
304
+ class ChoiceParam(BaseParam):
305
+ """Choice parameter."""
306
+
170
307
  type: Literal["choice"] = "choice"
171
308
  options: list[str]
172
309
 
@@ -177,32 +314,289 @@ class ChoiceParams(BaseParams):
177
314
  if value is None:
178
315
  return self.options[0]
179
316
  if any(value not in self.options):
180
- raise ValueError(f"{value} does not match any value in options")
317
+ raise ParamValueException(
318
+ f"{value!r} does not match any value in choice options."
319
+ )
181
320
  return value
182
321
 
183
322
 
184
# NOTE: Union of all parameter models that are defined in this module.
Param = Union[ChoiceParam, DatetimeParam, IntParam, StrParam]
189
329
 
190
330
 
331
@dataclass
class Result:
    """Result Dataclass object that carries a status code and a context
    mapping for passing parameter and receiving output from the pipeline
    execution.
    """

    # TODO: Add running ID to this result dataclass.
    # ---
    # parent_run_id: str
    # run_id: str
    #
    # NOTE: the default status value is 2.
    status: int = 2
    # NOTE: every instance gets its own empty context mapping.
    context: DictData = field(default_factory=dict)
344
+
345
+
191
346
def make_exec(path: str | Path) -> None:
    """Add the owner-execute bit to the mode of a file.

    :param path: A file path as a string or a ``Path`` object.
    """
    target: Path = path if not isinstance(path, str) else Path(path)
    current_mode: int = target.stat().st_mode
    target.chmod(current_mode | stat.S_IEXEC)
195
350
 
196
351
 
197
- class TaskSearch(spec.Struct, kw_only=True, tag="task"):
198
- """Task Search Struct that use the `msgspec` for the best performance data
199
- serialize.
352
# NOTE: Built-in post-filter functions keyed by the filter name. A value is
#   either a single callable or a list of callables that are applied to the
#   value one after another (see the ``rstr`` filter).
FILTERS: dict[str, Union[Callable[..., Any], list[Callable[..., Any]]]] = {
    "abs": abs,
    "str": str,
    "int": int,
    "upper": lambda x: x.upper(),
    "lower": lambda x: x.lower(),
    "rstr": [str, repr],
}
360
+
361
+
362
class FilterFunc(Protocol):
    """Filter Function Protocol"""

    # NOTE(review): the ``custom_filter`` decorator in this module sets a
    #   ``filter`` attribute on the function, not ``name`` -- confirm which
    #   attribute this protocol should declare.
    name: str

    def __call__(self, *args, **kwargs):
        ...
368
+
369
+
370
def custom_filter(name: str) -> Callable[[Callable[..., Any]], FilterFunc]:
    """Custom filter decorator function that set function attributes, ``filter``
    for making filter registries variable.

    :param name: A filter name for make different use-case of a function.
    :returns: A decorator that returns the wrapped function.
    """

    def func_internal(func: Callable[..., Any]) -> FilterFunc:
        # NOTE: the attribute is set on the original function; ``wraps``
        #   copies it to the wrapper function.
        func.filter = name

        @wraps(func)
        def wrapped(*args, **kwargs):
            # NOTE: Able to do anything before calling custom filter function.
            return func(*args, **kwargs)

        return wrapped

    return func_internal
388
+
389
+
390
# NOTE: A filter registry value is a filter function, any callable, or a list
#   of callables that are chained one after another. ``Callable[..., Any]`` is
#   the valid spelling for a callable with any arguments.
FilterRegistry = Union[
    FilterFunc,
    Callable[..., Any],
    list[Callable[..., Any]],
]
391
+
392
+
393
def make_filter_registry() -> dict[str, FilterRegistry]:
    """Return the registry of all filter functions. It collects functions
    that have the ``filter`` attribute from every filter registry module in
    the config and then adds the built-in ``FILTERS`` on top of them.

    :rtype: dict[str, FilterRegistry]
    """
    registry: dict[str, FilterRegistry] = {}
    for module in config().engine.registry_filter:
        # NOTE: a filter registry module that cannot be found is skipped.
        try:
            importer = import_module(module)
        except ModuleNotFoundError:
            continue

        # NOTE: only functions that were decorated by ``custom_filter`` have
        #   the ``filter`` attribute.
        registry.update(
            (func.filter, import_string(f"{module}.{fstr}"))
            for fstr, func in inspect.getmembers(importer, inspect.isfunction)
            if hasattr(func, "filter")
        )

    # NOTE: the built-in filters override custom filters with the same name.
    registry.update(FILTERS)
    return registry
416
+
417
+
418
def get_args_const(
    expr: str,
) -> tuple[str, list[Constant], dict[str, Constant]]:
    """Get the function name, arguments, and keyword-arguments from a
    function calling string.

    :param expr: A post-filter expression that is a plain function name like
        ``upper`` or a function call with constant arguments like
        ``fmt('%Y', sep='-')``.
    :raises UtilException: If the expression is not valid syntax, is empty,
        has more than one statement, is not a plain name or a call of a plain
        name, or passes a non-constant argument.
    :rtype: tuple[str, list[Constant], dict[str, Constant]]
    """
    try:
        mod: Module = parse(expr)
    except (SyntaxError, ValueError):
        # NOTE: ``ValueError`` can be raised by ``parse`` for a source string
        #   that contains null bytes.
        raise UtilException(
            f"Post-filter: {expr} does not valid because it raise syntax error."
        ) from None
    body: list[Expr] = mod.body

    if len(body) > 1:
        raise UtilException(
            "Post-filter function should be only one calling per pipe"
        )

    # NOTE: an empty expression gives an empty body, and a statement such as
    #   an assignment is not an ``Expr`` node that has a ``value``.
    if not body or not isinstance(body[0], Expr):
        raise UtilException(
            f"Post-filter: {expr!r} should be a function name or a function "
            f"calling expression."
        )

    caller: Union[Name, Call] = body[0].value
    if isinstance(caller, Name):
        return caller.id, [], {}
    elif not isinstance(caller, Call):
        raise UtilException(
            f"Get arguments does not support for caller type: {type(caller)}"
        )

    # NOTE: a call like ``a.b()`` has an ``Attribute`` func without ``id``.
    if not isinstance(caller.func, Name):
        raise UtilException(
            f"Get arguments does not support for function type: "
            f"{type(caller.func)}"
        )

    # NOTE: ``**kwargs`` unpacking gives a keyword with ``arg`` set to None.
    if any(k.arg is None for k in caller.keywords):
        raise UtilException("Keyword argument does not support unpacking.")

    name: Name = caller.func
    args: list[Constant] = caller.args
    keywords: dict[str, Constant] = {k.arg: k.value for k in caller.keywords}

    if any(not isinstance(i, Constant) for i in args):
        raise UtilException("Argument should be constant.")

    if any(not isinstance(v, Constant) for v in keywords.values()):
        raise UtilException("Keyword argument should be constant.")

    return name.id, args, keywords
451
+
452
+
453
@custom_filter("fmt")
def datetime_format(value: datetime, fmt: str = "%Y-%m-%d %H:%M:%S") -> str:
    """Format a datetime value with ``strftime``. This function is tagged
    with the filter name ``fmt``.

    :param value: A datetime value that is formatted.
    :param fmt: A format string that is passed to ``strftime``.
    :rtype: str
    """
    formatted: str = value.strftime(fmt)
    return formatted
456
+
457
+
458
def map_post_filter(
    value: Any,
    post_filter: list[str],
    filters: dict[str, FilterRegistry],
) -> Any:
    """Mapping post-filter to value with sequence list of filter function name
    that will get from the filter registry.

    :param value: A value that want to mapped with filter function.
    :param post_filter: A list of post-filter expressions; each one is a
        function name or a function call with constant arguments.
    :param filters: A filter registry. A registry value is a callable or a
        list of callables that are applied one after another.
    :raises UtilException: If a filter name is not in the registry, a chain
        filter gets arguments, or a filter function raises an error.
    :returns: A value after all post-filters were applied in order.
    """
    for _filter in post_filter:
        func_name, _args, _kwargs = get_args_const(_filter)
        args = [arg.value for arg in _args]
        kwargs = {k: v.value for k, v in _kwargs.items()}

        if func_name not in filters:
            raise UtilException(
                f"The post-filter: {func_name} does not support yet."
            )

        f_func = filters[func_name]
        is_chain: bool = isinstance(f_func, list)

        # NOTE: this check stays outside the ``try`` block below so its
        #   specific message is not replaced by the generic one.
        if is_chain and (args or kwargs):
            raise UtilException(
                "Chain filter function does not support for passing "
                "arguments."
            )

        try:
            if is_chain:
                for func in f_func:
                    value = func(value)
            else:
                value = f_func(value, *args, **kwargs)
        except Exception as err:
            logging.warning(str(err))
            raise UtilException(
                f"The post-filter function: {func_name} does not fit with "
                f"{value} (type: {type(value).__name__})."
            ) from None
    return value
498
+
499
+
500
def str2template(
    value: str,
    params: DictData,
    *,
    filters: dict[str, FilterRegistry] | None = None,
) -> Any:
    """(Sub-function) Pass param to template string that can search by
    ``RE_CALLER`` regular expression.

        When the whole stripped string is a single template, the getter value
    is returned after post-filters without converting it to a string.
    Otherwise each template is replaced with the string of its getter value
    and the final string is passed to ``search_env_replace``.

    :param value: A string value that want to mapped with an params
    :param params: A parameter value that getting with matched regular
        expression.
    :param filters: A filter registry. It is created with
        ``make_filter_registry`` when this value is not supplied or empty.
    :raises UtilException: If a caller does not exist in the params.
    """
    filters = filters or make_filter_registry()

    # NOTE: remove space before and after this string value.
    value = value.strip()
    for found in Re.RE_CALLER.finditer(value):
        # NOTE:
        #   Get caller and filter values that setting inside;
        #
        #   ... ``${{ <caller-value> [ | <filter-value>] ... }}``
        #
        matched: str = found.group(0)
        caller: str = found.group("caller")
        raw_filters: str = (
            found.group("post_filters").strip().removeprefix("|")
        )
        pfilter: list[str] = [
            i.strip() for i in raw_filters.split("|") if i != ""
        ]

        if not hasdot(caller, params):
            raise UtilException(f"The params does not set caller: {caller!r}.")

        # NOTE: the check above guarantees that caller exists in params.
        getter: Any = getdot(caller, params)

        # NOTE:
        #   If nothing is left after removing this template from the value,
        #   the filtered getter is returned with its origin type.
        if value.replace(matched, "", 1) == "":
            return map_post_filter(getter, pfilter, filters=filters)

        # NOTE: map post-filter function, then concat as a string value.
        getter = map_post_filter(getter, pfilter, filters=filters)
        text: str = getter if isinstance(getter, str) else str(getter)
        value = value.replace(matched, text, 1)

    return search_env_replace(value)
557
+
558
+
559
def param2template(
    value: Any,
    params: DictData,
) -> Any:
    """Pass param to template string that can search by ``RE_CALLER`` regular
    expression.

    :param value: A value that want to mapped with an params. A dict, list,
        tuple, or set is mapped recursively; a value that is not a string
        is returned as is.
    :param params: A parameter value that getting with matched regular
        expression.

    :rtype: Any
    :returns: An any getter value from the params input.
    """
    # NOTE: build the filter registry once and share it with every nested
    #   value instead of rebuilding it on each recursive call.
    filters: dict[str, FilterRegistry] = make_filter_registry()
    return _param2template(value, params, filters)


def _param2template(
    value: Any,
    params: DictData,
    filters: dict[str, FilterRegistry],
) -> Any:
    """Map templates in a nested value with an already built filter registry."""
    if isinstance(value, dict):
        return {
            k: _param2template(v, params, filters) for k, v in value.items()
        }
    elif isinstance(value, (list, tuple, set)):
        # NOTE: rebuild the same container type from the mapped items.
        return type(value)(
            [_param2template(i, params, filters) for i in value]
        )
    elif not isinstance(value, str):
        return value
    return str2template(value, params, filters=filters)
581
+
582
+
583
def dash2underscore(
    key: str,
    values: DictData,
    *,
    fixed: str | None = None,
) -> DictData:
    """Rename a key of the mapping in place by replacing dashes with
    underscores.

    :param key: A key name that is renamed when it exists in the values.
    :param values: A mapping that is changed in place and returned.
    :param fixed: A new key name that is used instead of the underscore
        version of the key when it is supplied.
    :rtype: DictData
    """
    if key not in values:
        return values

    item: Any = values.pop(key)
    new_key: str = fixed or key.replace("-", "_")
    values[new_key] = item
    return values
593
+
594
+
595
def cross_product(matrix: Matrix) -> Iterator[DictData]:
    """Iterate over every combination of the matrix values. Each yielded
    item maps all matrix keys to one value of their value lists.

    :param matrix: A mapping of a key and its list of values.
    :rtype: Iterator[DictData]
    """
    keys: list = list(matrix)
    for combination in product(*matrix.values()):
        yield dict(zip(keys, combination))