ddeutil-workflow 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/stage.py CHANGED
@@ -3,6 +3,18 @@
3
3
  # Licensed under the MIT License. See LICENSE in the project root for
4
4
  # license information.
5
5
  # ------------------------------------------------------------------------------
6
+ """Stage Model that is used for getting stage data templates from the Job Model.
7
+ The stage handles the minimal task that runs in some thread (the same thread as
8
+ its job owner), which means it is the lowest executor of a pipeline workflow that
9
+ can track logs.
10
+
11
+ The output of stage execution only returns a 0 status because I do not want to
12
+ handle stage errors on this stage model. I think the stage model should have a lot of
13
+ use cases, and it should not be a worry when I want to create a new one.
14
+
15
+ Execution --> Ok --> Result with 0
16
+ --> Error --> Raise StageException
17
+ """
6
18
  from __future__ import annotations
7
19
 
8
20
  import contextlib
@@ -15,13 +27,22 @@ import uuid
15
27
  from abc import ABC, abstractmethod
16
28
  from collections.abc import Iterator
17
29
  from dataclasses import dataclass
30
+ from functools import wraps
18
31
  from inspect import Parameter
19
32
  from pathlib import Path
20
33
  from subprocess import CompletedProcess
34
+ from textwrap import dedent
21
35
  from typing import Callable, Optional, Union
22
36
 
37
+ try:
38
+ from typing import ParamSpec
39
+ except ImportError:
40
+ from typing_extensions import ParamSpec
41
+
23
42
  from ddeutil.core import str2bool
24
43
  from pydantic import BaseModel, Field
44
+ from pydantic.functional_validators import model_validator
45
+ from typing_extensions import Self
25
46
 
26
47
  from .__types import DictData, DictStr, Re, TupleStr
27
48
  from .exceptions import StageException
@@ -32,9 +53,68 @@ from .utils import (
32
53
  gen_id,
33
54
  make_exec,
34
55
  make_registry,
56
+ not_in_template,
35
57
  param2template,
36
58
  )
37
59
 
60
+ P = ParamSpec("P")
61
+ __all__: TupleStr = (
62
+ "Stage",
63
+ "EmptyStage",
64
+ "BashStage",
65
+ "PyStage",
66
+ "HookStage",
67
+ "TriggerStage",
68
+ "handler_result",
69
+ )
70
+
71
+
72
+ def handler_result(message: str | None = None) -> Callable[P, Result]:
73
+ """Decorator function for handler result from the stage execution. This
74
+ function should be used with execution methods only.
75
+
76
+ :param message: A message to add as a prefix of the exception statement.
77
+ """
78
+ message: str = message or ""
79
+
80
+ def decorator(func: Callable[P, Result]) -> Callable[P, Result]:
81
+
82
+ @wraps(func)
83
+ def wrapped(self: Stage, *args, **kwargs):
84
+ try:
85
+ # NOTE: Start calling origin function with a passing args.
86
+ return func(self, *args, **kwargs).set_run_id(self.run_id)
87
+ except Exception as err:
88
+ # NOTE: Start catching error from the stage execution.
89
+ logging.error(
90
+ f"({self.run_id}) [STAGE]: {err.__class__.__name__}: {err}"
91
+ )
92
+ if str2bool(
93
+ os.getenv("WORKFLOW_CORE_STAGE_RAISE_ERROR", "true")
94
+ ):
95
+ # NOTE: If the error raised from stage execution was caused by
96
+ # the stage itself, it will re-raise that error chained to the
97
+ # previous one.
98
+ if isinstance(err, StageException):
99
+ raise StageException(
100
+ f"{self.__class__.__name__}: {message}\n\t{err}"
101
+ ) from err
102
+ raise StageException(
103
+ f"{self.__class__.__name__}: {message}\n\t"
104
+ f"{err.__class__.__name__}: {err}"
105
+ ) from None
106
+ rs: Result = Result(
107
+ status=1,
108
+ context={
109
+ "error_message": f"{err.__class__.__name__}: {err}",
110
+ },
111
+ )
112
+ return rs.set_run_id(self.run_id)
113
+
114
+ return wrapped
115
+
116
+ return decorator
117
+
38
118
 
39
119
  class BaseStage(BaseModel, ABC):
40
120
  """Base Stage Model that keep only id and name fields for the stage
@@ -50,12 +130,45 @@ class BaseStage(BaseModel, ABC):
50
130
  ),
51
131
  )
52
132
  name: str = Field(
53
- description="A stage name that want to logging when start execution."
133
+ description="A stage name that want to logging when start execution.",
54
134
  )
55
135
  condition: Optional[str] = Field(
56
136
  default=None,
137
+ description="A stage condition statement to allow stage executable.",
57
138
  alias="if",
58
139
  )
140
+ run_id: Optional[str] = Field(
141
+ default=None,
142
+ description="A running stage ID.",
143
+ repr=False,
144
+ )
145
+
146
+ @model_validator(mode="after")
147
+ def __prepare_running_id(self):
148
+ """Prepare the stage running ID, generating it when the field keeps its default
148
+ value, and validate that the name and id fields do not contain any template
149
+ parameter (excluding the matrix template).
151
+ """
152
+ if self.run_id is None:
153
+ self.run_id = gen_id(self.name + (self.id or ""), unique=True)
154
+
155
+ # VALIDATE: Stage id and name should not be dynamic with params
156
+ # templates (only the matrix template is allowed).
157
+ if not_in_template(self.id) or not_in_template(self.name):
158
+ raise ValueError(
159
+ "Stage name and ID should only template with matrix."
160
+ )
161
+
162
+ return self
163
+
164
+ def get_running_id(self, run_id: str) -> Self:
165
+ """Return a copy of this Stage model object with its stage running ID replaced by the
166
+ input running ID.
167
+
168
+ :param run_id: A replacement stage running ID.
169
+ :rtype: Self
170
+ """
171
+ return self.model_copy(update={"run_id": run_id})
59
172
 
60
173
  @abstractmethod
61
174
  def execute(self, params: DictData) -> Result:
@@ -67,31 +180,45 @@ class BaseStage(BaseModel, ABC):
67
180
  """
68
181
  raise NotImplementedError("Stage should implement ``execute`` method.")
69
182
 
70
- def set_outputs(self, output: DictData, params: DictData) -> DictData:
183
+ def set_outputs(self, output: DictData, to: DictData) -> DictData:
71
184
  """Set an outputs from execution process to an input params.
72
185
 
73
186
  :param output: A output data that want to extract to an output key.
74
- :param params: A context data that want to add output result.
187
+ :param to: A context data that want to add output result.
75
188
  :rtype: DictData
76
189
  """
77
- if self.id:
78
- _id: str = param2template(self.id, params)
79
- elif str2bool(os.getenv("WORKFLOW_CORE_DEFAULT_STAGE_ID", "false")):
80
- _id: str = gen_id(param2template(self.name, params))
81
- else:
82
- return params
190
+ if not (
191
+ self.id
192
+ or str2bool(os.getenv("WORKFLOW_CORE_STAGE_DEFAULT_ID", "false"))
193
+ ):
194
+ logging.debug(
195
+ f"({self.run_id}) [STAGE]: Output does not set because this "
196
+ f"stage does not set ID or default stage ID config flag not be "
197
+ f"True."
198
+ )
199
+ return to
83
200
 
84
201
  # NOTE: Create stages key to receive an output from the stage execution.
85
- if "stages" not in params:
86
- params["stages"] = {}
202
+ if "stages" not in to:
203
+ to["stages"] = {}
87
204
 
88
- params["stages"][_id] = {"outputs": output}
89
- return params
205
+ if self.id:
206
+ _id: str = param2template(self.id, params=to)
207
+ else:
208
+ _id: str = gen_id(param2template(self.name, params=to))
209
+
210
+ # NOTE: Set the output to that stage generated ID.
211
+ logging.debug(
212
+ f"({self.run_id}) [STAGE]: Set output complete with stage ID: {_id}"
213
+ )
214
+ to["stages"][_id] = {"outputs": output}
215
+ return to
90
216
 
91
- def is_skip(self, params: DictData | None = None) -> bool:
217
+ def is_skipped(self, params: DictData | None = None) -> bool:
92
218
  """Return true if condition of this stage do not correct.
93
219
 
94
220
  :param params: A parameters that want to pass to condition template.
221
+ :rtype: bool
95
222
  """
96
223
  params: DictData = params or {}
97
224
  if self.condition is None:
@@ -104,8 +231,8 @@ class BaseStage(BaseModel, ABC):
104
231
  raise TypeError("Return type of condition does not be boolean")
105
232
  return not rs
106
233
  except Exception as err:
107
- logging.error(str(err))
108
- raise StageException(str(err)) from err
234
+ logging.error(f"({self.run_id}) [STAGE]: {err}")
235
+ raise StageException(f"{err.__class__.__name__}: {err}") from err
109
236
 
110
237
 
111
238
  class EmptyStage(BaseStage):
@@ -131,8 +258,10 @@ class EmptyStage(BaseStage):
131
258
  :param params: A context data that want to add output result. But this
132
259
  stage does not pass any output.
133
260
  """
134
- stm: str = param2template(self.echo, params=params) or "..."
135
- logging.info(f"[STAGE]: Empty-Execute: {self.name!r}: " f"( {stm} )")
261
+ logging.info(
262
+ f"({self.run_id}) [STAGE]: Empty-Execute: {self.name!r}: "
263
+ f"( {param2template(self.echo, params=params) or '...'} )"
264
+ )
136
265
  return Result(status=0, context={})
137
266
 
138
267
 
@@ -174,20 +303,28 @@ class BashStage(BaseStage):
174
303
  f_shebang: str = "bash" if sys.platform.startswith("win") else "sh"
175
304
  with open(f"./{f_name}", mode="w", newline="\n") as f:
176
305
  # NOTE: write header of `.sh` file
177
- f.write(f"#!/bin/{f_shebang}\n")
306
+ f.write(f"#!/bin/{f_shebang}\n\n")
178
307
 
179
308
  # NOTE: add setting environment variable before bash skip statement.
180
309
  f.writelines([f"{k}='{env[k]}';\n" for k in env])
181
310
 
182
311
  # NOTE: make sure that shell script file does not have `\r` char.
183
- f.write(bash.replace("\r\n", "\n"))
312
+ f.write("\n" + bash.replace("\r\n", "\n"))
184
313
 
314
+ # NOTE: Make this .sh file executable.
185
315
  make_exec(f"./{f_name}")
186
316
 
317
+ logging.debug(
318
+ f"({self.run_id}) [STAGE]: Start create `.sh` file and running a "
319
+ f"bash statement."
320
+ )
321
+
187
322
  yield [f_shebang, f_name]
188
323
 
324
+ # NOTE: Remove the .sh file that was used to run bash.
189
325
  Path(f"./{f_name}").unlink()
190
326
 
327
+ @handler_result()
191
328
  def execute(self, params: DictData) -> Result:
192
329
  """Execute the Bash statement with the Python build-in ``subprocess``
193
330
  package.
@@ -195,11 +332,11 @@ class BashStage(BaseStage):
195
332
  :param params: A parameter data that want to use in this execution.
196
333
  :rtype: Result
197
334
  """
198
- bash: str = param2template(self.bash, params)
335
+ bash: str = param2template(dedent(self.bash), params)
199
336
  with self.__prepare_bash(
200
337
  bash=bash, env=param2template(self.env, params)
201
338
  ) as sh:
202
- logging.info(f"[STAGE]: Shell-Execute: {sh}")
339
+ logging.info(f"({self.run_id}) [STAGE]: Shell-Execute: {sh}")
203
340
  rs: CompletedProcess = subprocess.run(
204
341
  sh,
205
342
  shell=False,
@@ -211,9 +348,11 @@ class BashStage(BaseStage):
211
348
  rs.stderr.encode("utf-8").decode("utf-16")
212
349
  if "\\x00" in rs.stderr
213
350
  else rs.stderr
351
+ ).removesuffix("\n")
352
+ raise StageException(
353
+ f"Subprocess: {err}\nRunning Statement:\n---\n"
354
+ f"```bash\n{bash}\n```"
214
355
  )
215
- logging.error(f"{err}\n\n```bash\n{bash}```")
216
- raise StageException(f"{err}\n\n```bash\n{bash}```")
217
356
  return Result(
218
357
  status=0,
219
358
  context={
@@ -227,6 +366,15 @@ class BashStage(BaseStage):
227
366
  class PyStage(BaseStage):
228
367
  """Python executor stage that running the Python statement that receive
229
368
  globals and additional variables.
369
+
370
+ Data Validate:
371
+ >>> stage = {
372
+ ... "name": "Python stage execution",
373
+ ... "run": 'print("Hello {x}")',
374
+ ... "vars": {
375
+ ... "x": "BAR",
376
+ ... },
377
+ ... }
230
378
  """
231
379
 
232
380
  run: str = Field(
@@ -239,26 +387,26 @@ class PyStage(BaseStage):
239
387
  ),
240
388
  )
241
389
 
242
- def set_outputs(self, output: DictData, params: DictData) -> DictData:
390
+ def set_outputs(self, output: DictData, to: DictData) -> DictData:
243
391
  """Set an outputs from the Python execution process to an input params.
244
392
 
245
393
  :param output: A output data that want to extract to an output key.
246
- :param params: A context data that want to add output result.
394
+ :param to: A context data that want to add output result.
247
395
  :rtype: DictData
248
396
  """
249
397
  # NOTE: The output will filter unnecessary keys from locals.
250
398
  _locals: DictData = output["locals"]
251
399
  super().set_outputs(
252
- {k: _locals[k] for k in _locals if k != "__annotations__"},
253
- params=params,
400
+ {k: _locals[k] for k in _locals if k != "__annotations__"}, to=to
254
401
  )
255
402
 
256
403
  # NOTE:
257
404
  # Override value that changing from the globals that pass via exec.
258
405
  _globals: DictData = output["globals"]
259
- params.update({k: _globals[k] for k in params if k in _globals})
260
- return params
406
+ to.update({k: _globals[k] for k in to if k in _globals})
407
+ return to
261
408
 
409
+ @handler_result()
262
410
  def execute(self, params: DictData) -> Result:
263
411
  """Execute the Python statement that pass all globals and input params
264
412
  to the globals argument of the ``exec`` built-in function.
@@ -266,34 +414,66 @@ class PyStage(BaseStage):
266
414
  :param params: A parameter that want to pass before run any statement.
267
415
  :rtype: Result
268
416
  """
417
+ # NOTE: Replace the run statement that has templating value.
418
+ run: str = param2template(dedent(self.run), params)
419
+
269
420
  # NOTE: create custom globals value that will pass to exec function.
270
421
  _globals: DictData = (
271
422
  globals() | params | param2template(self.vars, params)
272
423
  )
273
424
  _locals: DictData = {}
274
- try:
275
- logging.info(f"[STAGE]: Py-Execute: {uuid.uuid4()}")
276
- exec(param2template(self.run, params), _globals, _locals)
277
- except Exception as err:
278
- raise StageException(
279
- f"{err.__class__.__name__}: {err}\nRunning Statement:\n---\n"
280
- f"{self.run}"
281
- ) from None
425
+
426
+ # NOTE: Start exec the run statement.
427
+ logging.info(f"({self.run_id}) [STAGE]: Py-Execute: {self.name}")
428
+ exec(run, _globals, _locals)
429
+
282
430
  return Result(
283
- status=0,
284
- context={"locals": _locals, "globals": _globals},
431
+ status=0, context={"locals": _locals, "globals": _globals}
285
432
  )
286
433
 
287
434
 
288
435
  @dataclass
289
436
  class HookSearch:
290
- """Hook Search dataclass."""
437
+ """Hook Search dataclass that use for receive regular expression grouping
438
+ dict from searching hook string value.
439
+ """
291
440
 
292
441
  path: str
293
442
  func: str
294
443
  tag: str
295
444
 
296
445
 
446
+ def extract_hook(hook: str) -> Callable[[], TagFunc]:
447
+ """Extract Hook function from string value to hook partial function that
448
+ does run it at runtime.
449
+
450
+ :param hook: A hook value that able to match with Task regex.
451
+ :rtype: Callable[[], TagFunc]
452
+ """
453
+ if not (found := Re.RE_TASK_FMT.search(hook)):
454
+ raise ValueError(
455
+ f"Hook {hook!r} does not match with hook format regex."
456
+ )
457
+
458
+ # NOTE: Pass the searching hook string to `path`, `func`, and `tag`.
459
+ hook: HookSearch = HookSearch(**found.groupdict())
460
+
461
+ # NOTE: Registry object should implement on this package only.
462
+ rgt: dict[str, Registry] = make_registry(f"{hook.path}")
463
+ if hook.func not in rgt:
464
+ raise NotImplementedError(
465
+ f"``REGISTER-MODULES.{hook.path}.registries`` does not "
466
+ f"implement registry: {hook.func!r}."
467
+ )
468
+
469
+ if hook.tag not in rgt[hook.func]:
470
+ raise NotImplementedError(
471
+ f"tag: {hook.tag!r} does not found on registry func: "
472
+ f"``REGISTER-MODULES.{hook.path}.registries.{hook.func}``"
473
+ )
474
+ return rgt[hook.func][hook.tag]
475
+
476
+
297
477
  class HookStage(BaseStage):
298
478
  """Hook executor that hook the Python function from registry with tag
299
479
  decorator function in ``utils`` module and run it with input arguments.
@@ -306,7 +486,7 @@ class HookStage(BaseStage):
306
486
  Data Validate:
307
487
  >>> stage = {
308
488
  ... "name": "Task stage execution",
309
- ... "task": "tasks/function-name@tag-name",
489
+ ... "uses": "tasks/function-name@tag-name",
310
490
  ... "args": {
311
491
  ... "FOO": "BAR",
312
492
  ... },
@@ -314,37 +494,15 @@ class HookStage(BaseStage):
314
494
  """
315
495
 
316
496
  uses: str = Field(
317
- description="A pointer that want to load function from registry",
497
+ description="A pointer that want to load function from registry.",
498
+ )
499
+ args: DictData = Field(
500
+ default_factory=dict,
501
+ description="An arguments that want to pass to the hook function.",
502
+ alias="with",
318
503
  )
319
- args: DictData = Field(alias="with")
320
-
321
- @staticmethod
322
- def extract_hook(hook: str) -> Callable[[], TagFunc]:
323
- """Extract Hook string value to hook function.
324
-
325
- :param hook: A hook value that able to match with Task regex.
326
- """
327
- if not (found := Re.RE_TASK_FMT.search(hook)):
328
- raise ValueError("Task does not match with task format regex.")
329
-
330
- # NOTE: Pass the searching hook string to `path`, `func`, and `tag`.
331
- hook: HookSearch = HookSearch(**found.groupdict())
332
-
333
- # NOTE: Registry object should implement on this package only.
334
- rgt: dict[str, Registry] = make_registry(f"{hook.path}")
335
- if hook.func not in rgt:
336
- raise NotImplementedError(
337
- f"``REGISTER-MODULES.{hook.path}.registries`` does not "
338
- f"implement registry: {hook.func!r}."
339
- )
340
-
341
- if hook.tag not in rgt[hook.func]:
342
- raise NotImplementedError(
343
- f"tag: {hook.tag!r} does not found on registry func: "
344
- f"``REGISTER-MODULES.{hook.path}.registries.{hook.func}``"
345
- )
346
- return rgt[hook.func][hook.tag]
347
504
 
505
+ @handler_result()
348
506
  def execute(self, params: DictData) -> Result:
349
507
  """Execute the Hook function that already in the hook registry.
350
508
 
@@ -352,9 +510,8 @@ class HookStage(BaseStage):
352
510
  :type params: DictData
353
511
  :rtype: Result
354
512
  """
355
- t_func: TagFunc = self.extract_hook(param2template(self.uses, params))()
356
- if not callable(t_func):
357
- raise ImportError("Hook caller function does not callable.")
513
+ t_func_hook: str = param2template(self.uses, params)
514
+ t_func: TagFunc = extract_hook(t_func_hook)()
358
515
 
359
516
  # VALIDATE: check input task caller parameters that exists before
360
517
  # calling.
@@ -366,59 +523,68 @@ class HookStage(BaseStage):
366
523
  if ips.parameters[k].default == Parameter.empty
367
524
  ):
368
525
  raise ValueError(
369
- f"Necessary params, ({', '.join(ips.parameters.keys())}), "
526
+ f"Necessary params, ({', '.join(ips.parameters.keys())}, ), "
370
527
  f"does not set to args"
371
528
  )
372
-
373
529
  # NOTE: add '_' prefix if it want to use.
374
530
  for k in ips.parameters:
375
531
  if k.removeprefix("_") in args:
376
532
  args[k] = args.pop(k.removeprefix("_"))
377
533
 
378
- try:
379
- logging.info(f"[STAGE]: Hook-Execute: {t_func.name}@{t_func.tag}")
380
- rs: DictData = t_func(**param2template(args, params))
381
- except Exception as err:
382
- raise StageException(f"{err.__class__.__name__}: {err}") from err
534
+ logging.info(
535
+ f"({self.run_id}) [STAGE]: Hook-Execute: {t_func.name}@{t_func.tag}"
536
+ )
537
+ rs: DictData = t_func(**param2template(args, params))
383
538
 
384
- # VALIDATE: Check the result type from hook function, it should be dict.
539
+ # VALIDATE:
540
+ # Check the result type from hook function, it should be dict.
385
541
  if not isinstance(rs, dict):
386
- raise StageException(
387
- f"Return of hook function: {t_func.name}@{t_func.tag} does not "
388
- f"serialize to result model, you should fix it to `dict` type."
542
+ raise TypeError(
543
+ f"Return type: '{t_func.name}@{t_func.tag}' does not serialize "
544
+ f"to result model, you change return type to `dict`."
389
545
  )
390
546
  return Result(status=0, context=rs)
391
547
 
392
548
 
393
549
  class TriggerStage(BaseStage):
394
- """Trigger Pipeline execution stage that execute another pipeline object."""
550
+ """Trigger Pipeline execution stage that execute another pipeline object.
551
+
552
+ Data Validate:
553
+ >>> stage = {
554
+ ... "name": "Trigger pipeline stage execution",
555
+ ... "trigger": 'pipeline-name-for-loader',
556
+ ... "params": {
557
+ ... "run-date": "2024-08-01",
558
+ ... "source": "src",
559
+ ... },
560
+ ... }
561
+ """
395
562
 
396
563
  trigger: str = Field(description="A trigger pipeline name.")
397
- params: DictData = Field(default_factory=dict)
564
+ params: DictData = Field(
565
+ default_factory=dict,
566
+ description="A parameter that want to pass to pipeline execution.",
567
+ )
398
568
 
569
+ @handler_result("Raise from TriggerStage")
399
570
  def execute(self, params: DictData) -> Result:
400
- """Trigger execution.
571
+ """Trigger pipeline execution.
401
572
 
402
573
  :param params: A parameter data that want to use in this execution.
403
574
  :rtype: Result
404
575
  """
405
- from .exceptions import PipelineException
406
576
  from .pipeline import Pipeline
407
577
 
408
- try:
409
- # NOTE: Loading pipeline object from trigger name.
410
- pipe: Pipeline = Pipeline.from_loader(
411
- name=self.trigger, externals={}
412
- )
413
- rs: Result = pipe.execute(
414
- params=param2template(self.params, params)
415
- )
416
- except PipelineException as err:
417
- _alias_stage: str = self.id or self.name
418
- raise StageException(
419
- f"Trigger Stage: {_alias_stage} get trigger pipeline exception."
420
- ) from err
421
- return rs
578
+ # NOTE: Loading pipeline object from trigger name.
579
+ _trigger: str = param2template(self.trigger, params=params)
580
+
581
+ # NOTE: Set running pipeline ID from running stage ID to external
582
+ # params on Loader object.
583
+ pipe: Pipeline = Pipeline.from_loader(
584
+ name=_trigger, externals={"run_id": self.run_id}
585
+ )
586
+ logging.info(f"({self.run_id}) [STAGE]: Trigger-Execute: {_trigger!r}")
587
+ return pipe.execute(params=param2template(self.params, params))
422
588
 
423
589
 
424
590
  # NOTE: Order of parsing stage data