ddeutil-workflow 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,303 +5,53 @@
  # ------------------------------------------------------------------------------
  from __future__ import annotations

- import contextlib
- import inspect
- import itertools
+ import copy
  import logging
- import subprocess
- import sys
+ import os
  import time
- import uuid
- from abc import ABC, abstractmethod
- from inspect import Parameter
- from pathlib import Path
+ from concurrent.futures import (
+     FIRST_EXCEPTION,
+     Future,
+     ProcessPoolExecutor,
+     ThreadPoolExecutor,
+     as_completed,
+     wait,
+ )
+ from datetime import datetime
+ from multiprocessing import Event, Manager
+ from pickle import PickleError
  from queue import Queue
- from subprocess import CompletedProcess
- from typing import Any, Callable, Optional, Union
+ from typing import Optional
+ from zoneinfo import ZoneInfo

  from pydantic import BaseModel, Field
  from pydantic.functional_validators import model_validator
  from typing_extensions import Self

- from .__regex import RegexConf
- from .__types import DictData, DictStr
- from .exceptions import TaskException
- from .loader import Loader, map_params
- from .utils import Params, TaskSearch, make_exec, make_registry
-
-
- class BaseStage(BaseModel, ABC):
-     """Base Stage Model that keep only id and name fields for the stage
-     metadata. If you want to implement any custom stage, you can use this class
-     to parent and implement ``self.execute()`` method only.
-     """
-
-     id: Optional[str] = Field(
-         default=None,
-         description=(
-             "The stage ID that use to keep execution output or getting by job "
-             "owner."
-         ),
-     )
-     name: str = Field(
-         description="The stage name that want to logging when start execution."
-     )
-
-     @abstractmethod
-     def execute(self, params: DictData) -> DictData:
-         """Execute abstraction method that action something by sub-model class.
-         This is important method that make this class is able to be the stage.
-
-         :param params: A parameter data that want to use in this execution.
-         :rtype: DictData
-         """
-         raise NotImplementedError("Stage should implement ``execute`` method.")
-
-     def set_outputs(self, rs: DictData, params: DictData) -> DictData:
-         """Set an outputs from execution process to an input params.
-
-         :param rs: A result data that want to extract to an output key.
-         :param params: A context data that want to add output result.
-         :rtype: DictData
-         """
-         if self.id is None:
-             return params
-
-         if "stages" not in params:
-             params["stages"] = {}
-
-         params["stages"][self.id] = {"outputs": rs}
-         return params
-
-
- class EmptyStage(BaseStage):
-     """Empty stage that do nothing (context equal empty stage) and logging the
-     name of stage only to stdout.
-     """
-
-     def execute(self, params: DictData) -> DictData:
-         """Execution method for the Empty stage that do only logging out to
-         stdout.
-
-         :param params: A context data that want to add output result. But this
-             stage does not pass any output.
-         """
-         logging.info(f"[STAGE]: Empty-Execute: {self.name!r}")
-         return params
-
-
- class ShellStage(BaseStage):
-     """Shell stage that execute bash script on the current OS. That mean if your
-     current OS is Windows, it will running bash in the WSL.
-     """
-
-     shell: str = Field(description="A shell statement that want to execute.")
-     env: DictStr = Field(
-         default_factory=dict,
-         description=(
-             "An environment variable mapping that want to set before execute "
-             "this shell statement."
-         ),
-     )
-
-     @contextlib.contextmanager
-     def __prepare_shell(self):
-         """Return context of prepared shell statement that want to execute. This
-         step will write the `.sh` file before giving this file name to context.
-         After that, it will auto delete this file automatic.
-         """
-         f_name: str = f"{uuid.uuid4()}.sh"
-         f_shebang: str = "bash" if sys.platform.startswith("win") else "sh"
-         with open(f"./{f_name}", mode="w", newline="\n") as f:
-             f.write(f"#!/bin/{f_shebang}\n")
-
-             for k in self.env:
-                 f.write(f"{k}='{self.env[k]}';\n")
-
-             # NOTE: make sure that shell script file does not have `\r` char.
-             f.write(self.shell.replace("\r\n", "\n"))
-
-         make_exec(f"./{f_name}")
-
-         yield [f_shebang, f_name]
-
-         Path(f_name).unlink()
-
-     def set_outputs(self, rs: CompletedProcess, params: DictData) -> DictData:
-         """Set outputs to params"""
-         # NOTE: skipping set outputs of stage execution when id does not set.
-         if self.id is None:
-             return params
-
-         if "stages" not in params:
-             params["stages"] = {}
-
-         params["stages"][self.id] = {
-             # NOTE: The output will fileter unnecessary keys from ``_locals``.
-             "outputs": {
-                 "return_code": rs.returncode,
-                 "stdout": rs.stdout.rstrip("\n"),
-             },
-         }
-         return params
-
-     def execute(self, params: DictData) -> DictData:
-         """Execute the Shell & Powershell statement with the Python build-in
-         ``subprocess`` package.
-         """
-         with self.__prepare_shell() as sh:
-             logging.info(f"[STAGE]: Shell-Execute: {sh}")
-             rs: CompletedProcess = subprocess.run(
-                 sh,
-                 shell=False,
-                 capture_output=True,
-                 text=True,
-             )
-         if rs.returncode > 0:
-             err: str = (
-                 rs.stderr.encode("utf-8").decode("utf-16")
-                 if "\\x00" in rs.stderr
-                 else rs.stderr
-             )
-             logging.error(f"{err}\nRunning Statement:\n---\n{self.shell}")
-             raise TaskException(f"{err}\nRunning Statement:\n---\n{self.shell}")
-         self.set_outputs(rs, params)
-         return params
-
-
- class PyStage(BaseStage):
-     """Python executor stage that running the Python statement that receive
-     globals nad additional variables.
-     """
-
-     run: str
-     vars: DictData = Field(default_factory=dict)
-
-     def get_vars(self, params: DictData) -> DictData:
-         """Return variables"""
-         rs = self.vars.copy()
-         for p, v in self.vars.items():
-             rs[p] = map_params(v, params)
-         return rs
-
-     def set_outputs(self, rs: DictData, params: DictData) -> DictData:
-         """Set an outputs from execution process to an input params.
-
-         :param rs: A result data that want to extract to an output key.
-         :param params: A context data that want to add output result.
-         :rtype: DictData
-         """
-         # NOTE: skipping set outputs of stage execution when id does not set.
-         if self.id is None:
-             return params
-
-         if "stages" not in params:
-             params["stages"] = {}
-
-         params["stages"][self.id] = {
-             # NOTE: The output will fileter unnecessary keys from ``_locals``.
-             "outputs": {k: rs[k] for k in rs if k != "__annotations__"},
-         }
-         return params
-
-     def execute(self, params: DictData) -> DictData:
-         """Execute the Python statement that pass all globals and input params
-         to globals argument on ``exec`` build-in function.
-
-         :param params: A parameter that want to pass before run any statement.
-         :type params: DictData
-
-         :rtype: DictData
-         :returns: A parameters from an input that was mapped output if the stage
-             ID was set.
-         """
-         _globals: DictData = globals() | params | self.get_vars(params)
-         _locals: DictData = {}
-         try:
-             exec(map_params(self.run, params), _globals, _locals)
-         except Exception as err:
-             raise TaskException(
-                 f"{err.__class__.__name__}: {err}\nRunning Statement:\n---\n"
-                 f"{self.run}"
-             ) from None
-
-         # NOTE: set outputs from ``_locals`` value from ``exec``.
-         self.set_outputs(_locals, params)
-         return params | {k: _globals[k] for k in params if k in _globals}
-
-
- class TaskStage(BaseStage):
-     """Task executor stage that running the Python function."""
-
-     task: str
-     args: DictData
-
-     @staticmethod
-     def extract_task(task: str) -> Callable[[], Callable[[Any], Any]]:
-         """Extract Task string value to task function."""
-         if not (found := RegexConf.RE_TASK_FMT.search(task)):
-             raise ValueError("Task does not match with task format regex.")
-         tasks: TaskSearch = TaskSearch(**found.groupdict())
-
-         # NOTE: Registry object should implement on this package only.
-         # TODO: This prefix value to search registry should dynamic with
-         #   config file.
-         rgt = make_registry(f"ddeutil.workflow.{tasks.path}")
-         if tasks.func not in rgt:
-             raise NotImplementedError(
-                 f"ddeutil.workflow.{tasks.path}.registries does not "
-                 f"implement registry: {tasks.func}."
-             )
-
-         if tasks.tag not in rgt[tasks.func]:
-             raise NotImplementedError(
-                 f"tag: {tasks.tag} does not found on registry func: "
-                 f"ddeutil.workflow.{tasks.path}.registries."
-                 f"{tasks.func}"
-             )
-         return rgt[tasks.func][tasks.tag]
-
-     def execute(self, params: DictData) -> DictData:
-         """Execute the Task function."""
-         task_caller = self.extract_task(self.task)()
-         if not callable(task_caller):
-             raise ImportError("Task caller function does not callable.")
-
-         # NOTE: check task caller parameters
-         ips = inspect.signature(task_caller)
-         if any(
-             k not in self.args
-             for k in ips.parameters
-             if ips.parameters[k].default == Parameter.empty
-         ):
-             raise ValueError(
-                 f"necessary parameters, ({', '.join(ips.parameters.keys())}), "
-                 f"does not set to args"
-             )
-         try:
-             rs = task_caller(**map_params(self.args, params))
-         except Exception as err:
-             raise TaskException(f"{err.__class__.__name__}: {err}") from err
-         self.set_outputs(rs, params)
-         return params
-
-
- # NOTE: Order of parsing stage data
- Stage = Union[
-     PyStage,
-     ShellStage,
-     TaskStage,
-     EmptyStage,
- ]
+ from .__types import DictData, DictStr, Matrix, MatrixExclude, MatrixInclude
+ from .exceptions import JobException, PipelineException, StageException
+ from .loader import Loader
+ from .on import On
+ from .scheduler import CronRunner
+ from .stage import Stage
+ from .utils import (
+     Param,
+     Result,
+     cross_product,
+     dash2underscore,
+     gen_id,
+     get_diff_sec,
+ )


  class Strategy(BaseModel):
      """Strategy Model that will combine a matrix together for running the
      special job.

-     Examples:
+     Data Validate:
          >>> strategy = {
+         ...     'max-parallel': 1,
+         ...     'fail-fast': False,
          ...     'matrix': {
          ...         'first': [1, 2, 3],
          ...         'second': ['foo', 'bar']
@@ -312,35 +62,115 @@ class Strategy(BaseModel):
      """

      fail_fast: bool = Field(default=False)
-     max_parallel: int = Field(default=-1)
-     matrix: dict[str, Union[list[str], list[int]]] = Field(default_factory=dict)
-     include: list[dict[str, Union[str, int]]] = Field(default_factory=list)
-     exclude: list[dict[str, Union[str, int]]] = Field(default_factory=list)
+     max_parallel: int = Field(default=1, gt=0)
+     matrix: Matrix = Field(default_factory=dict)
+     include: MatrixInclude = Field(
+         default_factory=list,
+         description="A list of additional matrix that want to adds-in.",
+     )
+     exclude: MatrixExclude = Field(
+         default_factory=list,
+         description="A list of exclude matrix that want to filter-out.",
+     )

      @model_validator(mode="before")
      def __prepare_keys(cls, values: DictData) -> DictData:
-         if "max-parallel" in values:
-             values["max_parallel"] = values.pop("max-parallel")
-         if "fail-fast" in values:
-             values["fail_fast"] = values.pop("fail-fast")
+         """Rename key that use dash to underscore because Python does not
+         support this character exist in any variable name.
+         """
+         dash2underscore("max-parallel", values)
+         dash2underscore("fail-fast", values)
          return values

+     def make(self) -> list[DictStr]:
+         """Return List of product of matrix values that already filter with
+         exclude and add include.
+
+         :rtype: list[DictStr]
+         """
+         # NOTE: If it does not set matrix, it will return list of an empty dict.
+         if not (mt := self.matrix):
+             return [{}]
+
+         final: list[DictStr] = []
+         for r in cross_product(matrix=mt):
+             if any(
+                 all(r[k] == v for k, v in exclude.items())
+                 for exclude in self.exclude
+             ):
+                 continue
+             final.append(r)
+
+         # NOTE: If it is empty matrix and include, it will return list of an
+         #   empty dict.
+         if not final and not self.include:
+             return [{}]
+
+         # NOTE: Add include to generated matrix with exclude list.
+         add: list[DictStr] = []
+         for include in self.include:
+             # VALIDATE:
+             #   Validate any key in include list should be a subset of some one
+             #   in matrix.
+             if all(not (set(include.keys()) <= set(m.keys())) for m in final):
+                 raise ValueError("Include should have the keys equal to matrix")
+
+             # VALIDATE:
+             #   Validate value of include does not duplicate with generated
+             #   matrix.
+             if any(
+                 all(include.get(k) == v for k, v in m.items())
+                 for m in [*final, *add]
+             ):
+                 continue
+             add.append(include)
+         final.extend(add)
+         return final
+

  class Job(BaseModel):
-     """Job Model that is able to call a group of stages."""
+     """Job Model (group of stages).
+
+     This job model allow you to use for-loop that call matrix strategy. If
+     you pass matrix mapping and it able to generate, you will see it running
+     with loop of matrix values.
+
+     Data Validate:
+         >>> job = {
+         ...     "runs-on": None,
+         ...     "strategy": {},
+         ...     "needs": [],
+         ...     "stages": [
+         ...         {
+         ...             "name": "Some stage",
+         ...             "run": "print('Hello World')",
+         ...         },
+         ...     ],
+         ... }
+     """

+     name: Optional[str] = Field(default=None)
+     desc: Optional[str] = Field(default=None)
      runs_on: Optional[str] = Field(default=None)
-     stages: list[Stage] = Field(default_factory=list)
+     stages: list[Stage] = Field(
+         default_factory=list,
+         description="A list of Stage of this job.",
+     )
      needs: list[str] = Field(
          default_factory=list,
          description="A list of the job ID that want to run before this job.",
      )
-     strategy: Strategy = Field(default_factory=Strategy)
+     strategy: Strategy = Field(
+         default_factory=Strategy,
+         description="A strategy matrix that want to generate.",
+     )

      @model_validator(mode="before")
      def __prepare_keys(cls, values: DictData) -> DictData:
-         if "runs-on" in values:
-             values["runs_on"] = values.pop("runs-on")
+         """Rename key that use dash to underscore because Python does not
+         support this character exist in any variable name.
+         """
+         dash2underscore("runs-on", values)
          return values

      def stage(self, stage_id: str) -> Stage:
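
The new `Strategy.make()` above replaces the old `Job.make_strategy()`: it takes the cross product of the matrix values, drops any combination that fully matches an `exclude` entry, and appends non-duplicate `include` entries. A minimal sketch of that combine-and-filter logic, using `itertools.product` in place of the package's `cross_product` helper (whose implementation is not shown in this diff):

    import itertools

    matrix = {"first": [1, 2, 3], "second": ["foo", "bar"]}
    exclude = [{"first": 1, "second": "bar"}]
    include = [{"first": 9, "second": "foo"}]

    # Cross product: one mapping per combination, e.g. {'first': 1, 'second': 'foo'}.
    final = [
        dict(zip(matrix, values))
        for values in itertools.product(*matrix.values())
    ]
    # Drop combinations that fully match an exclude mapping.
    final = [
        r
        for r in final
        if not any(all(r[k] == v for k, v in ex.items()) for ex in exclude)
    ]
    # Append include entries that do not duplicate a generated combination.
    final += [
        inc
        for inc in include
        if not any(all(r.get(k) == v for k, v in inc.items()) for r in final)
    ]
    print(len(final))  # 3 * 2 = 6 combinations, minus 1 excluded, plus 1 included -> 6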
@@ -350,57 +180,212 @@ class Job(BaseModel):
              return stage
          raise ValueError(f"Stage ID {stage_id} does not exists")

-     def make_strategy(self) -> list[DictStr]:
-         """Return List of combination of matrix values that already filter with
-         exclude and add include values.
-         """
-         if not (mt := self.strategy.matrix):
-             return [{}]
-         final: list[DictStr] = []
-         for r in [
-             {_k: _v for e in mapped for _k, _v in e.items()}
-             for mapped in itertools.product(
-                 *[[{k: v} for v in vs] for k, vs in mt.items()]
-             )
-         ]:
-             if any(
-                 all(r[k] == v for k, v in exclude.items())
-                 for exclude in self.strategy.exclude
-             ):
-                 continue
-             final.append(r)
+     @staticmethod
+     def set_outputs(output: DictData) -> DictData:
+         if len(output) > 1:
+             return {"strategies": output}

-         if not final:
-             return [{}]
+         return output[next(iter(output))]

-         for include in self.strategy.include:
-             if include.keys() != final[0].keys():
-                 raise ValueError("Include should have the keys equal to matrix")
-             if any(all(include[k] == v for k, v in f.items()) for f in final):
+     def strategy_execute(
+         self,
+         strategy: DictData,
+         params: DictData,
+         *,
+         event: Event | None = None,
+     ) -> Result:
+         """Strategy execution with passing dynamic parameters from the pipeline
+         stage execution.
+
+         :param strategy:
+         :param params:
+         :param event: An manger event that pass to the PoolThreadExecutor.
+         :rtype: Result
+         """
+         _stop_rs: Result = Result(
+             status=1,
+             context={
+                 gen_id(strategy): {
+                     "matrix": strategy,
+                     "stages": {},
+                     "error": "Event stopped",
+                 },
+             },
+         )
+         if event and event.is_set():
+             return _stop_rs
+
+         # NOTE: Create strategy execution context and update a matrix and copied
+         #   of params. So, the context value will have structure like;
+         #   ---
+         #   {
+         #       "params": { ... },      <== Current input params
+         #       "jobs": { ... },        <== Current input params
+         #       "matrix": { ... }       <== Current strategy value
+         #   }
+         #
+         context: DictData = params
+         context.update({"matrix": strategy})
+
+         # IMPORTANT: The stage execution only run sequentially one-by-one.
+         for stage in self.stages:
+             _st_name: str = stage.id or stage.name
+
+             if stage.is_skip(params=context):
+                 logging.info(f"[JOB]: Skip the stage: {_st_name!r}")
                  continue
-             final.append(include)
-         return final
+             logging.info(f"[JOB]: Start execute the stage: {_st_name!r}")

-     def execute(self, params: DictData | None = None) -> DictData:
-         """Execute job with passing dynamic parameters from the pipeline."""
-         for strategy in self.make_strategy():
-             params.update({"matrix": strategy})
+             # NOTE: Logging a matrix that pass on this stage execution.
+             if strategy:
+                 logging.info(f"[...]: Matrix: {strategy}")

-             # IMPORTANT: The stage execution only run sequentially one-by-one.
-             for stage in self.stages:
-                 logging.info(
-                     f"[JOB]: Start execute the stage: "
-                     f"{(stage.id if stage.id else stage.name)!r}"
+             # NOTE:
+             #       I do not use below syntax because `params` dict be the
+             #   reference memory pointer and it was changed when I action
+             #   anything like update or re-construct this.
+             #
+             #       ... params |= stage.execute(params=params)
+             #
+             #       This step will add the stage result to ``stages`` key in
+             #   that stage id. It will have structure like;
+             #   ---
+             #   {
+             #       "params": { ... },
+             #       "jobs": { ... },
+             #       "matrix": { ... },
+             #       "stages": { { "stage-id-1": ... }, ... }
+             #   }
+             #
+             if event and event.is_set():
+                 return _stop_rs
+             rs: Result = stage.execute(params=context)
+             if rs.status == 0:
+                 stage.set_outputs(rs.context, params=context)
+             else:
+                 raise JobException(
+                     f"Getting status does not equal zero on stage: "
+                     f"{stage.name}."
                  )
+         # TODO: Filter and warning if it pass any objects to context between
+         #   strategy job executor like function, etc.
+         return Result(
+             status=0,
+             context={
+                 gen_id(strategy): {
+                     "matrix": strategy,
+                     "stages": context.pop("stages", {}),
+                 },
+             },
+         )

-                 # NOTE:
-                 #       I do not use below syntax because `params` dict be the
-                 #   reference memory pointer and it was changed when I action
-                 #   anything like update or re-construct this.
-                 #   ... params |= stage.execute(params=params)
-                 stage.execute(params=params)
-         # TODO: We should not return matrix key to outside
-         return params
+     def execute(self, params: DictData | None = None) -> Result:
+         """Job execution with passing dynamic parameters from the pipeline
+         execution. It will generate matrix values at the first step and for-loop
+         any metrix to all stages dependency.
+
+         :param params: An input parameters that use on job execution.
+         :rtype: Result
+         """
+         strategy_context: DictData = {}
+         rs = Result(context=strategy_context)
+
+         if self.strategy.max_parallel == 1:
+             for strategy in self.strategy.make():
+                 rs: Result = self.strategy_execute(
+                     strategy, params=copy.deepcopy(params)
+                 )
+                 strategy_context.update(rs.context)
+             return rs
+
+         # FIXME: (WF001) I got error that raise when use
+         #   ``ProcessPoolExecutor``;
+         #   ---
+         #   _pickle.PicklingError: Can't pickle
+         #       <function ??? at 0x000001F0BE80F160>: attribute lookup ???
+         #       on ddeutil.workflow.stage failed
+         #
+         with Manager() as manager:
+             event: Event = manager.Event()
+
+             with ProcessPoolExecutor(
+                 max_workers=self.strategy.max_parallel
+             ) as pool:
+                 pool_result: list[Future] = [
+                     pool.submit(
+                         self.strategy_execute,
+                         st,
+                         params=copy.deepcopy(params),
+                         event=event,
+                     )
+                     for st in self.strategy.make()
+                 ]
+                 if self.strategy.fail_fast:
+
+                     # NOTE: Get results from a collection of tasks with a
+                     #   timeout that has the first exception.
+                     done, not_done = wait(
+                         pool_result, timeout=60, return_when=FIRST_EXCEPTION
+                     )
+                     nd: str = (
+                         f", the strategies do not run is {not_done}"
+                         if not_done
+                         else ""
+                     )
+                     logging.warning(f"[JOB]: Strategy is set Fail Fast{nd}")
+
+                     # NOTE: Stop all running tasks
+                     event.set()
+
+                     # NOTE: Cancel any scheduled tasks
+                     for future in pool_result:
+                         future.cancel()
+
+                     rs.status = 0
+                     for f in done:
+                         if f.exception():
+                             rs.status = 1
+                             logging.error(
+                                 f"One task failed with: {f.exception()}, "
+                                 f"shutting down"
+                             )
+                         elif f.cancelled():
+                             continue
+                         else:
+                             rs: Result = f.result(timeout=60)
+                             strategy_context.update(rs.context)
+                     rs.context = strategy_context
+                     return rs
+
+                 for pool_rs in as_completed(pool_result):
+                     try:
+                         rs: Result = pool_rs.result(timeout=60)
+                         strategy_context.update(rs.context)
+                     except PickleError as err:
+                         # NOTE: I do not want to fix this issue because it does
+                         #   not make sense and over-engineering with this bug
+                         #   fix process.
+                         raise JobException(
+                             f"PyStage that create object on locals does use "
+                             f"parallel in strategy;\n\t{err}"
+                         ) from None
+                     except TimeoutError:
+                         rs.status = 1
+                         logging.warning("Task is hanging. Attempting to kill.")
+                         pool_rs.cancel()
+                         if not pool_rs.cancelled():
+                             logging.warning("Failed to cancel the task.")
+                         else:
+                             logging.warning("Task canceled successfully.")
+                     except StageException as err:
+                         rs.status = 1
+                         logging.warning(
+                             f"Get stage exception with fail-fast does not set;"
+                             f"\n\t{err}"
+                         )
+                 rs.status = 0
+                 rs.context = strategy_context
+         return rs


  class Pipeline(BaseModel):
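
The fail-fast branch of the new `Job.execute` above leans on `concurrent.futures.wait` with `return_when=FIRST_EXCEPTION`: block until the first strategy raises (or everything finishes), cancel what has not started, then inspect the finished futures. A self-contained sketch of that pattern, with an illustrative worker standing in for `strategy_execute` and a thread pool instead of the process pool used above:

    import time
    from concurrent.futures import FIRST_EXCEPTION, ThreadPoolExecutor, wait

    def run_strategy(n: int) -> int:
        # Illustrative worker: the third strategy fails after 3 seconds.
        time.sleep(n)
        if n == 3:
            raise RuntimeError("strategy 3 failed")
        return n

    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = [pool.submit(run_strategy, n) for n in range(1, 5)]
        # Block until the first raised exception, or until all complete.
        done, not_done = wait(futures, timeout=60, return_when=FIRST_EXCEPTION)
        # Cancel anything not yet finished, then inspect what did finish.
        for f in not_done:
            f.cancel()
        for f in done:
            if exc := f.exception():
                print(f"failed: {exc}")
            else:
                print(f"ok: {f.result()}")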
@@ -409,13 +394,73 @@ class Pipeline(BaseModel):
      coding line to execute it.
      """

-     desc: Optional[str] = Field(default=None)
-     params: dict[str, Params] = Field(default_factory=dict)
-     on: dict[str, DictStr] = Field(default_factory=dict)
-     jobs: dict[str, Job]
+     name: str = Field(description="A pipeline name.")
+     desc: Optional[str] = Field(
+         default=None,
+         description=(
+             "A pipeline description that is able to be string of markdown "
+             "content."
+         ),
+     )
+     params: dict[str, Param] = Field(
+         default_factory=dict,
+         description="A parameters that want to use on this pipeline.",
+     )
+     on: list[On] = Field(
+         default_factory=list,
+         description="A list of On instance for this pipeline schedule.",
+     )
+     jobs: dict[str, Job] = Field(
+         default_factory=dict,
+         description="A mapping of job ID and job model that already loaded.",
+     )
+
+     @classmethod
+     def from_loader(
+         cls,
+         name: str,
+         externals: DictData | None = None,
+     ) -> Self:
+         """Create Pipeline instance from the Loader object.
+
+         :param name: A pipeline name that want to pass to Loader object.
+         :param externals: An external parameters that want to pass to Loader
+             object.
+         """
+         loader: Loader = Loader(name, externals=(externals or {}))
+         loader_data: DictData = copy.deepcopy(loader.data)
+
+         # NOTE: Add name to loader data
+         loader_data["name"] = name.replace(" ", "_")
+
+         if "jobs" not in loader_data:
+             raise ValueError("Config does not set ``jobs`` value")
+
+         # NOTE: Prepare `on` data
+         cls.__bypass_on(loader_data)
+         return cls.model_validate(loader_data)
+
+     @classmethod
+     def __bypass_on(cls, data: DictData, externals: DictData | None = None):
+         """Bypass the on data to loaded config data."""
+         if on := data.pop("on", []):
+             if isinstance(on, str):
+                 on = [on]
+             if any(not isinstance(i, (dict, str)) for i in on):
+                 raise TypeError("The ``on`` key should be list of str or dict")
+             data["on"] = [
+                 (
+                     Loader(n, externals=(externals or {})).data
+                     if isinstance(n, str)
+                     else n
+                 )
+                 for n in on
+             ]
+         return data

      @model_validator(mode="before")
      def __prepare_params(cls, values: DictData) -> DictData:
+         """Prepare the params key."""
          # NOTE: Prepare params type if it passing with only type value.
          if params := values.pop("params", {}):
              values["params"] = {
@@ -428,23 +473,16 @@ class Pipeline(BaseModel):
              }
          return values

-     @classmethod
-     def from_loader(
-         cls,
-         name: str,
-         externals: DictData | None = None,
-     ) -> Self:
-         """Create Pipeline instance from the Loader object."""
-         loader: Loader = Loader(name, externals=(externals or {}))
-         if "jobs" not in loader.data:
-             raise ValueError("Config does not set ``jobs`` value")
-         return cls(
-             jobs=loader.data["jobs"],
-             params=loader.data["params"],
-         )
-
      @model_validator(mode="after")
-     def job_checking_needs(self):
+     def __validate_jobs_need(self):
+         for job in self.jobs:
+             if not_exist := [
+                 need for need in self.jobs[job].needs if need not in self.jobs
+             ]:
+                 raise PipelineException(
+                     f"This needed jobs: {not_exist} do not exist in this "
+                     f"pipeline."
+                 )
          return self

      def job(self, name: str) -> Job:
@@ -460,17 +498,148 @@ class Pipeline(BaseModel):
          raise ValueError(f"Job {name!r} does not exists")
          return self.jobs[name]

+     def parameterize(self, params: DictData) -> DictData:
+         """Prepare parameters before passing to execution process. This method
+         will create jobs key to params mapping that will keep any result from
+         job execution.
+
+         :param params: A parameter mapping that receive from pipeline execution.
+         :rtype: DictData
+         """
+         # VALIDATE: Incoming params should have keys that set on this pipeline.
+         if check_key := tuple(
+             f"{k!r}"
+             for k in self.params
+             if (k not in params and self.params[k].required)
+         ):
+             raise PipelineException(
+                 f"Required Param on this pipeline setting does not set: "
+                 f"{', '.join(check_key)}."
+             )
+
+         # NOTE: mapping type of param before adding it to params variable.
+         return {
+             "params": (
+                 params
+                 | {
+                     k: self.params[k].receive(params[k])
+                     for k in params
+                     if k in self.params
+                 }
+             ),
+             "jobs": {},
+         }
+
+     def release(
+         self,
+         on: On,
+         params: DictData | None = None,
+         *,
+         waiting_sec: int = 600,
+         sleep_interval: int = 10,
+     ) -> str:
+         """Start running pipeline with the on schedule in period of 30 minutes.
+         That mean it will still running at background 30 minutes until the
+         schedule matching with its time.
+         """
+         params: DictData = params or {}
+         logging.info(f"[CORE] Start release: {self.name!r} : {on.cronjob}")
+
+         gen: CronRunner = on.generate(datetime.now())
+         tz: ZoneInfo = gen.tz
+         next_running_time: datetime = gen.next
+
+         if get_diff_sec(next_running_time, tz=tz) < waiting_sec:
+             logging.debug(
+                 f"[CORE]: {self.name} closely to run >> "
+                 f"{next_running_time:%Y-%m-%d %H:%M:%S}"
+             )
+
+             # NOTE: Release when the time is nearly to schedule time.
+             while (duration := get_diff_sec(next_running_time, tz=tz)) > 15:
+                 time.sleep(sleep_interval)
+                 logging.debug(
+                     f"[CORE]: {self.name!r} : Sleep until: {duration}"
+                 )
+
+             time.sleep(1)
+             rs: Result = self.execute(params=params)
+             logging.debug(f"{rs.context}")
+
+             return f"[CORE]: Start Execute: {self.name}"
+         return f"[CORE]: {self.name} does not closely to run yet."
+
+     def poke(self, params: DictData | None = None):
+         """Poke pipeline threading task for executing with its schedules that
+         was set on the `on`.
+         """
+         params: DictData = params or {}
+         logging.info(
+             f"[CORE]: Start Poking: {self.name!r} :"
+             f"{gen_id(self.name, unique=True)}"
+         )
+         results = []
+         with ThreadPoolExecutor(
+             max_workers=int(
+                 os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING", "4")
+             ),
+         ) as executor:
+             futures: list[Future] = [
+                 executor.submit(
+                     self.release,
+                     on,
+                     params=params,
+                 )
+                 for on in self.on
+             ]
+             for future in as_completed(futures):
+                 rs = future.result()
+                 logging.info(rs)
+                 results.append(rs)
+         return results
+
+     def job_execute(
+         self,
+         job: str,
+         params: DictData,
+     ):
+         """Job Executor that use on pipeline executor.
+
+         :param job: A job ID that want to execute.
+         :param params: A params that was parameterized from pipeline execution.
+         """
+         # VALIDATE: check a job ID that exists in this pipeline or not.
+         if job not in self.jobs:
+             raise PipelineException(
+                 f"The job ID: {job} does not exists on {self.name!r} pipeline."
+             )
+
+         job_obj: Job = self.jobs[job]
+
+         rs: Result = job_obj.execute(params=params)
+         if rs.status != 0:
+             logging.warning(
+                 f"Getting status does not equal zero on job: {job}."
+             )
+             return Result(
+                 status=1, context={job: job_obj.set_outputs(rs.context)}
+             )
+
+         return Result(status=0, context={job: job_obj.set_outputs(rs.context)})
+
      def execute(
          self,
          params: DictData | None = None,
-         time_out: int = 60,
-     ) -> DictData:
+         *,
+         timeout: int = 60,
+     ) -> Result:
          """Execute pipeline with passing dynamic parameters to any jobs that
          included in the pipeline.

-         :param params: An input parameters that use on pipeline execution.
-         :param time_out: A time out in second unit that use for limit time of
-             this pipeline execution.
+         :param params: An input parameters that use on pipeline execution that
+             will parameterize before using it.
+         :param timeout: A pipeline execution time out in second unit that use
+             for limit time of execution and waiting job dependency.
+         :rtype: Result

          ---

@@ -483,66 +652,82 @@ class Pipeline(BaseModel):
          For example, when I want to use the output from previous stage, I
          can access it with syntax:

-         ... "<job-name>.stages.<stage-id>.outputs.<key>"
+         ... ${job-name}.stages.${stage-id}.outputs.${key}

          """
+         logging.info(
+             f"[CORE]: Start Execute: {self.name}:"
+             f"{gen_id(self.name, unique=True)}"
+         )
          params: DictData = params or {}
-         if check_key := tuple(f"{k!r}" for k in self.params if k not in params):
-             raise ValueError(
-                 f"Parameters that needed on pipeline does not pass: "
-                 f"{', '.join(check_key)}."
-             )

-         if any(p not in params for p in self.params if self.params[p].required):
-             raise ValueError("Required parameter does not pass")
-
-         # NOTE: mapping type of param before adding it to params variable.
-         params: DictData = {
-             "params": (
-                 params
-                 | {
-                     k: self.params[k].receive(params[k])
-                     for k in params
-                     if k in self.params
-                 }
-             ),
-             "jobs": {},
-         }
+         # NOTE: It should not do anything if it does not have job.
+         if not self.jobs:
+             logging.warning("[PIPELINE]: This pipeline does not have any jobs")
+             return Result(status=0, context=params)

          # NOTE: create a job queue that keep the job that want to running after
          #   it dependency condition.
-         jq = Queue()
+         jq: Queue = Queue()
          for job_id in self.jobs:
              jq.put(job_id)

          ts: float = time.monotonic()
-         not_time_out_flag = True
-
-         # IMPORTANT: The job execution can run parallel and waiting by needed.
-         while not jq.empty() and (
-             not_time_out_flag := ((time.monotonic() - ts) < time_out)
-         ):
-             job_id: str = jq.get()
-             logging.info(f"[PIPELINE]: Start execute the job: {job_id!r}")
-             job: Job = self.jobs[job_id]
-
-             # TODO: Condition on ``needs`` of this job was set. It should create
-             #   multithreading process on this step.
-             #   But, I don't know how to handle changes params between each job
-             #   execution while its use them together.
-             #   ---
-             #   >>> import multiprocessing
-             #   >>> with multiprocessing.Pool(processes=3) as pool:
-             #   ...     results = pool.starmap(merge_names, ('', '', ...))
-             #
-             if any(params["jobs"].get(need) for need in job.needs):
-                 jq.put(job_id)
+         not_time_out_flag: bool = True
+
+         # NOTE: Create result context that will pass this context to any
+         #   execution dependency.
+         rs: Result = Result(context=self.parameterize(params))
+         if (
+             worker := int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "1"))
+         ) > 1:
+             # IMPORTANT: The job execution can run parallel and waiting by
+             #   needed.
+             with ThreadPoolExecutor(max_workers=worker) as executor:
+                 futures: list[Future] = []
+                 while not jq.empty() and (
+                     not_time_out_flag := ((time.monotonic() - ts) < timeout)
+                 ):
+                     job_id: str = jq.get()
+                     logging.info(
+                         f"[PIPELINE]: Start execute the job: {job_id!r}"
+                     )
+                     job: Job = self.jobs[job_id]
+                     if any(
+                         need not in rs.context["jobs"] for need in job.needs
+                     ):
+                         jq.put(job_id)
+                     futures.append(
+                         executor.submit(
+                             self.job_execute,
+                             job_id,
+                             params=copy.deepcopy(rs.context),
+                         ),
+                     )
+                 for future in as_completed(futures):
+                     job_rs: Result = future.result(timeout=20)
+                     rs.context["jobs"].update(job_rs.context)
+         else:
+             logging.info(
+                 f"[CORE]: Run {self.name} with non-threading job executor"
+             )
+             while not jq.empty() and (
+                 not_time_out_flag := ((time.monotonic() - ts) < timeout)
+             ):
+                 job_id: str = jq.get()
+                 logging.info(f"[PIPELINE]: Start execute the job: {job_id!r}")
+                 job: Job = self.jobs[job_id]
+                 if any(need not in rs.context["jobs"] for need in job.needs):
+                     jq.put(job_id)
+
+                 job_rs = self.job_execute(
+                     job_id, params=copy.deepcopy(rs.context)
+                 )
+                 rs.context["jobs"].update(job_rs.context)

-             job.execute(params=params)
-             params["jobs"][job_id] = {
-                 "stages": params.pop("stages", {}),
-                 "matrix": params.pop("matrix", {}),
-             }
          if not not_time_out_flag:
-             raise RuntimeError("Execution of pipeline was time out")
-         return params
+             logging.warning("Execution of pipeline was time out")
+             rs.status = 1
+             return rs
+         rs.status = 0
+         return rs
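
The dependency handling in the new `Pipeline.execute` is a requeue loop rather than a topological sort: a job whose `needs` are not yet present in the `jobs` context goes back on the queue, and the loop is bounded by the wall-clock `timeout` rather than a cycle check. The same idea in isolation (this sketch adds a `continue` after requeueing, which the diffed code handles differently, and drops the timeout for brevity):

    from queue import Queue

    # Toy dependency graph: job ID -> the job IDs it needs first.
    needs = {
        "load": ["transform"],
        "transform": ["extract"],
        "extract": [],
    }
    done: dict[str, str] = {}

    jq: Queue = Queue()
    for job_id in needs:
        jq.put(job_id)

    while not jq.empty():
        job_id = jq.get()
        # Requeue until every needed job has produced its output.
        if any(n not in done for n in needs[job_id]):
            jq.put(job_id)
            continue
        done[job_id] = f"result-of-{job_id}"

    print(list(done))  # ['extract', 'transform', 'load']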