ddeutil-workflow 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,572 @@
+ # ------------------------------------------------------------------------------
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+ # Licensed under the MIT License. See LICENSE in the project root for
+ # license information.
+ # ------------------------------------------------------------------------------
+ from __future__ import annotations
+
+ import copy
+ import time
+ from concurrent.futures import (
+     FIRST_EXCEPTION,
+     Future,
+     ThreadPoolExecutor,
+     as_completed,
+     wait,
+ )
+ from pickle import PickleError
+ from textwrap import dedent
+ from threading import Event
+ from typing import Optional
+
+ from pydantic import BaseModel, Field
+ from pydantic.functional_validators import field_validator, model_validator
+ from typing_extensions import Self
+
+ from .__types import (
+     DictData,
+     DictStr,
+     Matrix,
+     MatrixExclude,
+     MatrixInclude,
+     TupleStr,
+ )
+ from .exceptions import (
+     JobException,
+     StageException,
+     UtilException,
+ )
+ from .log import get_logger
+ from .stage import Stage
+ from .utils import (
+     Result,
+     cross_product,
+     dash2underscore,
+     filter_func,
+     gen_id,
+     has_template,
+ )
+
+ logger = get_logger("ddeutil.workflow")
+
+
+ __all__: TupleStr = (
+     "Strategy",
+     "Job",
+ )
+
+
+ class Strategy(BaseModel):
+     """Strategy model that combines matrix values into a list of strategies
+     for running a special job.
+
+     Data Validate:
+         >>> strategy = {
+         ...     'max-parallel': 1,
+         ...     'fail-fast': False,
+         ...     'matrix': {
+         ...         'first': [1, 2, 3],
+         ...         'second': ['foo', 'bar'],
+         ...     },
+         ...     'include': [{'first': 4, 'second': 'foo'}],
+         ...     'exclude': [{'first': 1, 'second': 'bar'}],
+         ... }
+     """
+
+     fail_fast: bool = Field(
+         default=False,
+         serialization_alias="fail-fast",
+     )
+     max_parallel: int = Field(
+         default=1,
+         gt=0,
+         description=(
+             "The maximum number of thread pool workers that run in parallel."
+         ),
+         serialization_alias="max-parallel",
+     )
+     matrix: Matrix = Field(
+         default_factory=dict,
+         description=(
+             "A mapping of matrix values to cross product into possible "
+             "strategies."
+         ),
+     )
+     include: MatrixInclude = Field(
+         default_factory=list,
+         description="A list of additional matrix values to add in.",
+     )
+     exclude: MatrixExclude = Field(
+         default_factory=list,
+         description="A list of matrix values to filter out.",
+     )
+
+     @model_validator(mode="before")
+     def __prepare_keys(cls, values: DictData) -> DictData:
+         """Rename keys that use a dash to underscores because Python does not
+         allow a dash character in variable names.
+         """
+         dash2underscore("max-parallel", values)
+         dash2underscore("fail-fast", values)
+         return values
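This before-mode validator is what lets the dash-separated keys from the YAML data ('max-parallel', 'fail-fast') pass Pydantic validation. The real helper lives in .utils, so the snippet below is only a sketch of what dash2underscore is assumed to do: move a dash key onto its underscore twin in place.

    # Assumed behaviour of the dash2underscore() helper from .utils: rename a
    # dash key to its underscore form in place (illustrative sketch only).
    def dash2underscore(key: str, values: dict) -> dict:
        if key in values:
            values[key.replace("-", "_")] = values.pop(key)
        return values

    data = {"max-parallel": 2, "fail-fast": True, "matrix": {"first": [1, 2]}}
    dash2underscore("max-parallel", data)
    dash2underscore("fail-fast", data)
    print(data)
    # {'matrix': {'first': [1, 2]}, 'max_parallel': 2, 'fail_fast': True}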
+
+     def is_set(self) -> bool:
+         """Return True if this strategy was set from the YAML template."""
+         return len(self.matrix) > 0
+
+     def make(self) -> list[DictStr]:
+         """Return the list of the cross product of the matrix values, with
+         the exclude list filtered out and the include list added.
+
+         :rtype: list[DictStr]
+         """
+         # NOTE: If the matrix is not set, it returns a list of an empty dict.
+         if not (mt := self.matrix):
+             return [{}]
+
+         final: list[DictStr] = []
+         for r in cross_product(matrix=mt):
+             if any(
+                 all(r[k] == v for k, v in exclude.items())
+                 for exclude in self.exclude
+             ):
+                 continue
+             final.append(r)
+
+         # NOTE: If the generated matrix and the include list are empty, it
+         #   returns a list of an empty dict.
+         if not final and not self.include:
+             return [{}]
+
+         # NOTE: Add the include values to the generated matrix.
+         add: list[DictStr] = []
+         for include in self.include:
+             # VALIDATE:
+             #   Any key in an include item should be a subset of the keys of
+             #   some generated matrix value.
+             if all(not (set(include.keys()) <= set(m.keys())) for m in final):
+                 raise ValueError("Include should have the keys equal to matrix")
+
+             # VALIDATE:
+             #   Skip an include value that duplicates one already in the
+             #   generated matrix.
+             if any(
+                 all(include.get(k) == v for k, v in m.items())
+                 for m in [*final, *add]
+             ):
+                 continue
+             add.append(include)
+         final.extend(add)
+         return final
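To make the make() behaviour concrete, here is a standalone sketch that reproduces the exclude/include handling for the matrix shown in the class docstring, assuming cross_product from .utils behaves like itertools.product over the matrix keys (the include key validation is omitted for brevity):

    # Illustrative sketch of Strategy.make() on the docstring's example data,
    # assuming cross_product() works like itertools.product over matrix keys.
    from itertools import product

    matrix = {"first": [1, 2, 3], "second": ["foo", "bar"]}
    exclude = [{"first": 1, "second": "bar"}]
    include = [{"first": 4, "second": "foo"}]

    final = []
    for values in product(*matrix.values()):
        row = dict(zip(matrix, values))
        # Drop a combination when every key/value of an exclude item matches.
        if any(all(row[k] == v for k, v in ex.items()) for ex in exclude):
            continue
        final.append(row)

    # Append include items that do not duplicate a generated combination.
    for inc in include:
        if not any(all(inc.get(k) == v for k, v in m.items()) for m in final):
            final.append(inc)

    print(len(final))  # 6: five surviving combinations plus the include item

Under that assumption, Strategy(**strategy).make() on the docstring data yields the same six dictionaries.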
+
+
+ class Job(BaseModel):
+     """Job model (a group of stages).
+
+     This job model lets you run a for-loop over a matrix strategy. If you
+     pass a matrix mapping that can be generated, the job will run once for
+     each generated matrix value.
+
+     Data Validate:
+         >>> job = {
+         ...     "runs-on": None,
+         ...     "strategy": {
+         ...         "max-parallel": 1,
+         ...         "matrix": {
+         ...             "first": [1, 2, 3],
+         ...             "second": ['foo', 'bar'],
+         ...         },
+         ...     },
+         ...     "needs": [],
+         ...     "stages": [
+         ...         {
+         ...             "name": "Some stage",
+         ...             "run": "print('Hello World')",
+         ...         },
+         ...         ...
+         ...     ],
+         ... }
+     """
+
+     id: Optional[str] = Field(
+         default=None,
+         description=(
+             "A job ID; this value is added from the workflow after the "
+             "validation process."
+         ),
+     )
+     desc: Optional[str] = Field(
+         default=None,
+         description="A job description that can be a string of Markdown content.",
+     )
+     runs_on: Optional[str] = Field(
+         default=None,
+         description="A target executor node that this job uses for execution.",
+         serialization_alias="runs-on",
+     )
+     stages: list[Stage] = Field(
+         default_factory=list,
+         description="A list of Stage models of this job.",
+     )
+     needs: list[str] = Field(
+         default_factory=list,
+         description="A list of job IDs that should run before this job.",
+     )
+     strategy: Strategy = Field(
+         default_factory=Strategy,
+         description="A strategy matrix that this job generates.",
+     )
+     run_id: Optional[str] = Field(
+         default=None,
+         description="A running job ID.",
+         repr=False,
+         exclude=True,
+     )
+
+     @model_validator(mode="before")
+     def __prepare_keys(cls, values: DictData) -> DictData:
+         """Rename keys that use a dash to underscores because Python does not
+         allow a dash character in variable names.
+         """
+         dash2underscore("runs-on", values)
+         return values
+
+     @field_validator("desc", mode="after")
+     def ___prepare_desc(cls, value: str) -> str:
+         """Prepare a description string that was created from a template."""
+         return dedent(value)
+
+     @model_validator(mode="after")
+     def __prepare_running_id(self):
+         if self.run_id is None:
+             self.run_id = gen_id(self.id or "", unique=True)
+
+         # VALIDATE: A job ID should not be dynamic with a params template.
+         if has_template(self.id):
+             raise ValueError("Job ID should not have any template.")
+
+         return self
+
+     def get_running_id(self, run_id: str) -> Self:
+         """Return a copy of this Job model with the running ID changed to an
+         input running ID.
+
+         :param run_id: A replacement job running ID.
+         :rtype: Self
+         """
+         return self.model_copy(update={"run_id": run_id})
+
+     def stage(self, stage_id: str) -> Stage:
+         """Return the stage model that matches an input stage ID."""
+         for stage in self.stages:
+             if stage_id == (stage.id or ""):
+                 return stage
+         raise ValueError(f"Stage ID {stage_id} does not exist.")
+
+     def set_outputs(self, output: DictData) -> DictData:
+         """Set the outputs of a job execution."""
+         if len(output) > 1 and self.strategy.is_set():
+             return {"strategies": output}
+         return output[next(iter(output))]
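As a result of this branch, a job's outputs take one of two shapes: multiple strategy results with a matrix set are nested under a "strategies" key, while a single result is unwrapped to its own context. A small sketch with plain dicts, where the numeric keys stand in for whatever gen_id(strategy) returns:

    # Sketch of the two shapes produced by the set_outputs() logic above; the
    # numeric keys are placeholders for gen_id(strategy) values.
    def set_outputs(output: dict, strategy_is_set: bool) -> dict:
        if len(output) > 1 and strategy_is_set:
            return {"strategies": output}
        return output[next(iter(output))]

    single = {"2150810470": {"matrix": {}, "stages": {}}}
    multi = {
        "9873503202": {"matrix": {"first": 1}, "stages": {}},
        "4855178605": {"matrix": {"first": 2}, "stages": {}},
    }

    print(set_outputs(single, strategy_is_set=False))
    # {'matrix': {}, 'stages': {}}
    print(set_outputs(multi, strategy_is_set=True))
    # {'strategies': {'9873503202': {...}, '4855178605': {...}}}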
+
+     def execute_strategy(
+         self,
+         strategy: DictData,
+         params: DictData,
+         *,
+         event: Event | None = None,
+     ) -> Result:
+         """Job strategy execution with passing dynamic parameters from the
+         workflow execution to the strategy matrix.
+
+         This execution is the minimum level of execution of this job model.
+
+         :param strategy: A matrix strategy value.
+         :param params: A dynamic parameters value.
+         :param event: A manager event that passes to the ThreadPoolExecutor.
+         :rtype: Result
+
+         :raise JobException: If it gets any error from a StageException or
+             UtilException.
+         """
+         # NOTE: Force stop this execution if the event was set from the main
+         #   execution.
+         if event and event.is_set():
+             return Result(
+                 status=1,
+                 context={
+                     gen_id(strategy): {
+                         "matrix": strategy,
+                         "stages": {},
+                         "error_message": {
+                             "message": "Process Event stopped before execution"
+                         },
+                     },
+                 },
+             )
+
+         # NOTE: Create a strategy execution context and update the matrix and
+         #   a copy of params. So, the context value will have a structure
+         #   like;
+         #   ---
+         #   {
+         #       "params": { ... },      <== Current input params
+         #       "jobs": { ... },        <== Current jobs context
+         #       "matrix": { ... }       <== Current strategy value
+         #   }
+         #
+         context: DictData = params
+         context.update({"matrix": strategy})
+
+         # IMPORTANT: The stage execution only runs sequentially one-by-one.
+         for stage in self.stages:
+
+             # IMPORTANT: Change any stage running ID to this job running ID.
+             stage: Stage = stage.get_running_id(self.run_id)
+
+             _st_name: str = stage.id or stage.name
+
+             if stage.is_skipped(params=context):
+                 logger.info(
+                     f"({self.run_id}) [JOB]: Skip the stage: {_st_name!r}"
+                 )
+                 continue
+
+             logger.info(
+                 f"({self.run_id}) [JOB]: Start execute the stage: {_st_name!r}"
+             )
+
+             # NOTE: Logging the matrix that passes to this stage execution.
+             if strategy:
+                 logger.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")
+
+             # NOTE:
+             #       I do not use the below syntax because the `params` dict is
+             #   a reference memory pointer, and it changes whenever I update
+             #   or re-construct it;
+             #
+             #       ... params |= stage.execute(params=params)
+             #
+             #       This step adds the stage result to the ``stages`` key of
+             #   the context under that stage ID. It will have a structure
+             #   like;
+             #   ---
+             #   {
+             #       "params": { ... },
+             #       "jobs": { ... },
+             #       "matrix": { ... },
+             #       "stages": { { "stage-id-1": ... }, ... }
+             #   }
+             #
+             if event and event.is_set():
+                 return Result(
+                     status=1,
+                     context={
+                         gen_id(strategy): {
+                             "matrix": strategy,
+                             # NOTE: If the job strategy executor uses
+                             #   multithreading, it does not need to filter
+                             #   function objects from the context.
+                             # ---
+                             # "stages": filter_func(context.pop("stages", {})),
+                             "stages": context.pop("stages", {}),
+                             "error_message": {
+                                 "message": (
+                                     "Process Event stopped before execution"
+                                 ),
+                             },
+                         },
+                     },
+                 )
+             try:
+                 rs: Result = stage.execute(params=context)
+                 stage.set_outputs(rs.context, to=context)
+             except (StageException, UtilException) as err:
+                 logger.error(
+                     f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
+                 )
+                 raise JobException(
+                     f"Get stage execution error: {err.__class__.__name__}: "
+                     f"{err}"
+                 ) from None
+
+             # NOTE: Remove the new stage object that was created from the
+             #   ``get_running_id`` method.
+             del stage
+
+         return Result(
+             status=0,
+             context={
+                 gen_id(strategy): {
+                     "matrix": strategy,
+                     # NOTE: (WF001) Filter out functions created in the
+                     #   stages value because they cannot be dumped with
+                     #   pickle when executed with multiprocessing.
+                     #
+                     "stages": filter_func(context.pop("stages", {})),
+                 },
+             },
+         )
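The Result returned here keys the whole strategy context by gen_id(strategy). The sketch below shows the assumed shape for one matrix value; the hash key and the "echo-stage" entry are hypothetical, and the exact per-stage keys depend on what Stage.set_outputs() writes:

    # Assumed shape of Result.context from execute_strategy() for one matrix
    # value; "9873503202" and "echo-stage" are hypothetical placeholder keys.
    strategy_context = {
        "9873503202": {
            "matrix": {"first": 1, "second": "foo"},
            "stages": {
                "echo-stage": {"outputs": {"message": "Hello World"}},
            },
        },
    }
    assert strategy_context["9873503202"]["matrix"]["second"] == "foo"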
+
+     def execute(self, params: DictData | None = None) -> Result:
+         """Job execution with passing dynamic parameters from the workflow
+         execution. It generates the matrix values at the first step and then
+         loops over each matrix value for all stage dependencies.
+
+         :param params: Input parameters that are used in the job execution.
+         :rtype: Result
+         """
+         context: DictData = {}
+
+         # NOTE: Normal Job execution without a parallel strategy.
+         if (not self.strategy.is_set()) or self.strategy.max_parallel == 1:
+             for strategy in self.strategy.make():
+                 rs: Result = self.execute_strategy(
+                     strategy, params=copy.deepcopy(params)
+                 )
+                 context.update(rs.context)
+             return Result(
+                 status=0,
+                 context=context,
+             )
+
+         # # WARNING: (WF001) I got an error that raises when using
+         # #   ``ProcessPoolExecutor``;
+         # #   ---
+         # #   _pickle.PicklingError: Can't pickle
+         # #       <function ??? at 0x000001F0BE80F160>: attribute lookup ???
+         # #       on ddeutil.workflow.stage failed
+         # #
+         # # from multiprocessing import Event, Manager
+         # with Manager() as manager:
+         #     event: Event = manager.Event()
+         #
+         #     # NOTE: Start the process pool executor for running the strategy
+         #     #   executor in parallel mode.
+         #     with ProcessPoolExecutor(
+         #         max_workers=self.strategy.max_parallel
+         #     ) as executor:
+         #         futures: list[Future] = [
+         #             executor.submit(
+         #                 self.execute_strategy,
+         #                 strategy,
+         #                 params=copy.deepcopy(params),
+         #                 event=event,
+         #             )
+         #             for strategy in self.strategy.make()
+         #         ]
+         #         if self.strategy.fail_fast:
+         #             rs = self.__catch_fail_fast(event, futures)
+         #         else:
+         #             rs = self.__catch_all_completed(futures)
+
+         # NOTE: Create an event to signal the running strategies to stop.
+         event: Event = Event()
+
+         with ThreadPoolExecutor(
+             max_workers=self.strategy.max_parallel
+         ) as executor:
+             futures: list[Future] = [
+                 executor.submit(
+                     self.execute_strategy,
+                     strategy,
+                     params=copy.deepcopy(params),
+                     event=event,
+                 )
+                 for strategy in self.strategy.make()
+             ]
+
+             # NOTE: Dynamically catch the futures with the fail-fast flag.
+             if self.strategy.fail_fast:
+                 rs: Result = self.__catch_fail_fast(event, futures)
+             else:
+                 rs: Result = self.__catch_all_completed(futures)
+         return Result(
+             status=0,
+             context=rs.context,
+         )
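Putting the pieces together, a Job can be built straight from a mapping like the one in the class docstring and executed. This is a minimal usage sketch, not a tested recipe: it assumes the module path ddeutil.workflow.job and that an empty params mapping is enough for a stage that only runs a print statement.

    # Hypothetical usage sketch (assumed import path: ddeutil.workflow.job).
    from ddeutil.workflow.job import Job

    job = Job(**{
        "strategy": {
            "max-parallel": 2,
            "matrix": {"first": [1, 2, 3], "second": ["foo", "bar"]},
        },
        "stages": [{"name": "Some stage", "run": "print('Hello World')"}],
    })

    # With a matrix set and max-parallel > 1, execute() fans the strategies
    # out to a ThreadPoolExecutor; each key in rs.context is one strategy.
    rs = job.execute(params={})
    print(rs.status, len(rs.context))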
+
+     def __catch_fail_fast(self, event: Event, futures: list[Future]) -> Result:
+         """Catch the job parallel pool futures with fail-fast mode. It stops
+         all futures that are not done when it receives the first exception
+         from any running future.
+
+         :param event: An event that signals the running strategies to stop.
+         :param futures: A list of futures.
+         :rtype: Result
+         """
+         context: DictData = {}
+         # NOTE: Get results from the collection of tasks with a timeout that
+         #   returns when the first exception is raised.
+         done, not_done = wait(
+             futures, timeout=1800, return_when=FIRST_EXCEPTION
+         )
+         nd: str = (
+             f", the strategies that do not run are {not_done}"
+             if not_done
+             else ""
+         )
+         logger.debug(f"({self.run_id}) [JOB]: Strategy is set Fail Fast{nd}")
+
+         if len(done) != len(futures):
+
+             # NOTE: Stop all running tasks.
+             event.set()
+
+             # NOTE: Cancel any scheduled tasks.
+             for future in futures:
+                 future.cancel()
+
+         status: int = 0
+         for future in done:
+             if future.exception():
+                 status = 1
+                 logger.error(
+                     f"({self.run_id}) [JOB]: One stage failed with: "
+                     f"{future.exception()}, shutting down this future."
+                 )
+             elif future.cancelled():
+                 continue
+             else:
+                 rs: Result = future.result(timeout=60)
+                 context.update(rs.context)
+         return Result(status=status, context=context)
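The method above is an instance of a standard concurrent.futures pattern: wait for the first exception, set a shared Event so cooperating workers stop early, and cancel whatever has not started yet. A self-contained sketch of the same pattern with stdlib pieces only:

    # Stand-alone sketch of the fail-fast pattern used by __catch_fail_fast().
    from concurrent.futures import FIRST_EXCEPTION, ThreadPoolExecutor, wait
    from threading import Event

    stop = Event()

    def work(n: int) -> int:
        if stop.is_set():          # cooperating workers check the event
            return -1
        if n == 2:
            raise RuntimeError("boom")
        return n * n

    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = [pool.submit(work, n) for n in range(6)]
        done, not_done = wait(futures, timeout=30, return_when=FIRST_EXCEPTION)
        if len(done) != len(futures):
            stop.set()                 # signal running workers to bail out
            for f in not_done:
                f.cancel()             # cancel anything not started yet
        for f in done:
            if f.exception():
                print("failed:", f.exception())
            else:
                print("ok:", f.result())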
+
+     def __catch_all_completed(self, futures: list[Future]) -> Result:
+         """Catch the job parallel pool futures with all-completed mode.
+
+         :param futures: A list of futures.
+         :rtype: Result
+         """
+         context: DictData = {}
+         status: int = 0
+         for future in as_completed(futures):
+             try:
+                 rs: Result = future.result(timeout=60)
+                 context.update(rs.context)
+             except PickleError as err:
+                 # NOTE: (WF001) I do not want to fix this issue because it
+                 #   does not make sense, and the fixing process would be
+                 #   over-engineering for this bug.
+                 raise JobException(
+                     f"PyStage that creates objects in locals does not "
+                     f"support parallel strategy execution;\n\t{err}"
+                 ) from None
+             except TimeoutError:
+                 status = 1
+                 logger.warning(
+                     f"({self.run_id}) [JOB]: Task is hanging. Attempting to "
+                     f"kill."
+                 )
+                 future.cancel()
+                 time.sleep(0.1)
+                 if not future.cancelled():
+                     logger.warning(
+                         f"({self.run_id}) [JOB]: Failed to cancel the task."
+                     )
+                 else:
+                     logger.warning(
+                         f"({self.run_id}) [JOB]: Task canceled successfully."
+                     )
+             except JobException as err:
+                 status = 1
+                 logger.error(
+                     f"({self.run_id}) [JOB]: Get a stage exception while "
+                     f"fail-fast is not set;\n{err.__class__.__name__}:\n\t"
+                     f"{err}"
+                 )
+         return Result(status=status, context=context)
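For contrast with the fail-fast path, all-completed mode simply drains as_completed() and downgrades the status when a future fails, without cancelling the remaining strategies. A minimal stdlib sketch of that behaviour:

    # Stand-alone sketch of the all-completed pattern used by
    # __catch_all_completed(): collect every result and downgrade the status
    # when any future raised, without cancelling the others.
    from concurrent.futures import ThreadPoolExecutor, as_completed

    def work(n: int) -> int:
        if n == 2:
            raise RuntimeError("boom")
        return n * n

    status, results = 0, {}
    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = {pool.submit(work, n): n for n in range(6)}
        for future in as_completed(futures):
            try:
                results[futures[future]] = future.result(timeout=60)
            except RuntimeError as err:
                status = 1
                print("strategy failed:", err)
    print(status, results)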