ddeutil-workflow 0.0.13__py3-none-any.whl → 0.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/job.py CHANGED
@@ -3,6 +3,8 @@
  # Licensed under the MIT License. See LICENSE in the project root for
  # license information.
  # ------------------------------------------------------------------------------
+ """Job Model that use for keeping stages and node that running its stages.
+ """
  from __future__ import annotations

  import copy
@@ -15,7 +17,6 @@ from concurrent.futures import (
      wait,
  )
  from functools import lru_cache
- from pickle import PickleError
  from textwrap import dedent
  from threading import Event
  from typing import Optional
@@ -61,9 +62,13 @@ __all__: TupleStr = (

  @freeze_args
  @lru_cache
- def make(matrix, include, exclude) -> list[DictStr]:
-     """Return List of product of matrix values that already filter with
-     exclude and add include.
+ def make(
+     matrix: Matrix,
+     include: MatrixInclude,
+     exclude: MatrixExclude,
+ ) -> list[DictStr]:
+     """Make a list of product of matrix values that already filter with
+     exclude matrix and add specific matrix with include.

      :param matrix: A matrix values that want to cross product to possible
          parallelism values.
@@ -72,11 +77,12 @@ def make(matrix, include, exclude) -> list[DictStr]:
      :rtype: list[DictStr]
      """
      # NOTE: If it does not set matrix, it will return list of an empty dict.
-     if not (mt := matrix):
+     if len(matrix) == 0:
          return [{}]

+     # NOTE: Remove matrix that exists on the exclude.
      final: list[DictStr] = []
-     for r in cross_product(matrix=mt):
+     for r in cross_product(matrix=matrix):
          if any(
              all(r[k] == v for k, v in exclude.items()) for exclude in exclude
          ):
@@ -85,7 +91,7 @@ def make(matrix, include, exclude) -> list[DictStr]:

      # NOTE: If it is empty matrix and include, it will return list of an
      #   empty dict.
-     if not final and not include:
+     if len(final) == 0 and not include:
          return [{}]

      # NOTE: Add include to generated matrix with exclude list.
@@ -95,16 +101,20 @@ def make(matrix, include, exclude) -> list[DictStr]:
          # Validate any key in include list should be a subset of some one
          # in matrix.
          if all(not (set(inc.keys()) <= set(m.keys())) for m in final):
-             raise ValueError("Include should have the keys equal to matrix")
+             raise ValueError(
+                 "Include should have the keys that equal to all final matrix."
+             )

          # VALIDATE:
-         # Validate value of include does not duplicate with generated
-         # matrix.
+         # Validate value of include should not duplicate with generated
+         # matrix. So, it will skip if this value already exists.
          if any(
              all(inc.get(k) == v for k, v in m.items()) for m in [*final, *add]
          ):
              continue
          add.append(inc)
+
+     # NOTE: Merge all matrix together.
      final.extend(add)
      return final

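The reworked `make()` above documents its behaviour only in prose, so here is a minimal standalone sketch of the same idea: cross-product the matrix, drop combinations matched by `exclude`, then append `include` entries that are valid and not duplicates. The helper `make_sketch` below is hypothetical (built on `itertools.product`), not the package's implementation.

```python
# Hypothetical helper, not the package's code: a condensed re-statement of the
# documented make() behaviour using itertools.product.
from itertools import product


def make_sketch(matrix: dict, include: list, exclude: list) -> list:
    if len(matrix) == 0:
        return [{}]

    # Cross product of every matrix value, e.g. os x python-version.
    final = [dict(zip(matrix, values)) for values in product(*matrix.values())]

    # Remove any combination that matches an exclude entry completely.
    final = [
        r
        for r in final
        if not any(all(r[k] == v for k, v in exc.items()) for exc in exclude)
    ]
    if len(final) == 0 and not include:
        return [{}]

    # Append include entries that are valid and not already generated.
    add = []
    for inc in include:
        if all(not (set(inc) <= set(m)) for m in final):
            raise ValueError("Include keys must be a subset of the matrix keys.")
        if any(all(inc.get(k) == v for k, v in m.items()) for m in [*final, *add]):
            continue
        add.append(inc)
    return final + add


print(make_sketch(
    matrix={"os": ["linux", "windows"], "python": ["3.9", "3.10"]},
    include=[{"os": "linux", "python": "3.12"}],
    exclude=[{"os": "windows", "python": "3.9"}],
))
# [{'os': 'linux', 'python': '3.9'}, {'os': 'linux', 'python': '3.10'},
#  {'os': 'windows', 'python': '3.10'}, {'os': 'linux', 'python': '3.12'}]
```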
@@ -190,7 +200,7 @@ class Strategy(BaseModel):


  class Job(BaseModel):
-     """Job Model (group of stages).
+     """Job Pydantic model object (group of stages).

      This job model allow you to use for-loop that call matrix strategy. If
      you pass matrix mapping and it able to generate, you will see it running
@@ -220,8 +230,7 @@ class Job(BaseModel):
      id: Optional[str] = Field(
          default=None,
          description=(
-             "A job ID, this value will add from workflow after validation "
-             "process."
+             "A job ID that it will add from workflow after validation process."
          ),
      )
      desc: Optional[str] = Field(
@@ -256,6 +265,9 @@ class Job(BaseModel):
      def __prepare_keys(cls, values: DictData) -> DictData:
          """Rename key that use dash to underscore because Python does not
          support this character exist in any variable name.
+
+         :param values: A passing value that coming for initialize this object.
+         :rtype: DictData
          """
          dash2underscore("runs-on", values)
          return values
@@ -266,8 +278,11 @@ class Job(BaseModel):
          return dedent(value)

      @model_validator(mode="after")
-     def __prepare_running_id(self):
-         """Prepare the job running ID."""
+     def __prepare_running_id(self) -> Self:
+         """Prepare the job running ID.
+
+         :rtype: Self
+         """
          if self.run_id is None:
              self.run_id = gen_id(self.id or "", unique=True)

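The two validators above follow the standard Pydantic v2 pattern: a `mode="before"` hook that normalises incoming keys and a `mode="after"` hook that fills derived defaults. A self-contained sketch under that assumption (hypothetical `JobSketch` model, using `uuid` in place of the package's `gen_id`):

```python
# Hypothetical miniature of the validator pattern above (assumes Pydantic v2);
# it is not the package's Job model.
import uuid
from typing import Optional

from pydantic import BaseModel, Field, model_validator


class JobSketch(BaseModel):
    id: Optional[str] = Field(default=None)
    runs_on: Optional[str] = Field(default=None)
    run_id: Optional[str] = Field(default=None)

    @model_validator(mode="before")
    @classmethod
    def _prepare_keys(cls, values: dict) -> dict:
        # Rename the dash key before field validation, mirroring dash2underscore.
        if isinstance(values, dict) and "runs-on" in values:
            values["runs_on"] = values.pop("runs-on")
        return values

    @model_validator(mode="after")
    def _prepare_running_id(self) -> "JobSketch":
        # Fill a running ID when the caller did not pass one.
        if self.run_id is None:
            self.run_id = str(uuid.uuid4())
        return self


job = JobSketch.model_validate({"id": "first-job", "runs-on": "local"})
print(job.runs_on, job.run_id)   # local <generated-id>
```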
@@ -287,17 +302,53 @@ class Job(BaseModel):
          return self.model_copy(update={"run_id": run_id})

      def stage(self, stage_id: str) -> Stage:
-         """Return stage model that match with an input stage ID."""
+         """Return stage model that match with an input stage ID.
+
+         :param stage_id: A stage ID that want to extract from this job.
+         :rtype: Stage
+         """
          for stage in self.stages:
              if stage_id == (stage.id or ""):
                  return stage
          raise ValueError(f"Stage ID {stage_id} does not exists")

-     def set_outputs(self, output: DictData) -> DictData:
-         """Setting output of job execution"""
-         if len(output) > 1 and self.strategy.is_set():
-             return {"strategies": output}
-         return output[next(iter(output))]
+     def set_outputs(self, output: DictData, to: DictData) -> DictData:
+         """Set an outputs from execution process to the receive context. The
+         result from execution will pass to value of ``strategies`` key.
+
+         For example of setting output method, If you receive execute output
+         and want to set on the `to` like;
+
+             ... (i) output: {'strategy01': bar, 'strategy02': bar}
+             ... (ii) to: {'jobs'}
+
+         The result of the `to` variable will be;
+
+             ... (iii) to: {
+                 'jobs': {
+                     'strategies': {
+                         'strategy01': bar, 'strategy02': bar
+                     }
+                 }
+             }
+
+         :param output: An output context.
+         :param to: A context data that want to add output result.
+         :rtype: DictData
+         """
+         if self.id is None:
+             raise JobException(
+                 "This job do not set the ID before setting output."
+             )
+
+         to["jobs"][self.id] = (
+             {"strategies": output}
+             if self.strategy.is_set()
+             # NOTE:
+             #   This is the best way to get single key from dict.
+             else output[next(iter(output))]
+         )
+         return to

      def execute_strategy(
          self,
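The new `set_outputs` signature writes into a receiving context instead of returning a fresh dict. A hedged sketch of that contract as a standalone function (hypothetical `set_outputs_sketch`, mirroring the (i)/(ii)/(iii) example in the docstring above):

```python
from typing import Optional


# Hypothetical stand-in for Job.set_outputs: write the execution output into
# the receiving context under to["jobs"][job_id].
def set_outputs_sketch(
    job_id: Optional[str],
    output: dict,
    to: dict,
    strategy_is_set: bool = True,
) -> dict:
    if job_id is None:
        raise ValueError("This job does not set the ID before setting output.")
    to.setdefault("jobs", {})[job_id] = (
        {"strategies": output}
        if strategy_is_set
        # A single-strategy job unwraps its only value instead of nesting it.
        else output[next(iter(output))]
    )
    return to


to: dict = {"jobs": {}}
set_outputs_sketch(
    "first-job",
    output={"strategy01": {"stages": {}}, "strategy02": {"stages": {}}},
    to=to,
)
print(to)
# {'jobs': {'first-job': {'strategies': {'strategy01': {'stages': {}},
#                                        'strategy02': {'stages': {}}}}}}
```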
@@ -305,46 +356,38 @@
          params: DictData,
          *,
          event: Event | None = None,
+         raise_error: bool = True,
      ) -> Result:
          """Job Strategy execution with passing dynamic parameters from the
          workflow execution to strategy matrix.

-         This execution is the minimum level execution of job model.
+         This execution is the minimum level of execution of this job model.
+         It different with ``self.execute`` because this method run only one
+         strategy and return with context of this strategy data.
+
+         :raise JobException: If it has any error from StageException or
+             UtilException.

          :param strategy: A metrix strategy value.
          :param params: A dynamic parameters.
          :param event: An manger event that pass to the PoolThreadExecutor.
+         :param raise_error: A flag that raise error instead catching to result
+             if it get exception from stage execution.
          :rtype: Result
-
-         :raise JobException: If it has any error from StageException or
-             UtilException.
          """
-         # NOTE: Force stop this execution if event was set from main execution.
-         if event and event.is_set():
-             return Result(
-                 status=1,
-                 context={
-                     gen_id(strategy): {
-                         "matrix": strategy,
-                         "stages": {},
-                         "error_message": {
-                             "message": "Process Event stopped before execution"
-                         },
-                     },
-                 },
-             )
+         strategy_id: str = gen_id(strategy)

          # NOTE: Create strategy execution context and update a matrix and copied
          #   of params. So, the context value will have structure like;
-         # ---
+         #
          #   {
          #       "params": { ... },   <== Current input params
          #       "jobs": { ... },   <== Current input params
         #        "matrix": { ... }   <== Current strategy value
          #   }
          #
-         context: DictData = params
-         context.update({"matrix": strategy})
+         context: DictData = copy.deepcopy(params)
+         context.update({"matrix": strategy, "stages": {}})

          # IMPORTANT: The stage execution only run sequentially one-by-one.
          for stage in self.stages:
@@ -355,9 +398,7 @@
              _st_name: str = stage.id or stage.name

              if stage.is_skipped(params=context):
-                 logger.info(
-                     f"({self.run_id}) [JOB]: Skip the stage: {_st_name!r}"
-                 )
+                 logger.info(f"({self.run_id}) [JOB]: Skip stage: {_st_name!r}")
                  continue

              logger.info(
@@ -368,34 +409,23 @@
              if strategy:
                  logger.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")

-             # NOTE:
-             #   I do not use below syntax because `params` dict be the
-             #   reference memory pointer and it was changed when I action
-             #   anything like update or re-construct this.
-             #
-             #   ... params |= stage.execute(params=params)
-             #
-             #   This step will add the stage result to ``stages`` key in
-             #   that stage id. It will have structure like;
-             #   ---
-             #   {
-             #     "params": { ... },
-             #     "jobs": { ... },
-             #     "matrix": { ... },
-             #     "stages": { { "stage-id-1": ... }, ... }
-             #   }
-             #
+             # NOTE: Force stop this execution if event was set from main
+             #   execution.
              if event and event.is_set():
                  return Result(
                      status=1,
                      context={
-                         gen_id(strategy): {
+                         strategy_id: {
                              "matrix": strategy,
                              # NOTE: If job strategy executor use multithreading,
                              #   it will not filter function object from context.
                              # ---
                              # "stages": filter_func(context.pop("stages", {})),
                              "stages": context.pop("stages", {}),
+                             # NOTE: Set the error keys.
+                             "error": JobException(
+                                 "Process Event stopped before execution"
+                             ),
                              "error_message": {
                                  "message": (
                                      "Process Event stopped before execution"
@@ -404,17 +434,40 @@
                          },
                      },
                  )
+
+             # NOTE:
+             #   I do not use below syntax because `params` dict be the
+             #   reference memory pointer and it was changed when I action
+             #   anything like update or re-construct this.
+             #
+             #   ... params |= stage.execute(params=params)
+             #
+             #   This step will add the stage result to ``stages`` key in
+             #   that stage id. It will have structure like;
+             #
+             #   {
+             #     "params": { ... },
+             #     "jobs": { ... },
+             #     "matrix": { ... },
+             #     "stages": { { "stage-id-1": ... }, ... }
+             #   }
+             #
              try:
-                 rs: Result = stage.execute(params=context)
-                 stage.set_outputs(rs.context, to=context)
+                 stage.set_outputs(
+                     stage.execute(params=context).context,
+                     to=context,
+                 )
              except (StageException, UtilException) as err:
                  logger.error(
                      f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
                  )
-                 raise JobException(
-                     f"Get stage execution error: {err.__class__.__name__}: "
-                     f"{err}"
-                 ) from None
+                 if raise_error:
+                     raise JobException(
+                         f"Get stage execution error: {err.__class__.__name__}: "
+                         f"{err}"
+                     ) from None
+                 else:
+                     raise NotImplementedError() from None

              # NOTE: Remove new stage object that was created from
              #   ``get_running_id`` method.
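The long NOTE above explains why `params |= stage.execute(...)` was avoided and why `execute_strategy` now deep-copies `params` before mutating it: a plain assignment only copies the reference. A short generic illustration (plain Python behaviour, not package code):

```python
import copy

# Plain assignment copies only the reference, so updating the "context" also
# rewrites the caller's params.
params = {"params": {"name": "demo"}, "jobs": {}}
context = params
context.update({"matrix": {"os": "linux"}, "stages": {}})
print("matrix" in params)   # True -- the shared dict was mutated.

# A deep copy gives every strategy its own isolated context.
params = {"params": {"name": "demo"}, "jobs": {}}
context = copy.deepcopy(params)
context.update({"matrix": {"os": "linux"}, "stages": {}})
print("matrix" in params)   # False -- the caller's params stays untouched.
```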
@@ -423,12 +476,8 @@
          return Result(
              status=0,
              context={
-                 gen_id(strategy): {
+                 strategy_id: {
                      "matrix": strategy,
-                     # NOTE: (WF001) filter own created function from stages
-                     #   value, because it does not dump with pickle when you
-                     #   execute with multiprocess.
-                     #
                      "stages": filter_func(context.pop("stages", {})),
                  },
              },
@@ -436,19 +485,21 @@ class Job(BaseModel):

      def execute(self, params: DictData | None = None) -> Result:
          """Job execution with passing dynamic parameters from the workflow
-         execution. It will generate matrix values at the first step and for-loop
-         any metrix to all stages dependency.
+         execution. It will generate matrix values at the first step and run
+         multithread on this metrics to the ``stages`` field of this job.

          :param params: An input parameters that use on job execution.
          :rtype: Result
          """
          context: DictData = {}
+         params: DictData = {} if params is None else params

-         # NOTE: Normal Job execution.
+         # NOTE: Normal Job execution without parallel strategy.
          if (not self.strategy.is_set()) or self.strategy.max_parallel == 1:
              for strategy in self.strategy.make():
                  rs: Result = self.execute_strategy(
-                     strategy, params=copy.deepcopy(params)
+                     strategy=strategy,
+                     params=copy.deepcopy(params),
                  )
                  context.update(rs.context)
          return Result(
@@ -456,36 +507,6 @@ class Job(BaseModel):
              context=context,
          )

-         # # WARNING: (WF001) I got error that raise when use
-         # #   ``ProcessPoolExecutor``;
-         # # ---
-         # #   _pickle.PicklingError: Can't pickle
-         # #       <function ??? at 0x000001F0BE80F160>: attribute lookup ???
-         # #       on ddeutil.workflow.stage failed
-         # #
-         # # from multiprocessing import Event, Manager
-         # with Manager() as manager:
-         #     event: Event = manager.Event()
-         #
-         #     # NOTE: Start process pool executor for running strategy executor
-         #     #   in parallel mode.
-         #     with ProcessPoolExecutor(
-         #         max_workers=self.strategy.max_parallel
-         #     ) as executor:
-         #         futures: list[Future] = [
-         #             executor.submit(
-         #                 self.execute_strategy,
-         #                 strategy,
-         #                 params=copy.deepcopy(params),
-         #                 event=event,
-         #             )
-         #             for strategy in self.strategy.make()
-         #         ]
-         #         if self.strategy.fail_fast:
-         #             rs = self.__catch_fail_fast(event, futures)
-         #         else:
-         #             rs = self.__catch_all_completed(futures)
-
          # NOTE: Create event for cancel executor stop running.
          event: Event = Event()

@@ -495,53 +516,65 @@
              futures: list[Future] = [
                  executor.submit(
                      self.execute_strategy,
-                     strategy,
-                     params=copy.deepcopy(params),
+                     strategy=strategy,
+                     params=params,
                      event=event,
                  )
                  for strategy in self.strategy.make()
              ]

              # NOTE: Dynamic catching futures object with fail-fast flag.
-             if self.strategy.fail_fast:
-                 rs: Result = self.__catch_fail_fast(event, futures)
-             else:
-                 rs: Result = self.__catch_all_completed(futures)
-             return Result(
-                 status=0,
-                 context=rs.context,
-             )
+             return (
+                 self.__catch_fail_fast(event=event, futures=futures)
+                 if self.strategy.fail_fast
+                 else self.__catch_all_completed(futures=futures)
+             )

-     def __catch_fail_fast(self, event: Event, futures: list[Future]) -> Result:
+     def __catch_fail_fast(
+         self,
+         event: Event,
+         futures: list[Future],
+         *,
+         timeout: int = 1800,
+         result_timeout: int = 60,
+     ) -> Result:
          """Job parallel pool futures catching with fail-fast mode. That will
          stop all not done futures if it receive the first exception from all
          running futures.

-         :param event: An event
+         :param event: An event manager instance that able to set stopper on the
+             observing thread/process.
          :param futures: A list of futures.
+         :param timeout: A timeout to waiting all futures complete.
+         :param result_timeout: A timeout of getting result from the future
+             instance when it was running completely.
          :rtype: Result
          """
+         rs_final: Result = Result()
          context: DictData = {}
-         # NOTE: Get results from a collection of tasks with a
-         #   timeout that has the first exception.
+         status: int = 0
+
+         # NOTE: Get results from a collection of tasks with a timeout that has
+         #   the first exception.
          done, not_done = wait(
-             futures, timeout=1800, return_when=FIRST_EXCEPTION
+             futures,
+             timeout=timeout,
+             return_when=FIRST_EXCEPTION,
          )
          nd: str = (
              f", the strategies do not run is {not_done}" if not_done else ""
          )
          logger.debug(f"({self.run_id}) [JOB]: Strategy is set Fail Fast{nd}")

+         # NOTE: Stop all running tasks with setting the event manager and cancel
+         #   any scheduled tasks.
          if len(done) != len(futures):
-
-             # NOTE: Stop all running tasks
              event.set()
-
-             # NOTE: Cancel any scheduled tasks
              for future in futures:
                  future.cancel()

-         status: int = 0
+             del future
+
          for future in done:
              if future.exception():
                  status = 1
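The fail-fast branch above is built on `concurrent.futures.wait` with `return_when=FIRST_EXCEPTION`, a `threading.Event` for cooperative stopping, and `Future.cancel()` for tasks that never started. A runnable sketch of that pattern with the standard library only (hypothetical `worker` function; the real method merges `Result` contexts and logs with the job's `run_id`):

```python
import time
from concurrent.futures import FIRST_EXCEPTION, ThreadPoolExecutor, wait
from threading import Event


def worker(name: str, event: Event) -> str:
    # Cooperative cancellation: the task checks the shared event while working.
    for _ in range(10):
        if event.is_set():
            return f"{name}: stopped"
        time.sleep(0.05)
    if name == "strategy02":
        raise RuntimeError("boom")
    return f"{name}: ok"


event = Event()
with ThreadPoolExecutor(max_workers=2) as executor:
    futures = [
        executor.submit(worker, name, event)
        for name in ("strategy01", "strategy02", "strategy03")
    ]
    # Block until the first raised exception (or until everything finishes).
    done, not_done = wait(futures, timeout=1800, return_when=FIRST_EXCEPTION)
    if len(done) != len(futures):
        event.set()            # ask running tasks to stop cooperatively
        for future in futures:
            future.cancel()    # drop queued tasks that never started
    for future in done:
        if future.exception():
            print("failed:", future.exception())
        elif not future.cancelled():
            print(future.result(timeout=60))
```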
@@ -551,32 +584,36 @@ class Job(BaseModel):
                  )
              elif future.cancelled():
                  continue
-             else:
-                 rs: Result = future.result(timeout=60)
-                 context.update(rs.context)
-         return Result(status=status, context=context)

-     def __catch_all_completed(self, futures: list[Future]) -> Result:
+             # NOTE: Update the result context to main job context.
+             context.update(future.result(timeout=result_timeout).context)
+
+             del future
+
+         return rs_final.catch(status=status, context=context)
+
+     def __catch_all_completed(
+         self,
+         futures: list[Future],
+         *,
+         timeout: int = 1800,
+         result_timeout: int = 60,
+     ) -> Result:
          """Job parallel pool futures catching with all-completed mode.

          :param futures: A list of futures that want to catch all completed
              result.
+         :param timeout: A timeout to waiting all futures complete.
+         :param result_timeout: A timeout of getting result from the future
+             instance when it was running completely.
          :rtype: Result
          """
+         rs_final: Result = Result()
          context: DictData = {}
          status: int = 0
-         for future in as_completed(futures):
+         for future in as_completed(futures, timeout=timeout):
              try:
-                 rs: Result = future.result(timeout=60)
-                 context.update(rs.context)
-             except PickleError as err:
-                 # NOTE: (WF001) I do not want to fix this issue because
-                 #   it does not make sense and over-engineering with
-                 #   this bug fix process.
-                 raise JobException(
-                     f"PyStage that create object on locals does use "
-                     f"parallel in strategy execution;\n\t{err}"
-                 ) from None
+                 context.update(future.result(timeout=result_timeout).context)
              except TimeoutError:
                  status = 1
                  logger.warning(
@@ -585,14 +622,13 @@
                  )
                  future.cancel()
                  time.sleep(0.1)
-                 if not future.cancelled():
-                     logger.warning(
-                         f"({self.run_id}) [JOB]: Failed to cancel the task."
-                     )
-                 else:
-                     logger.warning(
-                         f"({self.run_id}) [JOB]: Task canceled successfully."
-                     )
+
+                 stmt: str = (
+                     "Failed to cancel the task."
+                     if not future.cancelled()
+                     else "Task canceled successfully."
+                 )
+                 logger.warning(f"({self.run_id}) [JOB]: {stmt}")
              except JobException as err:
                  status = 1
                  logger.error(
@@ -600,4 +636,6 @@
                      f"fail-fast does not set;\n{err.__class__.__name__}:\n\t"
                      f"{err}"
                  )
-         return Result(status=status, context=context)
+             finally:
+                 del future
+         return rs_final.catch(status=status, context=context)
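For comparison, the all-completed mode above drains every future with `as_completed(futures, timeout=...)` and bounds each `Future.result()` call separately with `result_timeout`. A standard-library sketch of that catching loop (hypothetical `worker`; error handling is reduced to a status flag):

```python
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError, as_completed


def worker(name: str, delay: float) -> dict:
    time.sleep(delay)
    return {name: {"stages": {}}}


context: dict = {}
status: int = 0
with ThreadPoolExecutor(max_workers=2) as executor:
    futures = [
        executor.submit(worker, "strategy01", 0.1),
        executor.submit(worker, "strategy02", 0.2),
    ]
    # The iterator timeout bounds the whole drain; result() has its own bound.
    for future in as_completed(futures, timeout=1800):
        try:
            context.update(future.result(timeout=60))
        except TimeoutError:
            status = 1
            future.cancel()
        except Exception:
            # Keep draining the remaining futures even if one strategy failed.
            status = 1

print(status, sorted(context))   # 0 ['strategy01', 'strategy02']
```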