ddeutil-workflow 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/job.py CHANGED
@@ -3,6 +3,8 @@
 # Licensed under the MIT License. See LICENSE in the project root for
 # license information.
 # ------------------------------------------------------------------------------
+"""Job Model that use for keeping stages and node that running its stages.
+"""
 from __future__ import annotations
 
 import copy
@@ -15,7 +17,6 @@ from concurrent.futures import (
     wait,
 )
 from functools import lru_cache
-from pickle import PickleError
 from textwrap import dedent
 from threading import Event
 from typing import Optional
@@ -61,9 +62,13 @@ __all__: TupleStr = (
 
 @freeze_args
 @lru_cache
-def make(matrix, include, exclude) -> list[DictStr]:
-    """Return List of product of matrix values that already filter with
-    exclude and add include.
+def make(
+    matrix: Matrix,
+    include: MatrixInclude,
+    exclude: MatrixExclude,
+) -> list[DictStr]:
+    """Make a list of product of matrix values that already filter with
+    exclude matrix and add specific matrix with include.
 
     :param matrix: A matrix values that want to cross product to possible
         parallelism values.
@@ -72,11 +77,12 @@ def make(matrix, include, exclude) -> list[DictStr]:
     :rtype: list[DictStr]
     """
     # NOTE: If it does not set matrix, it will return list of an empty dict.
-    if not (mt := matrix):
+    if len(matrix) == 0:
         return [{}]
 
+    # NOTE: Remove matrix that exists on the exclude.
     final: list[DictStr] = []
-    for r in cross_product(matrix=mt):
+    for r in cross_product(matrix=matrix):
         if any(
             all(r[k] == v for k, v in exclude.items()) for exclude in exclude
         ):
@@ -85,7 +91,7 @@ def make(matrix, include, exclude) -> list[DictStr]:
 
     # NOTE: If it is empty matrix and include, it will return list of an
     #   empty dict.
-    if not final and not include:
+    if len(final) == 0 and not include:
         return [{}]
 
     # NOTE: Add include to generated matrix with exclude list.
@@ -95,16 +101,20 @@ def make(matrix, include, exclude) -> list[DictStr]:
         #   Validate any key in include list should be a subset of some one
         #   in matrix.
         if all(not (set(inc.keys()) <= set(m.keys())) for m in final):
-            raise ValueError("Include should have the keys equal to matrix")
+            raise ValueError(
+                "Include should have the keys that equal to all final matrix."
+            )
 
         # VALIDATE:
-        #   Validate value of include does not duplicate with generated
-        #   matrix.
+        #   Validate value of include should not duplicate with generated
+        #   matrix. So, it will skip if this value already exists.
         if any(
             all(inc.get(k) == v for k, v in m.items()) for m in [*final, *add]
         ):
            continue
         add.append(inc)
+
+    # NOTE: Merge all matrix together.
     final.extend(add)
     return final
 
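The reworked ``make()`` keeps the semantics its docstring describes: cross-product the matrix values, drop any combination that fully matches an exclude entry, then append include entries that are not already present. The following is a simplified, self-contained sketch of that behavior for reference only (it skips the caching decorators and the key-validation error path; ``make_sketch`` and the sample values are illustrative, not part of the package):

    from itertools import product

    def make_sketch(matrix, include, exclude):
        # Cross product of every value list in the matrix.
        final = (
            [dict(zip(matrix, values)) for values in product(*matrix.values())]
            if matrix
            else [{}]
        )
        # Drop combinations that fully match an exclude entry.
        final = [
            r for r in final
            if not any(all(r.get(k) == v for k, v in ex.items()) for ex in exclude)
        ]
        # Append include entries that do not duplicate an existing combination.
        for inc in include:
            if not any(all(inc.get(k) == v for k, v in m.items()) for m in final):
                final.append(inc)
        return final or [{}]

    make_sketch(
        matrix={"python": ["3.9", "3.10"], "os": ["linux", "windows"]},
        include=[{"python": "3.11", "os": "linux"}],
        exclude=[{"python": "3.9", "os": "windows"}],
    )
    # [{'python': '3.9', 'os': 'linux'}, {'python': '3.10', 'os': 'linux'},
    #  {'python': '3.10', 'os': 'windows'}, {'python': '3.11', 'os': 'linux'}]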
@@ -190,7 +200,7 @@ class Strategy(BaseModel):
 
 
 class Job(BaseModel):
-    """Job Model (group of stages).
+    """Job Pydantic model object (group of stages).
 
     This job model allow you to use for-loop that call matrix strategy. If
     you pass matrix mapping and it able to generate, you will see it running
@@ -220,8 +230,7 @@ class Job(BaseModel):
     id: Optional[str] = Field(
         default=None,
         description=(
-            "A job ID, this value will add from workflow after validation "
-            "process."
+            "A job ID that it will add from workflow after validation process."
         ),
     )
     desc: Optional[str] = Field(
@@ -256,6 +265,9 @@ class Job(BaseModel):
     def __prepare_keys(cls, values: DictData) -> DictData:
         """Rename key that use dash to underscore because Python does not
         support this character exist in any variable name.
+
+        :param values: A passing value that coming for initialize this object.
+        :rtype: DictData
         """
         dash2underscore("runs-on", values)
         return values
@@ -266,8 +278,11 @@ class Job(BaseModel):
         return dedent(value)
 
     @model_validator(mode="after")
-    def __prepare_running_id(self):
-        """Prepare the job running ID."""
+    def __prepare_running_id(self) -> Self:
+        """Prepare the job running ID.
+
+        :rtype: Self
+        """
         if self.run_id is None:
             self.run_id = gen_id(self.id or "", unique=True)
 
@@ -287,17 +302,51 @@ class Job(BaseModel):
         return self.model_copy(update={"run_id": run_id})
 
     def stage(self, stage_id: str) -> Stage:
-        """Return stage model that match with an input stage ID."""
+        """Return stage model that match with an input stage ID.
+
+        :param stage_id: A stage ID that want to extract from this job.
+        :rtype: Stage
+        """
         for stage in self.stages:
             if stage_id == (stage.id or ""):
                 return stage
         raise ValueError(f"Stage ID {stage_id} does not exists")
 
-    def set_outputs(self, output: DictData) -> DictData:
-        """Setting output of job execution"""
-        if len(output) > 1 and self.strategy.is_set():
-            return {"strategies": output}
-        return output[next(iter(output))]
+    def set_outputs(self, output: DictData, to: DictData) -> DictData:
+        """Set an outputs from execution process to the receive context. The
+        result from execution will pass to value of ``strategies`` key.
+
+        For example of setting output method, If you receive execute output
+        and want to set on the `to` like;
+
+            ... (i)   output: {'strategy01': bar, 'strategy02': bar}
+            ... (ii)  to: {}
+
+        The result of the `to` variable will be;
+
+            ... (iii) to: {
+                        'strategies': {
+                            'strategy01': bar, 'strategy02': bar
+                        }
+                    }
+
+        :param output: An output context.
+        :param to: A context data that want to add output result.
+        :rtype: DictData
+        """
+        if self.id is None:
+            raise JobException(
+                "This job do not set the ID before setting output."
+            )
+
+        to[self.id] = (
+            {"strategies": output}
+            if self.strategy.is_set()
+            # NOTE:
+            #   This is the best way to get single key from dict.
+            else output[next(iter(output))]
+        )
+        return to
 
     def execute_strategy(
         self,
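The new ``set_outputs`` writes into the caller's context instead of returning a bare mapping. The docstring example above translates to the sketch below (the job ID ``'load'`` and the strategy keys are made-up sample values; the real method reads ``self.id`` and ``self.strategy.is_set()`` and raises ``JobException`` when the job has no ID):

    def set_outputs_sketch(job_id, has_strategy, output, to):
        # Matrix jobs keep every strategy result under the "strategies" key;
        # plain jobs unwrap their single generated entry.
        to[job_id] = (
            {"strategies": output} if has_strategy else output[next(iter(output))]
        )
        return to

    set_outputs_sketch(
        "load", True, {"strategy01": {"stages": {}}, "strategy02": {"stages": {}}}, {}
    )
    # {'load': {'strategies': {'strategy01': {'stages': {}},
    #                          'strategy02': {'stages': {}}}}}

    set_outputs_sketch("load", False, {"2150810470": {"stages": {}}}, {})
    # {'load': {'stages': {}}}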
@@ -305,46 +354,38 @@
         params: DictData,
         *,
         event: Event | None = None,
+        raise_error: bool = True,
     ) -> Result:
         """Job Strategy execution with passing dynamic parameters from the
         workflow execution to strategy matrix.
 
-        This execution is the minimum level execution of job model.
+        This execution is the minimum level of execution of this job model.
+        It different with ``self.execute`` because this method run only one
+        strategy and return with context of this strategy data.
+
+        :raise JobException: If it has any error from StageException or
+            UtilException.
 
         :param strategy: A metrix strategy value.
         :param params: A dynamic parameters.
         :param event: An manger event that pass to the PoolThreadExecutor.
+        :param raise_error: A flag that raise error instead catching to result
+            if it get exception from stage execution.
         :rtype: Result
-
-        :raise JobException: If it has any error from StageException or
-            UtilException.
         """
-        # NOTE: Force stop this execution if event was set from main execution.
-        if event and event.is_set():
-            return Result(
-                status=1,
-                context={
-                    gen_id(strategy): {
-                        "matrix": strategy,
-                        "stages": {},
-                        "error_message": {
-                            "message": "Process Event stopped before execution"
-                        },
-                    },
-                },
-            )
+        strategy_id: str = gen_id(strategy)
 
         # NOTE: Create strategy execution context and update a matrix and copied
         #   of params. So, the context value will have structure like;
-        #   ---
+        #
         #   {
         #       "params": { ... },   <== Current input params
         #       "jobs": { ... },     <== Current input params
         #       "matrix": { ... }    <== Current strategy value
         #   }
         #
-        context: DictData = params
-        context.update({"matrix": strategy})
+        context: DictData = copy.deepcopy(params)
+        context.update({"matrix": strategy, "stages": {}})
 
         # IMPORTANT: The stage execution only run sequentially one-by-one.
         for stage in self.stages:
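The switch from ``context: DictData = params`` to ``copy.deepcopy(params)`` is what keeps one strategy's updates from leaking into the shared input dictionary, as the NOTE comment moved further down explains. A minimal illustration of the aliasing it guards against (the sample ``params`` value is made up):

    import copy

    params = {"params": {"name": "foo"}, "jobs": {}}

    aliased = params                          # same object as the caller's dict
    aliased.update({"matrix": {"python": "3.10"}})
    print("matrix" in params)                 # True: the caller sees the mutation

    params.pop("matrix")
    isolated = copy.deepcopy(params)          # independent copy per strategy
    isolated.update({"matrix": {"python": "3.10"}, "stages": {}})
    print("matrix" in params)                 # False: the original stays untouched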
@@ -355,9 +396,7 @@
             _st_name: str = stage.id or stage.name
 
             if stage.is_skipped(params=context):
-                logger.info(
-                    f"({self.run_id}) [JOB]: Skip the stage: {_st_name!r}"
-                )
+                logger.info(f"({self.run_id}) [JOB]: Skip stage: {_st_name!r}")
                 continue
 
             logger.info(
@@ -368,34 +407,23 @@
             if strategy:
                 logger.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")
 
-            # NOTE:
-            #   I do not use below syntax because `params` dict be the
-            #   reference memory pointer and it was changed when I action
-            #   anything like update or re-construct this.
-            #
-            #       ... params |= stage.execute(params=params)
-            #
-            #   This step will add the stage result to ``stages`` key in
-            #   that stage id. It will have structure like;
-            #   ---
-            #   {
-            #       "params": { ... },
-            #       "jobs": { ... },
-            #       "matrix": { ... },
-            #       "stages": { { "stage-id-1": ... }, ... }
-            #   }
-            #
+            # NOTE: Force stop this execution if event was set from main
+            #   execution.
             if event and event.is_set():
                 return Result(
                     status=1,
                     context={
-                        gen_id(strategy): {
+                        strategy_id: {
                             "matrix": strategy,
                             # NOTE: If job strategy executor use multithreading,
                             #   it will not filter function object from context.
                             # ---
                             #   "stages": filter_func(context.pop("stages", {})),
                             "stages": context.pop("stages", {}),
+                            # NOTE: Set the error keys.
+                            "error": JobException(
+                                "Process Event stopped before execution"
+                            ),
                             "error_message": {
                                 "message": (
                                     "Process Event stopped before execution"
@@ -404,17 +432,40 @@
                         },
                     },
                 )
+
+            # NOTE:
+            #   I do not use below syntax because `params` dict be the
+            #   reference memory pointer and it was changed when I action
+            #   anything like update or re-construct this.
+            #
+            #       ... params |= stage.execute(params=params)
+            #
+            #   This step will add the stage result to ``stages`` key in
+            #   that stage id. It will have structure like;
+            #
+            #   {
+            #       "params": { ... },
+            #       "jobs": { ... },
+            #       "matrix": { ... },
+            #       "stages": { { "stage-id-1": ... }, ... }
+            #   }
+            #
             try:
-                rs: Result = stage.execute(params=context)
-                stage.set_outputs(rs.context, to=context)
+                stage.set_outputs(
+                    stage.execute(params=context).context,
+                    to=context,
+                )
             except (StageException, UtilException) as err:
                 logger.error(
                     f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
                 )
-                raise JobException(
-                    f"Get stage execution error: {err.__class__.__name__}: "
-                    f"{err}"
-                ) from None
+                if raise_error:
+                    raise JobException(
+                        f"Get stage execution error: {err.__class__.__name__}: "
+                        f"{err}"
+                    ) from None
+                else:
+                    raise NotImplementedError() from None
 
             # NOTE: Remove new stage object that was created from
             #   ``get_running_id`` method.
@@ -423,12 +474,8 @@
         return Result(
             status=0,
             context={
-                gen_id(strategy): {
+                strategy_id: {
                     "matrix": strategy,
-                    # NOTE: (WF001) filter own created function from stages
-                    #   value, because it does not dump with pickle when you
-                    #   execute with multiprocess.
-                    #
                     "stages": filter_func(context.pop("stages", {})),
                 },
             },
@@ -436,19 +483,21 @@
 
     def execute(self, params: DictData | None = None) -> Result:
         """Job execution with passing dynamic parameters from the workflow
-        execution. It will generate matrix values at the first step and for-loop
-        any metrix to all stages dependency.
+        execution. It will generate matrix values at the first step and run
+        multithread on this metrics to the ``stages`` field of this job.
 
         :param params: An input parameters that use on job execution.
         :rtype: Result
         """
         context: DictData = {}
+        params: DictData = {} if params is None else params
 
-        # NOTE: Normal Job execution.
+        # NOTE: Normal Job execution without parallel strategy.
         if (not self.strategy.is_set()) or self.strategy.max_parallel == 1:
             for strategy in self.strategy.make():
                 rs: Result = self.execute_strategy(
-                    strategy, params=copy.deepcopy(params)
+                    strategy=strategy,
+                    params=copy.deepcopy(params),
                 )
                 context.update(rs.context)
             return Result(
@@ -456,36 +505,6 @@
                 context=context,
             )
 
-        # # WARNING: (WF001) I got error that raise when use
-        # #     ``ProcessPoolExecutor``;
-        # # ---
-        # #     _pickle.PicklingError: Can't pickle
-        # #         <function ??? at 0x000001F0BE80F160>: attribute lookup ???
-        # #         on ddeutil.workflow.stage failed
-        # #
-        # # from multiprocessing import Event, Manager
-        # with Manager() as manager:
-        #     event: Event = manager.Event()
-        #
-        #     # NOTE: Start process pool executor for running strategy executor
-        #     #   in parallel mode.
-        #     with ProcessPoolExecutor(
-        #         max_workers=self.strategy.max_parallel
-        #     ) as executor:
-        #         futures: list[Future] = [
-        #             executor.submit(
-        #                 self.execute_strategy,
-        #                 strategy,
-        #                 params=copy.deepcopy(params),
-        #                 event=event,
-        #             )
-        #             for strategy in self.strategy.make()
-        #         ]
-        #         if self.strategy.fail_fast:
-        #             rs = self.__catch_fail_fast(event, futures)
-        #         else:
-        #             rs = self.__catch_all_completed(futures)
-
         # NOTE: Create event for cancel executor stop running.
         event: Event = Event()
@@ -495,53 +514,65 @@
             futures: list[Future] = [
                 executor.submit(
                     self.execute_strategy,
-                    strategy,
-                    params=copy.deepcopy(params),
+                    strategy=strategy,
+                    params=params,
                     event=event,
                 )
                 for strategy in self.strategy.make()
             ]
 
             # NOTE: Dynamic catching futures object with fail-fast flag.
-            if self.strategy.fail_fast:
-                rs: Result = self.__catch_fail_fast(event, futures)
-            else:
-                rs: Result = self.__catch_all_completed(futures)
-            return Result(
-                status=0,
-                context=rs.context,
-            )
+            return (
+                self.__catch_fail_fast(event=event, futures=futures)
+                if self.strategy.fail_fast
+                else self.__catch_all_completed(futures=futures)
+            )
 
-    def __catch_fail_fast(self, event: Event, futures: list[Future]) -> Result:
+    def __catch_fail_fast(
+        self,
+        event: Event,
+        futures: list[Future],
+        *,
+        timeout: int = 1800,
+        result_timeout: int = 60,
+    ) -> Result:
         """Job parallel pool futures catching with fail-fast mode. That will
         stop all not done futures if it receive the first exception from all
         running futures.
 
-        :param event: An event
+        :param event: An event manager instance that able to set stopper on the
+            observing thread/process.
         :param futures: A list of futures.
+        :param timeout: A timeout to waiting all futures complete.
+        :param result_timeout: A timeout of getting result from the future
+            instance when it was running completely.
         :rtype: Result
         """
+        rs_final: Result = Result()
         context: DictData = {}
-        # NOTE: Get results from a collection of tasks with a
-        #   timeout that has the first exception.
+        status: int = 0
+
+        # NOTE: Get results from a collection of tasks with a timeout that has
+        #   the first exception.
         done, not_done = wait(
-            futures, timeout=1800, return_when=FIRST_EXCEPTION
+            futures,
+            timeout=timeout,
+            return_when=FIRST_EXCEPTION,
         )
         nd: str = (
             f", the strategies do not run is {not_done}" if not_done else ""
         )
         logger.debug(f"({self.run_id}) [JOB]: Strategy is set Fail Fast{nd}")
 
+        # NOTE: Stop all running tasks with setting the event manager and cancel
+        #   any scheduled tasks.
         if len(done) != len(futures):
-
-            # NOTE: Stop all running tasks
             event.set()
-
-            # NOTE: Cancel any scheduled tasks
             for future in futures:
                 future.cancel()
 
-        status: int = 0
+        del future
+
         for future in done:
             if future.exception():
                 status = 1
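The fail-fast helper is the standard ``concurrent.futures`` pattern: wait for the first exception, then signal the cooperative event and cancel whatever has not started. A minimal, self-contained sketch of that pattern (the ``worker`` function and its sample failure are illustrative, not part of the package):

    from concurrent.futures import FIRST_EXCEPTION, ThreadPoolExecutor, wait
    from threading import Event

    def worker(n: int, stop: Event) -> int:
        # Cooperative tasks check the event and abort early once it is set.
        if stop.is_set():
            raise RuntimeError(f"strategy {n} stopped before execution")
        if n == 2:
            raise ValueError("boom")          # the first failure trips fail-fast
        return n

    stop = Event()
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(worker, n, stop) for n in range(4)]
        done, not_done = wait(futures, timeout=1800, return_when=FIRST_EXCEPTION)
        if len(done) != len(futures):
            stop.set()                        # tell running tasks to stop
            for future in futures:
                future.cancel()               # cancel anything not started yet
        status = 1 if any(
            f.exception() for f in done if not f.cancelled()
        ) else 0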
@@ -551,32 +582,36 @@
                 )
             elif future.cancelled():
                 continue
-            else:
-                rs: Result = future.result(timeout=60)
-                context.update(rs.context)
-        return Result(status=status, context=context)
 
-    def __catch_all_completed(self, futures: list[Future]) -> Result:
+            # NOTE: Update the result context to main job context.
+            context.update(future.result(timeout=result_timeout).context)
+
+            del future
+
+        return rs_final.catch(status=status, context=context)
+
+    def __catch_all_completed(
+        self,
+        futures: list[Future],
+        *,
+        timeout: int = 1800,
+        result_timeout: int = 60,
+    ) -> Result:
         """Job parallel pool futures catching with all-completed mode.
 
         :param futures: A list of futures that want to catch all completed
             result.
+        :param timeout: A timeout to waiting all futures complete.
+        :param result_timeout: A timeout of getting result from the future
+            instance when it was running completely.
         :rtype: Result
         """
+        rs_final: Result = Result()
         context: DictData = {}
         status: int = 0
-        for future in as_completed(futures):
+        for future in as_completed(futures, timeout=timeout):
             try:
-                rs: Result = future.result(timeout=60)
-                context.update(rs.context)
-            except PickleError as err:
-                # NOTE: (WF001) I do not want to fix this issue because
-                #   it does not make sense and over-engineering with
-                #   this bug fix process.
-                raise JobException(
-                    f"PyStage that create object on locals does use "
-                    f"parallel in strategy execution;\n\t{err}"
-                ) from None
+                context.update(future.result(timeout=result_timeout).context)
             except TimeoutError:
                 status = 1
                 logger.warning(
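Likewise, the all-completed helper drains every future with ``as_completed`` and records a failure status instead of stopping early. A small sketch of that pattern (``collect_all`` and the lambda payloads are illustrative, not the package API):

    from concurrent.futures import ThreadPoolExecutor, TimeoutError, as_completed

    def collect_all(futures, timeout=1800, result_timeout=60):
        context, status = {}, 0
        for future in as_completed(futures, timeout=timeout):
            try:
                context.update(future.result(timeout=result_timeout))
            except TimeoutError:              # result took too long; try to cancel
                status = 1
                future.cancel()
            except Exception:                 # any other failure only flips the status
                status = 1
        return status, context

    with ThreadPoolExecutor(max_workers=2) as executor:
        futures = [executor.submit(lambda i=i: {f"strategy{i}": i}) for i in range(3)]
        print(collect_all(futures))           # status 0 with all three results merged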
@@ -585,14 +620,13 @@
                 )
                 future.cancel()
                 time.sleep(0.1)
-                if not future.cancelled():
-                    logger.warning(
-                        f"({self.run_id}) [JOB]: Failed to cancel the task."
-                    )
-                else:
-                    logger.warning(
-                        f"({self.run_id}) [JOB]: Task canceled successfully."
-                    )
+
+                stmt: str = (
+                    "Failed to cancel the task."
+                    if not future.cancelled()
+                    else "Task canceled successfully."
+                )
+                logger.warning(f"({self.run_id}) [JOB]: {stmt}")
             except JobException as err:
                 status = 1
                 logger.error(
@@ -600,4 +634,6 @@
                     f"fail-fast does not set;\n{err.__class__.__name__}:\n\t"
                     f"{err}"
                 )
-        return Result(status=status, context=context)
+            finally:
+                del future
+        return rs_final.catch(status=status, context=context)