ddeutil-workflow 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddeutil/workflow/job.py CHANGED
@@ -3,6 +3,8 @@
3
3
  # Licensed under the MIT License. See LICENSE in the project root for
4
4
  # license information.
5
5
  # ------------------------------------------------------------------------------
6
+ """Job Model that use for keeping stages and node that running its stages.
7
+ """
6
8
  from __future__ import annotations
7
9
 
8
10
  import copy
@@ -14,11 +16,12 @@ from concurrent.futures import (
14
16
  as_completed,
15
17
  wait,
16
18
  )
17
- from pickle import PickleError
19
+ from functools import lru_cache
18
20
  from textwrap import dedent
19
21
  from threading import Event
20
22
  from typing import Optional
21
23
 
24
+ from ddeutil.core import freeze_args
22
25
  from pydantic import BaseModel, Field
23
26
  from pydantic.functional_validators import field_validator, model_validator
24
27
  from typing_extensions import Self
@@ -53,12 +56,79 @@ logger = get_logger("ddeutil.workflow")
53
56
  __all__: TupleStr = (
54
57
  "Strategy",
55
58
  "Job",
59
+ "make",
56
60
  )
57
61
 
58
62
 
63
+ @freeze_args
64
+ @lru_cache
65
+ def make(
66
+ matrix: Matrix,
67
+ include: MatrixInclude,
68
+ exclude: MatrixExclude,
69
+ ) -> list[DictStr]:
70
+ """Make a list of product of matrix values that already filter with
71
+ exclude matrix and add specific matrix with include.
72
+
73
+ :param matrix: The matrix values to cross-product into the possible
74
+ parallelism values.
75
+ :param include: A list of additional matrices to add in.
76
+ :param exclude: A list of excluded matrices to filter out.
77
+ :rtype: list[DictStr]
78
+ """
79
+ # NOTE: If it does not set matrix, it will return list of an empty dict.
80
+ if len(matrix) == 0:
81
+ return [{}]
82
+
83
+ # NOTE: Remove matrix that exists on the exclude.
84
+ final: list[DictStr] = []
85
+ for r in cross_product(matrix=matrix):
86
+ if any(
87
+ all(r[k] == v for k, v in exclude.items()) for exclude in exclude
88
+ ):
89
+ continue
90
+ final.append(r)
91
+
92
+ # NOTE: If it is empty matrix and include, it will return list of an
93
+ # empty dict.
94
+ if len(final) == 0 and not include:
95
+ return [{}]
96
+
97
+ # NOTE: Add include to generated matrix with exclude list.
98
+ add: list[DictStr] = []
99
+ for inc in include:
100
+ # VALIDATE:
101
+ # Validate any key in include list should be a subset of some one
102
+ # in matrix.
103
+ if all(not (set(inc.keys()) <= set(m.keys())) for m in final):
104
+ raise ValueError(
105
+ "Include should have the keys that equal to all final matrix."
106
+ )
107
+
108
+ # VALIDATE:
109
+ # Validate value of include should not duplicate with generated
110
+ # matrix. So, it will skip if this value already exists.
111
+ if any(
112
+ all(inc.get(k) == v for k, v in m.items()) for m in [*final, *add]
113
+ ):
114
+ continue
115
+ add.append(inc)
116
+
117
+ # NOTE: Merge all matrix together.
118
+ final.extend(add)
119
+ return final
120
+
121
+
59
122
  class Strategy(BaseModel):
60
123
  """Strategy Model that will combine a matrix together for running the
61
- special job.
124
+ special job with combination of matrix data.
125
+
126
+ This model is not part of the job only, because you can use it with
127
+ any model object. The purpose of this model is to generate a matrix result that
128
+ comes from combination logic with any matrix values for running it with
129
+ parallelism.
130
+
131
+ [1, 2, 3] x [a, b] --> [1a], [1b], [2a], [2b], [3a], [3b]
62
132
 
63
133
  Data Validate:
64
134
  >>> strategy = {
@@ -105,13 +175,19 @@ class Strategy(BaseModel):
105
175
  def __prepare_keys(cls, values: DictData) -> DictData:
106
176
  """Rename key that use dash to underscore because Python does not
107
177
  support this character exist in any variable name.
178
+
179
+ :param values: A parsing values to this models
180
+ :rtype: DictData
108
181
  """
109
182
  dash2underscore("max-parallel", values)
110
183
  dash2underscore("fail-fast", values)
111
184
  return values
112
185
 
113
186
  def is_set(self) -> bool:
114
- """Return True if this strategy was set from yaml template."""
187
+ """Return True if this strategy was set from yaml template.
188
+
189
+ :rtype: bool
190
+ """
115
191
  return len(self.matrix) > 0
116
192
 
117
193
  def make(self) -> list[DictStr]:
@@ -120,48 +196,11 @@ class Strategy(BaseModel):
120
196
 
121
197
  :rtype: list[DictStr]
122
198
  """
123
- # NOTE: If it does not set matrix, it will return list of an empty dict.
124
- if not (mt := self.matrix):
125
- return [{}]
126
-
127
- final: list[DictStr] = []
128
- for r in cross_product(matrix=mt):
129
- if any(
130
- all(r[k] == v for k, v in exclude.items())
131
- for exclude in self.exclude
132
- ):
133
- continue
134
- final.append(r)
135
-
136
- # NOTE: If it is empty matrix and include, it will return list of an
137
- # empty dict.
138
- if not final and not self.include:
139
- return [{}]
140
-
141
- # NOTE: Add include to generated matrix with exclude list.
142
- add: list[DictStr] = []
143
- for include in self.include:
144
- # VALIDATE:
145
- # Validate any key in include list should be a subset of some one
146
- # in matrix.
147
- if all(not (set(include.keys()) <= set(m.keys())) for m in final):
148
- raise ValueError("Include should have the keys equal to matrix")
149
-
150
- # VALIDATE:
151
- # Validate value of include does not duplicate with generated
152
- # matrix.
153
- if any(
154
- all(include.get(k) == v for k, v in m.items())
155
- for m in [*final, *add]
156
- ):
157
- continue
158
- add.append(include)
159
- final.extend(add)
160
- return final
199
+ return make(self.matrix, self.include, self.exclude)
161
200
 
162
201
 
163
202
  class Job(BaseModel):
164
- """Job Model (group of stages).
203
+ """Job Pydantic model object (group of stages).
165
204
 
166
205
  This job model allow you to use for-loop that call matrix strategy. If
167
206
  you pass matrix mapping and it able to generate, you will see it running
@@ -191,8 +230,7 @@ class Job(BaseModel):
191
230
  id: Optional[str] = Field(
192
231
  default=None,
193
232
  description=(
194
- "A job ID, this value will add from workflow after validation "
195
- "process."
233
+ "A job ID that it will add from workflow after validation process."
196
234
  ),
197
235
  )
198
236
  desc: Optional[str] = Field(
@@ -227,6 +265,9 @@ class Job(BaseModel):
227
265
  def __prepare_keys(cls, values: DictData) -> DictData:
228
266
  """Rename key that use dash to underscore because Python does not
229
267
  support this character exist in any variable name.
268
+
269
+ :param values: The passed values that are coming to initialize this object.
270
+ :rtype: DictData
230
271
  """
231
272
  dash2underscore("runs-on", values)
232
273
  return values
@@ -237,7 +278,11 @@ class Job(BaseModel):
237
278
  return dedent(value)
238
279
 
239
280
  @model_validator(mode="after")
240
- def __prepare_running_id(self):
281
+ def __prepare_running_id(self) -> Self:
282
+ """Prepare the job running ID.
283
+
284
+ :rtype: Self
285
+ """
241
286
  if self.run_id is None:
242
287
  self.run_id = gen_id(self.id or "", unique=True)
243
288
 
@@ -257,17 +302,51 @@ class Job(BaseModel):
257
302
  return self.model_copy(update={"run_id": run_id})
258
303
 
259
304
  def stage(self, stage_id: str) -> Stage:
260
- """Return stage model that match with an input stage ID."""
305
+ """Return stage model that match with an input stage ID.
306
+
307
+ :param stage_id: A stage ID to extract from this job.
308
+ :rtype: Stage
309
+ """
261
310
  for stage in self.stages:
262
311
  if stage_id == (stage.id or ""):
263
312
  return stage
264
313
  raise ValueError(f"Stage ID {stage_id} does not exists")
265
314
 
266
- def set_outputs(self, output: DictData) -> DictData:
267
- """Setting output of job execution"""
268
- if len(output) > 1 and self.strategy.is_set():
269
- return {"strategies": output}
270
- return output[next(iter(output))]
315
+ def set_outputs(self, output: DictData, to: DictData) -> DictData:
316
+ """Set an outputs from execution process to the receive context. The
317
+ result from execution will pass to value of ``strategies`` key.
318
+
319
+ For example of setting output method, If you receive execute output
320
+ and want to set on the `to` like;
321
+
322
+ ... (i) output: {'strategy01': bar, 'strategy02': bar}
323
+ ... (ii) to: {}
324
+
325
+ The result of the `to` variable will be;
326
+
327
+ ... (iii) to: {
328
+ 'strategies': {
329
+ 'strategy01': bar, 'strategy02': bar
330
+ }
331
+ }
332
+
333
+ :param output: An output context.
334
+ :param to: A context data that want to add output result.
335
+ :rtype: DictData
336
+ """
337
+ if self.id is None:
338
+ raise JobException(
339
+ "This job do not set the ID before setting output."
340
+ )
341
+
342
+ to[self.id] = (
343
+ {"strategies": output}
344
+ if self.strategy.is_set()
345
+ # NOTE:
346
+ # This is the best way to get single key from dict.
347
+ else output[next(iter(output))]
348
+ )
349
+ return to
271
350
 
272
351
  def execute_strategy(
273
352
  self,
@@ -275,46 +354,38 @@ class Job(BaseModel):
275
354
  params: DictData,
276
355
  *,
277
356
  event: Event | None = None,
357
+ raise_error: bool = True,
278
358
  ) -> Result:
279
359
  """Job Strategy execution with passing dynamic parameters from the
280
360
  workflow execution to strategy matrix.
281
361
 
282
- This execution is the minimum level execution of job model.
362
+ This execution is the minimum level of execution of this job model.
363
+ It differs from ``self.execute`` because this method runs only one
364
+ strategy and returns the context of this strategy data.
365
+
366
+ :raise JobException: If it has any error from StageException or
367
+ UtilException.
283
368
 
284
369
  :param strategy: A matrix strategy value.
285
370
  :param params: A dynamic parameters.
286
371
  :param event: An event manager that is passed to the PoolThreadExecutor.
372
+ :param raise_error: A flag that raises the error instead of catching it into the result
373
+ if it gets an exception from stage execution.
287
374
  :rtype: Result
288
-
289
- :raise JobException: If it has any error from StageException or
290
- UtilException.
291
375
  """
292
- # NOTE: Force stop this execution if event was set from main execution.
293
- if event and event.is_set():
294
- return Result(
295
- status=1,
296
- context={
297
- gen_id(strategy): {
298
- "matrix": strategy,
299
- "stages": {},
300
- "error_message": {
301
- "message": "Process Event stopped before execution"
302
- },
303
- },
304
- },
305
- )
376
+ strategy_id: str = gen_id(strategy)
306
377
 
307
378
  # NOTE: Create strategy execution context and update a matrix and copied
308
379
  # of params. So, the context value will have structure like;
309
- # ---
380
+ #
310
381
  # {
311
382
  # "params": { ... }, <== Current input params
312
383
  # "jobs": { ... }, <== Current input params
313
384
  # "matrix": { ... } <== Current strategy value
314
385
  # }
315
386
  #
316
- context: DictData = params
317
- context.update({"matrix": strategy})
387
+ context: DictData = copy.deepcopy(params)
388
+ context.update({"matrix": strategy, "stages": {}})
318
389
 
319
390
  # IMPORTANT: The stage execution only run sequentially one-by-one.
320
391
  for stage in self.stages:
@@ -325,9 +396,7 @@ class Job(BaseModel):
325
396
  _st_name: str = stage.id or stage.name
326
397
 
327
398
  if stage.is_skipped(params=context):
328
- logger.info(
329
- f"({self.run_id}) [JOB]: Skip the stage: {_st_name!r}"
330
- )
399
+ logger.info(f"({self.run_id}) [JOB]: Skip stage: {_st_name!r}")
331
400
  continue
332
401
 
333
402
  logger.info(
@@ -338,34 +407,23 @@ class Job(BaseModel):
338
407
  if strategy:
339
408
  logger.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")
340
409
 
341
- # NOTE:
342
- # I do not use below syntax because `params` dict be the
343
- # reference memory pointer and it was changed when I action
344
- # anything like update or re-construct this.
345
- #
346
- # ... params |= stage.execute(params=params)
347
- #
348
- # This step will add the stage result to ``stages`` key in
349
- # that stage id. It will have structure like;
350
- # ---
351
- # {
352
- # "params": { ... },
353
- # "jobs": { ... },
354
- # "matrix": { ... },
355
- # "stages": { { "stage-id-1": ... }, ... }
356
- # }
357
- #
410
+ # NOTE: Force stop this execution if event was set from main
411
+ # execution.
358
412
  if event and event.is_set():
359
413
  return Result(
360
414
  status=1,
361
415
  context={
362
- gen_id(strategy): {
416
+ strategy_id: {
363
417
  "matrix": strategy,
364
418
  # NOTE: If job strategy executor use multithreading,
365
419
  # it will not filter function object from context.
366
420
  # ---
367
421
  # "stages": filter_func(context.pop("stages", {})),
368
422
  "stages": context.pop("stages", {}),
423
+ # NOTE: Set the error keys.
424
+ "error": JobException(
425
+ "Process Event stopped before execution"
426
+ ),
369
427
  "error_message": {
370
428
  "message": (
371
429
  "Process Event stopped before execution"
@@ -374,17 +432,40 @@ class Job(BaseModel):
374
432
  },
375
433
  },
376
434
  )
435
+
436
+ # NOTE:
437
+ # I do not use below syntax because `params` dict be the
438
+ # reference memory pointer and it was changed when I action
439
+ # anything like update or re-construct this.
440
+ #
441
+ # ... params |= stage.execute(params=params)
442
+ #
443
+ # This step will add the stage result to ``stages`` key in
444
+ # that stage id. It will have structure like;
445
+ #
446
+ # {
447
+ # "params": { ... },
448
+ # "jobs": { ... },
449
+ # "matrix": { ... },
450
+ # "stages": { { "stage-id-1": ... }, ... }
451
+ # }
452
+ #
377
453
  try:
378
- rs: Result = stage.execute(params=context)
379
- stage.set_outputs(rs.context, to=context)
454
+ stage.set_outputs(
455
+ stage.execute(params=context).context,
456
+ to=context,
457
+ )
380
458
  except (StageException, UtilException) as err:
381
459
  logger.error(
382
460
  f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
383
461
  )
384
- raise JobException(
385
- f"Get stage execution error: {err.__class__.__name__}: "
386
- f"{err}"
387
- ) from None
462
+ if raise_error:
463
+ raise JobException(
464
+ f"Get stage execution error: {err.__class__.__name__}: "
465
+ f"{err}"
466
+ ) from None
467
+ else:
468
+ raise NotImplementedError() from None
388
469
 
389
470
  # NOTE: Remove new stage object that was created from
390
471
  # ``get_running_id`` method.
@@ -393,12 +474,8 @@ class Job(BaseModel):
393
474
  return Result(
394
475
  status=0,
395
476
  context={
396
- gen_id(strategy): {
477
+ strategy_id: {
397
478
  "matrix": strategy,
398
- # NOTE: (WF001) filter own created function from stages
399
- # value, because it does not dump with pickle when you
400
- # execute with multiprocess.
401
- #
402
479
  "stages": filter_func(context.pop("stages", {})),
403
480
  },
404
481
  },
@@ -406,19 +483,21 @@ class Job(BaseModel):
406
483
 
407
484
  def execute(self, params: DictData | None = None) -> Result:
408
485
  """Job execution with passing dynamic parameters from the workflow
409
- execution. It will generate matrix values at the first step and for-loop
410
- any metrix to all stages dependency.
486
+ execution. It will generate matrix values at the first step and run
487
+ multithreaded on this matrix to the ``stages`` field of this job.
411
488
 
412
489
  :param params: An input parameters that use on job execution.
413
490
  :rtype: Result
414
491
  """
415
492
  context: DictData = {}
493
+ params: DictData = {} if params is None else params
416
494
 
417
- # NOTE: Normal Job execution.
495
+ # NOTE: Normal Job execution without parallel strategy.
418
496
  if (not self.strategy.is_set()) or self.strategy.max_parallel == 1:
419
497
  for strategy in self.strategy.make():
420
498
  rs: Result = self.execute_strategy(
421
- strategy, params=copy.deepcopy(params)
499
+ strategy=strategy,
500
+ params=copy.deepcopy(params),
422
501
  )
423
502
  context.update(rs.context)
424
503
  return Result(
@@ -426,36 +505,6 @@ class Job(BaseModel):
426
505
  context=context,
427
506
  )
428
507
 
429
- # # WARNING: (WF001) I got error that raise when use
430
- # # ``ProcessPoolExecutor``;
431
- # # ---
432
- # # _pickle.PicklingError: Can't pickle
433
- # # <function ??? at 0x000001F0BE80F160>: attribute lookup ???
434
- # # on ddeutil.workflow.stage failed
435
- # #
436
- # # from multiprocessing import Event, Manager
437
- # with Manager() as manager:
438
- # event: Event = manager.Event()
439
- #
440
- # # NOTE: Start process pool executor for running strategy executor
441
- # # in parallel mode.
442
- # with ProcessPoolExecutor(
443
- # max_workers=self.strategy.max_parallel
444
- # ) as executor:
445
- # futures: list[Future] = [
446
- # executor.submit(
447
- # self.execute_strategy,
448
- # strategy,
449
- # params=copy.deepcopy(params),
450
- # event=event,
451
- # )
452
- # for strategy in self.strategy.make()
453
- # ]
454
- # if self.strategy.fail_fast:
455
- # rs = self.__catch_fail_fast(event, futures)
456
- # else:
457
- # rs = self.__catch_all_completed(futures)
458
-
459
508
  # NOTE: Create event for cancel executor stop running.
460
509
  event: Event = Event()
461
510
 
@@ -465,53 +514,65 @@ class Job(BaseModel):
465
514
  futures: list[Future] = [
466
515
  executor.submit(
467
516
  self.execute_strategy,
468
- strategy,
469
- params=copy.deepcopy(params),
517
+ strategy=strategy,
518
+ params=params,
470
519
  event=event,
471
520
  )
472
521
  for strategy in self.strategy.make()
473
522
  ]
474
523
 
475
524
  # NOTE: Dynamic catching futures object with fail-fast flag.
476
- if self.strategy.fail_fast:
477
- rs: Result = self.__catch_fail_fast(event, futures)
478
- else:
479
- rs: Result = self.__catch_all_completed(futures)
480
- return Result(
481
- status=0,
482
- context=rs.context,
483
- )
525
+ return (
526
+ self.__catch_fail_fast(event=event, futures=futures)
527
+ if self.strategy.fail_fast
528
+ else self.__catch_all_completed(futures=futures)
529
+ )
484
530
 
485
- def __catch_fail_fast(self, event: Event, futures: list[Future]) -> Result:
531
+ def __catch_fail_fast(
532
+ self,
533
+ event: Event,
534
+ futures: list[Future],
535
+ *,
536
+ timeout: int = 1800,
537
+ result_timeout: int = 60,
538
+ ) -> Result:
486
539
  """Job parallel pool futures catching with fail-fast mode. That will
487
540
  stop all not done futures if it receive the first exception from all
488
541
  running futures.
489
542
 
490
- :param event:
543
+ :param event: An event manager instance that is able to set a stopper on the
544
+ observing thread/process.
491
545
  :param futures: A list of futures.
546
+ :param timeout: A timeout for waiting for all futures to complete.
547
+ :param result_timeout: A timeout of getting result from the future
548
+ instance when it was running completely.
492
549
  :rtype: Result
493
550
  """
551
+ rs_final: Result = Result()
494
552
  context: DictData = {}
495
- # NOTE: Get results from a collection of tasks with a
496
- # timeout that has the first exception.
553
+ status: int = 0
554
+
555
+ # NOTE: Get results from a collection of tasks with a timeout that has
556
+ # the first exception.
497
557
  done, not_done = wait(
498
- futures, timeout=1800, return_when=FIRST_EXCEPTION
558
+ futures,
559
+ timeout=timeout,
560
+ return_when=FIRST_EXCEPTION,
499
561
  )
500
562
  nd: str = (
501
563
  f", the strategies do not run is {not_done}" if not_done else ""
502
564
  )
503
565
  logger.debug(f"({self.run_id}) [JOB]: Strategy is set Fail Fast{nd}")
504
566
 
567
+ # NOTE: Stop all running tasks with setting the event manager and cancel
568
+ # any scheduled tasks.
505
569
  if len(done) != len(futures):
506
-
507
- # NOTE: Stop all running tasks
508
570
  event.set()
509
-
510
- # NOTE: Cancel any scheduled tasks
511
571
  for future in futures:
512
572
  future.cancel()
513
573
 
514
- status: int = 0
574
+ del future
575
+
515
576
  for future in done:
516
577
  if future.exception():
517
578
  status = 1
@@ -521,31 +582,36 @@ class Job(BaseModel):
521
582
  )
522
583
  elif future.cancelled():
523
584
  continue
524
- else:
525
- rs: Result = future.result(timeout=60)
526
- context.update(rs.context)
527
- return Result(status=status, context=context)
528
585
 
529
- def __catch_all_completed(self, futures: list[Future]) -> Result:
586
+ # NOTE: Update the result context to main job context.
587
+ context.update(future.result(timeout=result_timeout).context)
588
+
589
+ del future
590
+
591
+ return rs_final.catch(status=status, context=context)
592
+
593
+ def __catch_all_completed(
594
+ self,
595
+ futures: list[Future],
596
+ *,
597
+ timeout: int = 1800,
598
+ result_timeout: int = 60,
599
+ ) -> Result:
530
600
  """Job parallel pool futures catching with all-completed mode.
531
601
 
532
- :param futures: A list of futures.
602
+ :param futures: A list of futures that want to catch all completed
603
+ result.
604
+ :param timeout: A timeout for waiting for all futures to complete.
605
+ :param result_timeout: A timeout of getting result from the future
606
+ instance when it was running completely.
533
607
  :rtype: Result
534
608
  """
609
+ rs_final: Result = Result()
535
610
  context: DictData = {}
536
611
  status: int = 0
537
- for future in as_completed(futures):
612
+ for future in as_completed(futures, timeout=timeout):
538
613
  try:
539
- rs: Result = future.result(timeout=60)
540
- context.update(rs.context)
541
- except PickleError as err:
542
- # NOTE: (WF001) I do not want to fix this issue because
543
- # it does not make sense and over-engineering with
544
- # this bug fix process.
545
- raise JobException(
546
- f"PyStage that create object on locals does use "
547
- f"parallel in strategy execution;\n\t{err}"
548
- ) from None
614
+ context.update(future.result(timeout=result_timeout).context)
549
615
  except TimeoutError:
550
616
  status = 1
551
617
  logger.warning(
@@ -554,14 +620,13 @@ class Job(BaseModel):
554
620
  )
555
621
  future.cancel()
556
622
  time.sleep(0.1)
557
- if not future.cancelled():
558
- logger.warning(
559
- f"({self.run_id}) [JOB]: Failed to cancel the task."
560
- )
561
- else:
562
- logger.warning(
563
- f"({self.run_id}) [JOB]: Task canceled successfully."
564
- )
623
+
624
+ stmt: str = (
625
+ "Failed to cancel the task."
626
+ if not future.cancelled()
627
+ else "Task canceled successfully."
628
+ )
629
+ logger.warning(f"({self.run_id}) [JOB]: {stmt}")
565
630
  except JobException as err:
566
631
  status = 1
567
632
  logger.error(
@@ -569,4 +634,6 @@ class Job(BaseModel):
569
634
  f"fail-fast does not set;\n{err.__class__.__name__}:\n\t"
570
635
  f"{err}"
571
636
  )
572
- return Result(status=status, context=context)
637
+ finally:
638
+ del future
639
+ return rs_final.catch(status=status, context=context)