ddeutil-workflow 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -6,7 +6,6 @@
 from __future__ import annotations

 import copy
-import logging
 import os
 import time
 from concurrent.futures import (
@@ -44,11 +43,11 @@ from .exceptions import (
     StageException,
     UtilException,
 )
-from .loader import Loader
-from .log import FileLog, Log
+from .log import FileLog, Log, get_logger
 from .on import On
 from .stage import Stage
 from .utils import (
+    Loader,
     Param,
     Result,
     cross_product,
@@ -61,6 +60,9 @@ from .utils import (
     param2template,
 )

+logger = get_logger("ddeutil.workflow")
+
+
 __all__: TupleStr = (
     "Strategy",
     "Job",
@@ -85,9 +87,25 @@ class Strategy(BaseModel):
         ... }
     """

-    fail_fast: bool = Field(default=False)
-    max_parallel: int = Field(default=1, gt=0)
-    matrix: Matrix = Field(default_factory=dict)
+    fail_fast: bool = Field(
+        default=False,
+        serialization_alias="fail-fast",
+    )
+    max_parallel: int = Field(
+        default=1,
+        gt=0,
+        description=(
+            "The maximum number of executor thread pool that want to run "
+            "parallel"
+        ),
+        serialization_alias="max-parallel",
+    )
+    matrix: Matrix = Field(
+        default_factory=dict,
+        description=(
+            "A matrix values that want to cross product to possible strategies."
+        ),
+    )
     include: MatrixInclude = Field(
         default_factory=list,
         description="A list of additional matrix that want to adds-in.",
@@ -184,7 +202,13 @@ class Job(BaseModel):
         ... }
     """

-    id: Optional[str] = Field(default=None, description="A job ID.")
+    id: Optional[str] = Field(
+        default=None,
+        description=(
+            "A job ID, this value will add from pipeline after validation "
+            "process."
+        ),
+    )
     desc: Optional[str] = Field(
         default=None,
         description="A job description that can be string of markdown content.",
@@ -192,6 +216,7 @@ class Job(BaseModel):
     runs_on: Optional[str] = Field(
         default=None,
         description="A target executor node for this job use to execution.",
+        serialization_alias="runs-on",
     )
     stages: list[Stage] = Field(
         default_factory=list,
@@ -209,6 +234,7 @@ class Job(BaseModel):
         default=None,
         description="A running job ID.",
         repr=False,
+        exclude=True,
     )

     @model_validator(mode="before")
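Marking the transient `run_id` with `exclude=True` keeps it out of serialized output entirely; a quick sketch of the effect with stock Pydantic v2:

```python
# Sketch only: a field with exclude=True is validated and usable on the
# instance but never appears in model_dump()/model_dump_json() output.
from typing import Optional

from pydantic import BaseModel, Field


class Job(BaseModel):
    id: Optional[str] = None
    run_id: Optional[str] = Field(default=None, repr=False, exclude=True)


print(Job(id="first-job", run_id="xyz").model_dump())
# {'id': 'first-job'}
```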
@@ -252,11 +278,12 @@ class Job(BaseModel):
             raise ValueError(f"Stage ID {stage_id} does not exists")

     def set_outputs(self, output: DictData) -> DictData:
+        """Setting output of job execution"""
         if len(output) > 1 and self.strategy.is_set():
             return {"strategies": output}
         return output[next(iter(output))]

-    def strategy_execute(
+    def execute_strategy(
         self,
         strategy: DictData,
         params: DictData,
@@ -276,6 +303,7 @@ class Job(BaseModel):
         :raise JobException: If it has any error from StageException or
             UtilException.
         """
+        # NOTE: Force stop this execution if event was set from main execution.
         if event and event.is_set():
             return Result(
                 status=1,
@@ -283,7 +311,7 @@ class Job(BaseModel):
                     gen_id(strategy): {
                         "matrix": strategy,
                         "stages": {},
-                        "error": {
+                        "error_message": {
                             "message": "Process Event stopped before execution"
                         },
                     },
@@ -311,18 +339,18 @@ class Job(BaseModel):
             _st_name: str = stage.id or stage.name

             if stage.is_skipped(params=context):
-                logging.info(
+                logger.info(
                     f"({self.run_id}) [JOB]: Skip the stage: {_st_name!r}"
                 )
                 continue

-            logging.info(
+            logger.info(
                 f"({self.run_id}) [JOB]: Start execute the stage: {_st_name!r}"
             )

             # NOTE: Logging a matrix that pass on this stage execution.
             if strategy:
-                logging.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")
+                logger.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")

             # NOTE:
             # I do not use below syntax because `params` dict be the
@@ -352,7 +380,7 @@ class Job(BaseModel):
                         # ---
                         # "stages": filter_func(context.pop("stages", {})),
                         "stages": context.pop("stages", {}),
-                        "error": {
+                        "error_message": {
                             "message": (
                                 "Process Event stopped before execution"
                             ),
@@ -364,7 +392,7 @@ class Job(BaseModel):
                 rs: Result = stage.execute(params=context)
                 stage.set_outputs(rs.context, to=context)
             except (StageException, UtilException) as err:
-                logging.error(
+                logger.error(
                     f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
                 )
                 raise JobException(
@@ -398,18 +426,18 @@ class Job(BaseModel):
         :param params: An input parameters that use on job execution.
         :rtype: Result
         """
-        strategy_context: DictData = {}
+        context: DictData = {}

         # NOTE: Normal Job execution.
         if (not self.strategy.is_set()) or self.strategy.max_parallel == 1:
             for strategy in self.strategy.make():
-                rs: Result = self.strategy_execute(
+                rs: Result = self.execute_strategy(
                     strategy, params=copy.deepcopy(params)
                 )
-                strategy_context.update(rs.context)
+                context.update(rs.context)
             return Result(
                 status=0,
-                context=strategy_context,
+                context=context,
             )

         # # WARNING: (WF001) I got error that raise when use
@@ -430,7 +458,7 @@ class Job(BaseModel):
         # ) as executor:
         #     futures: list[Future] = [
         #         executor.submit(
-        #             self.strategy_execute,
+        #             self.execute_strategy,
         #             strategy,
         #             params=copy.deepcopy(params),
         #             event=event,
@@ -450,13 +478,15 @@ class Job(BaseModel):
         ) as executor:
             futures: list[Future] = [
                 executor.submit(
-                    self.strategy_execute,
+                    self.execute_strategy,
                     strategy,
                     params=copy.deepcopy(params),
                     event=event,
                 )
                 for strategy in self.strategy.make()
             ]
+
+            # NOTE: Dynamic catching futures object with fail-fast flag.
             if self.strategy.fail_fast:
                 rs: Result = self.__catch_fail_fast(event, futures)
             else:
@@ -475,7 +505,7 @@ class Job(BaseModel):
         :param futures: A list of futures.
         :rtype: Result
         """
-        strategy_context: DictData = {}
+        context: DictData = {}
         # NOTE: Get results from a collection of tasks with a
         # timeout that has the first exception.
         done, not_done = wait(
@@ -484,20 +514,22 @@ class Job(BaseModel):
         nd: str = (
             f", the strategies do not run is {not_done}" if not_done else ""
         )
-        logging.debug(f"[JOB]: Strategy is set Fail Fast{nd}")
+        logger.debug(f"({self.run_id}) [JOB]: Strategy is set Fail Fast{nd}")

-        # NOTE: Stop all running tasks
-        event.set()
+        if len(done) != len(futures):

-        # NOTE: Cancel any scheduled tasks
-        for future in futures:
-            future.cancel()
+            # NOTE: Stop all running tasks
+            event.set()
+
+            # NOTE: Cancel any scheduled tasks
+            for future in futures:
+                future.cancel()

         status: int = 0
         for future in done:
             if future.exception():
                 status = 1
-                logging.error(
+                logger.error(
                     f"({self.run_id}) [JOB]: One stage failed with: "
                     f"{future.exception()}, shutting down this future."
                 )
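The new `len(done) != len(futures)` guard means cancellation only happens when `wait()` returned early, i.e. something actually failed before every strategy finished. A self-contained sketch of that pattern with a hypothetical `work` task (not package code):

```python
# wait() with FIRST_EXCEPTION returns as soon as one future raises; if
# done already covers every future, there is nothing left to stop.
import time
from concurrent.futures import FIRST_EXCEPTION, ThreadPoolExecutor, wait
from threading import Event


def work(n: int) -> int:
    time.sleep(n)
    if n == 2:
        raise ValueError("boom")
    return n


event = Event()
with ThreadPoolExecutor(max_workers=2) as pool:
    futures = [pool.submit(work, n) for n in (1, 2, 3, 4)]
    done, not_done = wait(futures, return_when=FIRST_EXCEPTION)
    if len(done) != len(futures):
        event.set()              # signal cooperating tasks, as the job code does
        for future in futures:
            future.cancel()      # drop anything still queued
```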
@@ -505,11 +537,8 @@ class Job(BaseModel):
                 continue
             else:
                 rs: Result = future.result(timeout=60)
-                strategy_context.update(rs.context)
-        return Result(
-            status=status,
-            context=strategy_context,
-        )
+                context.update(rs.context)
+        return Result(status=status, context=context)

     def __catch_all_completed(self, futures: list[Future]) -> Result:
         """Job parallel pool futures catching with all-completed mode.
@@ -517,12 +546,12 @@ class Job(BaseModel):
         :param futures: A list of futures.
         :rtype: Result
         """
-        strategy_context: DictData = {}
+        context: DictData = {}
         status: int = 0
         for future in as_completed(futures):
             try:
                 rs: Result = future.result(timeout=60)
-                strategy_context.update(rs.context)
+                context.update(rs.context)
             except PickleError as err:
                 # NOTE: (WF001) I do not want to fix this issue because
                 # it does not make sense and over-engineering with
@@ -533,26 +562,35 @@ class Job(BaseModel):
                 ) from None
             except TimeoutError:
                 status = 1
-                logging.warning("Task is hanging. Attempting to kill.")
+                logger.warning(
+                    f"({self.run_id}) [JOB]: Task is hanging. Attempting to "
+                    f"kill."
+                )
                 future.cancel()
+                time.sleep(0.1)
                 if not future.cancelled():
-                    logging.warning("Failed to cancel the task.")
+                    logger.warning(
+                        f"({self.run_id}) [JOB]: Failed to cancel the task."
+                    )
                 else:
-                    logging.warning("Task canceled successfully.")
+                    logger.warning(
+                        f"({self.run_id}) [JOB]: Task canceled successfully."
+                    )
             except JobException as err:
                 status = 1
-                logging.error(
+                logger.error(
                     f"({self.run_id}) [JOB]: Get stage exception with "
                     f"fail-fast does not set;\n{err.__class__.__name__}:\n\t"
                     f"{err}"
                 )
-        return Result(status=status, context=strategy_context)
+        return Result(status=status, context=context)


 class Pipeline(BaseModel):
     """Pipeline Model this is the main future of this project because it use to
-    be workflow data for running everywhere that you want. It use lightweight
-    coding line to execute it.
+    be workflow data for running everywhere that you want or using it to
+    scheduler task in background. It use lightweight coding line from Pydantic
+    Model and enhance execute method on it.
     """

     name: str = Field(description="A pipeline name.")
@@ -578,6 +616,7 @@ class Pipeline(BaseModel):
         default=None,
         description="A running pipeline ID.",
         repr=False,
+        exclude=True,
     )

     @property
@@ -601,17 +640,19 @@ class Pipeline(BaseModel):
         :rtype: Self
         """
         loader: Loader = Loader(name, externals=(externals or {}))
+
+        # NOTE: Validate the config type match with current connection model
+        if loader.type != cls:
+            raise ValueError(f"Type {loader.type} does not match with {cls}")
+
         loader_data: DictData = copy.deepcopy(loader.data)

         # NOTE: Add name to loader data
         loader_data["name"] = name.replace(" ", "_")

-        if "jobs" not in loader_data:
-            raise ValueError("Config does not set ``jobs`` value")
-
         # NOTE: Prepare `on` data
         cls.__bypass_on(loader_data)
-        return cls.model_validate(loader_data)
+        return cls.model_validate(obj=loader_data)

     @classmethod
     def __bypass_on(cls, data: DictData, externals: DictData | None = None):
@@ -742,120 +783,128 @@ class Pipeline(BaseModel):
         self,
         on: On,
         params: DictData,
+        queue: list[datetime],
         *,
-        waiting_sec: int = 55,
+        waiting_sec: int = 60,
         sleep_interval: int = 15,
         log: Log = None,
-        lq: list[datetime] = None,
     ) -> Result:
         """Start running pipeline with the on schedule in period of 30 minutes.
         That mean it will still running at background 30 minutes until the
         schedule matching with its time.

         This method allow pipeline use log object to save the execution
-        result to log destination like file log to local /logs directory.
-
+        result to log destination like file log to local `/logs` directory.
+
+        :param on: An on schedule value.
+        :param params: A pipeline parameter that pass to execute method.
+        :param queue: A list of release time that already running.
+        :param waiting_sec: A second period value that allow pipeline execute.
+        :param sleep_interval: A second value that want to waiting until time
+            to execute.
+        :param log: A log object that want to save execution result.
         :rtype: Result
         """
-        delay()
         log: Log = log or FileLog
-        current_running_time = datetime.now()
-        if not (
-            latest_running_time := log.latest_point(name=self.name, queue=lq)
-        ) or (
-            latest_running_time.replace(tzinfo=ZoneInfo(on.tz))
-            < current_running_time.replace(tzinfo=ZoneInfo(on.tz))
-        ):
-            latest_running_time: datetime = current_running_time.replace(
-                tzinfo=ZoneInfo(on.tz)
-            )
-        else:
-            latest_running_time: datetime = latest_running_time.replace(
-                tzinfo=ZoneInfo(on.tz)
-            )
-
+        tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
         gen: CronRunner = on.generate(
-            latest_running_time + timedelta(seconds=1)
+            datetime.now(tz=tz).replace(second=0, microsecond=0)
+            + timedelta(seconds=1)
         )
-        tz: ZoneInfo = gen.tz
+        cron_tz: ZoneInfo = gen.tz

         # NOTE: get next schedule time that generate from now.
-        next_running_time: datetime = gen.next
+        next_time: datetime = gen.next

-        # NOTE: get next utils it does not logging.
-        # while log.is_pointed(self.name, next_running_time, queue=lq):
-        #     next_running_time: datetime = gen.next
-        while log.is_pointed(self.name, next_running_time, queue=lq):
-            next_running_time: datetime = gen.next
+        # NOTE: get next utils it does not logger.
+        while log.is_pointed(self.name, next_time, queue=queue):
+            next_time: datetime = gen.next

-        heappush(lq, next_running_time)
+        # NOTE: push this next running time to log queue
+        heappush(queue, next_time)

         # VALIDATE: Check the different time between the next schedule time and
         # now that less than waiting period (second unit).
-        if get_diff_sec(next_running_time, tz=tz) <= waiting_sec:
-            logging.debug(
+        if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
+            logger.debug(
                 f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
-                f"Closely to run >> {next_running_time:%Y-%m-%d %H:%M:%S}"
+                f"Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
             )

-            # NOTE: Release when the time is nearly to schedule time.
-            while (duration := get_diff_sec(next_running_time, tz=tz)) > (
-                sleep_interval + 5
-            ):
-                logging.debug(
-                    f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
-                    f"Sleep until: {duration}"
-                )
-                time.sleep(sleep_interval)
-
-            time.sleep(0.5)
-
-            # NOTE: Release parameter that use to change if params has
-            # templating.
-            release_params: DictData = {
-                "release": {
-                    "logical_date": next_running_time,
-                },
-            }
-
-            # WARNING: Re-create pipeline object that use new running pipeline
-            # ID.
-            pipeline: Self = self.get_running_id(run_id=self.new_run_id)
-            rs: Result = pipeline.execute(
-                params=param2template(params, release_params),
-            )
-            logging.debug(
-                f"({pipeline.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
-                f"End release"
-            )
-
-            del pipeline
-
-            rs.set_parent_run_id(self.run_id)
-            rs_log: Log = log.model_validate(
-                {
-                    "name": self.name,
-                    "on": str(on.cronjob),
-                    "release": next_running_time,
-                    "context": rs.context,
-                    "parent_run_id": rs.run_id,
-                    "run_id": rs.run_id,
-                }
-            )
-            rs_log.save()
-        else:
-            logging.debug(
-                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
-                f"Does not closely >> {next_running_time:%Y-%m-%d %H:%M:%S}"
-            )
-            rs = Result(status=0, context={"params": params})
-
-        if lq is None:
-            return rs
-
-        lq.remove(next_running_time)
-        time.sleep(0.25)
-        return rs
+            # NOTE: Remove next datetime from queue.
+            queue.remove(next_time)
+
+            time.sleep(0.15)
+            return Result(
+                status=0,
+                context={
+                    "params": params,
+                    "poking": {"skipped": [str(on.cronjob)], "run": []},
+                },
+            )
+
+        logger.debug(
+            f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+            f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
+        )
+
+        # NOTE: Release when the time is nearly to schedule time.
+        while (duration := get_diff_sec(next_time, tz=cron_tz)) > (
+            sleep_interval + 5
+        ):
+            logger.debug(
+                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"Sleep until: {duration}"
+            )
+            time.sleep(sleep_interval)

+        time.sleep(0.5)
+
+        # NOTE: Release parameter that use to change if params has
+        # templating.
+        release_params: DictData = {
+            "release": {
+                "logical_date": next_time,
+            },
+        }
+
+        # WARNING: Re-create pipeline object that use new running pipeline
+        # ID.
+        runner: Self = self.get_running_id(run_id=self.new_run_id)
+        rs: Result = runner.execute(
+            params=param2template(params, release_params),
+        )
+        logger.debug(
+            f"({runner.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+            f"End release {next_time:%Y-%m-%d %H:%M:%S}"
+        )
+
+        # NOTE: Delete a copied pipeline instance for saving memory.
+        del runner
+
+        rs.set_parent_run_id(self.run_id)
+        rs_log: Log = log.model_validate(
+            {
+                "name": self.name,
+                "on": str(on.cronjob),
+                "release": next_time,
+                "context": rs.context,
+                "parent_run_id": rs.run_id,
+                "run_id": rs.run_id,
+            }
+        )
+        # NOTE: Saving execution result to destination of the input log object.
+        rs_log.save(excluded=None)
+
+        queue.remove(next_time)
+        time.sleep(0.05)
+        return Result(
+            status=0,
+            context={
+                "params": params,
+                "poking": {"skipped": [], "run": [str(on.cronjob)]},
+            },
+        )

     def poke(
         self,
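The rewritten `release()` coordinates concurrent schedules through the shared `queue` of claimed run times rather than the old log lookup. A small sketch of that claim-and-release discipline (assumed names, no scheduler attached):

```python
# Each release() claims its next run time on a shared heap so sibling
# schedules skip it, then removes it once handled. Note list.remove()
# is used directly on the heap list, mirroring the code above.
from datetime import datetime
from heapq import heappush

queue: list[datetime] = []

next_time = datetime(2024, 1, 1, 0, 0)
if next_time not in queue:      # stand-in for the log.is_pointed() check
    heappush(queue, next_time)  # claim the slot
    # ... execute or skip this release ...
    queue.remove(next_time)     # free the slot afterwards
```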
@@ -871,45 +920,49 @@ class Pipeline(BaseModel):
         :param log: A log object that want to use on this poking process.
         :rtype: list[Result]
         """
-        params: DictData = params or {}
-        logging.info(f"({self.run_id}) [CORE]: Start Poking: {self.name!r} ...")
-        results: list[Result] = []
-        log_queue: list[datetime] = []
+        logger.info(
+            f"({self.run_id}) [POKING]: Start Poking: {self.name!r} ..."
+        )

-        # NOTE: If this pipeline does not set schedule, it will return empty
-        # result.
+        # NOTE: If this pipeline does not set the on schedule, it will return
+        # empty result.
         if len(self.on) == 0:
-            return results
+            return []

-        with ThreadPoolExecutor(
-            max_workers=int(
-                os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING", "4")
-            ),
-        ) as executor:
-            futures: list[Future] = [
-                executor.submit(
-                    self.release,
-                    on,
-                    params=params,
-                    log=log,
-                    lq=log_queue,
-                )
-                for on in self.on
-            ]
+        params: DictData = params or {}
+        queue: list[datetime] = []
+        results: list[Result] = []
+
+        wk: int = int(os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING") or "4")
+        with ThreadPoolExecutor(max_workers=wk) as executor:
+            # TODO: If I want to run infinite loop.
+            futures: list[Future] = []
+            for on in self.on:
+                futures.append(
+                    executor.submit(
+                        self.release,
+                        on,
+                        params=params,
+                        log=log,
+                        queue=queue,
+                    )
+                )
+                delay()
+
+            # WARNING: This poking method does not allow to use fail-fast logic
+            # to catching parallel execution result.
             for future in as_completed(futures):
-                rs: Result = future.result()
-                logging.info(rs.context.get("params", {}))
-                results.append(rs)
-
-            if len(log_queue) > 0:
-                logging.error(
-                    f"({self.run_id}) [CORE]: Log Queue does empty when poke "
-                    f"is finishing."
-                )
+                results.append(future.result(timeout=60))
+
+            if len(queue) > 0:
+                logger.error(
+                    f"({self.run_id}) [POKING]: Log Queue does empty when poking "
+                    f"process was finishing."
+                )

         return results

-    def job_execute(
+    def execute_job(
         self,
         job: str,
         params: DictData,
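With the context now reporting which crontabs ran versus were skipped, a hedged usage sketch (assuming the `Pipeline.from_loader` constructor and a pipeline config named `my-pipeline`; both names are illustrative):

```python
# Hypothetical usage: poke() submits one release() per `on` schedule and
# returns one Result per schedule.
from ddeutil.workflow.pipeline import Pipeline

pipe = Pipeline.from_loader(name="my-pipeline", externals={})
for rs in pipe.poke(params={"run-date": "2024-01-01"}):
    print(rs.context["poking"])  # {'skipped': [...], 'run': [...]}
```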
@@ -918,6 +971,7 @@ class Pipeline(BaseModel):

         :param job: A job ID that want to execute.
         :param params: A params that was parameterized from pipeline execution.
+        :rtype: Result
         """
         # VALIDATE: check a job ID that exists in this pipeline or not.
         if job not in self.jobs:
@@ -925,7 +979,7 @@ class Pipeline(BaseModel):
                 f"The job ID: {job} does not exists on {self.name!r} pipeline."
             )
         try:
-            logging.info(f"({self.run_id}) [PIPELINE]: Start execute: {job!r}")
+            logger.info(f"({self.run_id}) [PIPELINE]: Start execute: {job!r}")

             # IMPORTANT:
             # Change any job running IDs to this pipeline running ID.
@@ -933,10 +987,8 @@ class Pipeline(BaseModel):
             j_rs: Result = job_obj.execute(params=params)

         except JobException as err:
-            raise PipelineException(
-                f"The job ID: {job} get error: {err.__class__.__name__}:"
-                f"\n{err}"
-            ) from None
+            raise PipelineException(f"{job}: JobException: {err}") from None
+
         return Result(
             status=j_rs.status,
             context={job: job_obj.set_outputs(j_rs.context)},
@@ -970,12 +1022,16 @@ class Pipeline(BaseModel):
         ... ${job-name}.stages.${stage-id}.outputs.${key}

         """
-        logging.info(f"({self.run_id}) [CORE]: Start Execute: {self.name} ...")
+        logger.info(f"({self.run_id}) [CORE]: Start Execute: {self.name!r} ...")
         params: DictData = params or {}
+        ts: float = time.monotonic()

         # NOTE: It should not do anything if it does not have job.
         if not self.jobs:
-            logging.warning("[PIPELINE]: This pipeline does not have any jobs")
+            logger.warning(
+                f"({self.run_id}) [PIPELINE]: This pipeline: {self.name!r} "
+                f"does not have any jobs"
+            )
             return Result(status=0, context=params)

         # NOTE: Create a job queue that keep the job that want to running after
@@ -984,79 +1040,70 @@ class Pipeline(BaseModel):
         for job_id in self.jobs:
             jq.put(job_id)

-        # NOTE: Create start timestamp
-        ts: float = time.monotonic()
-
         # NOTE: Create result context that will pass this context to any
         # execution dependency.
-        rs: Result = Result(context=self.parameterize(params))
+        context: DictData = self.parameterize(params)
         try:
-            rs.receive(
-                self.__exec_non_threading(rs, ts, timeout=timeout)
-                if (
-                    worker := int(
-                        os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2")
-                    )
-                )
-                == 1
+            worker: int = int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))
+            (
+                self.__exec_non_threading(context, ts, jq, timeout=timeout)
+                if worker == 1
                 else self.__exec_threading(
-                    rs, ts, worker=worker, timeout=timeout
+                    context, ts, jq, worker=worker, timeout=timeout
                 )
             )
-            return rs
+            return Result(status=0, context=context)
         except PipelineException as err:
-            rs.context.update({"error": {"message": str(err)}})
-            rs.status = 1
-            return rs
+            context.update(
+                {"error_message": f"{err.__class__.__name__}: {err}"}
+            )
+            return Result(status=1, context=context)

     def __exec_threading(
         self,
-        rs: Result,
+        context: DictData,
         ts: float,
+        job_queue: Queue,
         *,
         worker: int = 2,
         timeout: int = 600,
-    ) -> Result:
+    ) -> DictData:
         """Pipeline threading execution.

-        :param rs:
-        :param ts:
+        :param context: A context pipeline data that want to downstream passing.
+        :param ts: A start timestamp that use for checking execute time should
+            timeout.
         :param timeout: A second value unit that bounding running time.
         :param worker: A number of threading executor pool size.
-        :rtype: Result
+        :rtype: DictData
         """
         not_time_out_flag: bool = True
-        logging.debug(
+        logger.debug(
             f"({self.run_id}): [CORE]: Run {self.name} with threading job "
             f"executor"
         )

-        # NOTE: Create a job queue that keep the job that want to running after
-        # it dependency condition.
-        job_queue: Queue = Queue()
-        for job_id in self.jobs:
-            job_queue.put(job_id)
-
         # IMPORTANT: The job execution can run parallel and waiting by
         # needed.
         with ThreadPoolExecutor(max_workers=worker) as executor:
             futures: list[Future] = []
+
             while not job_queue.empty() and (
                 not_time_out_flag := ((time.monotonic() - ts) < timeout)
             ):
                 job_id: str = job_queue.get()
                 job: Job = self.jobs[job_id]

-                if any(need not in rs.context["jobs"] for need in job.needs):
+                if any(need not in context["jobs"] for need in job.needs):
                     job_queue.put(job_id)
-                    time.sleep(0.5)
+                    time.sleep(0.25)
                     continue

                 futures.append(
                     executor.submit(
-                        self.job_execute,
+                        self.execute_job,
                         job_id,
-                        params=copy.deepcopy(rs.context),
+                        params=copy.deepcopy(context),
                     ),
                 )
                 job_queue.task_done()
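Both executors share the same re-queue discipline for `needs`: a job whose dependencies have not yet landed in the context goes back on the queue. A stripped-down sketch of that loop with a toy job table and no thread pool:

```python
# Toy model of the dependency loop: jobs re-enter the queue until every
# job named in their needs has produced output in context["jobs"].
from queue import Queue

needs = {"c": ["a", "b"], "b": ["a"], "a": []}
context: dict = {"jobs": {}}

q: Queue = Queue()
for job_id in needs:
    q.put(job_id)

while not q.empty():
    job_id = q.get()
    if any(need not in context["jobs"] for need in needs[job_id]):
        q.put(job_id)             # dependencies not ready; retry later
        continue
    context["jobs"][job_id] = {}  # stand-in for execute_job() output

print(list(context["jobs"]))      # ['a', 'b', 'c']
```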
@@ -1066,19 +1113,19 @@ class Pipeline(BaseModel):

             for future in as_completed(futures):
                 if err := future.exception():
-                    logging.error(f"{err}")
+                    logger.error(f"{err}")
                     raise PipelineException(f"{err}")

                 # NOTE: Update job result to pipeline result.
-                rs.receive_jobs(future.result(timeout=20))
+                context["jobs"].update(future.result(timeout=20).context)

         if not_time_out_flag:
-            rs.status = 0
-            return rs
+            return context

         # NOTE: Raise timeout error.
-        logging.warning(
-            f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
+        logger.warning(
+            f"({self.run_id}) [PIPELINE]: Execution of pipeline, {self.name!r} "
+            f", was timeout"
         )
         raise PipelineException(
             f"Execution of pipeline: {self.name} was timeout"
@@ -1086,28 +1133,26 @@ class Pipeline(BaseModel):

     def __exec_non_threading(
         self,
-        rs: Result,
+        context: DictData,
         ts: float,
+        job_queue: Queue,
         *,
         timeout: int = 600,
-    ) -> Result:
-        """Pipeline non-threading execution.
+    ) -> DictData:
+        """Pipeline non-threading execution that use sequential job running
+        and waiting previous run successful.

-        :param rs:
-        :param ts:
+        :param context: A context pipeline data that want to downstream passing.
+        :param ts: A start timestamp that use for checking execute time should
+            timeout.
         :param timeout: A second value unit that bounding running time.
-        :rtype: Result
+        :rtype: DictData
         """
         not_time_out_flag: bool = True
-        logging.debug(
+        logger.debug(
             f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
             f"executor"
         )
-        # NOTE: Create a job queue that keep the job that want to running after
-        # it dependency condition.
-        job_queue: Queue = Queue()
-        for job_id in self.jobs:
-            job_queue.put(job_id)

         while not job_queue.empty() and (
             not_time_out_flag := ((time.monotonic() - ts) < timeout)
@@ -1116,25 +1161,24 @@ class Pipeline(BaseModel):
             job: Job = self.jobs[job_id]

             # NOTE:
-            if any(need not in rs.context["jobs"] for need in job.needs):
+            if any(need not in context["jobs"] for need in job.needs):
                 job_queue.put(job_id)
-                time.sleep(0.5)
+                time.sleep(0.25)
                 continue

             # NOTE: Start job execution.
-            job_rs = self.job_execute(job_id, params=copy.deepcopy(rs.context))
-            rs.context["jobs"].update(job_rs.context)
+            job_rs = self.execute_job(job_id, params=copy.deepcopy(context))
+            context["jobs"].update(job_rs.context)
             job_queue.task_done()

         # NOTE: Wait for all items to finish processing
         job_queue.join()

         if not_time_out_flag:
-            rs.status = 0
-            return rs
+            return context

         # NOTE: Raise timeout error.
-        logging.warning(
+        logger.warning(
             f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
         )
         raise PipelineException(