ddeutil-workflow 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,23 +12,32 @@ import time
12
12
  from concurrent.futures import (
13
13
  FIRST_EXCEPTION,
14
14
  Future,
15
- ProcessPoolExecutor,
16
15
  ThreadPoolExecutor,
17
16
  as_completed,
18
17
  wait,
19
18
  )
20
- from datetime import datetime
21
- from multiprocessing import Event, Manager
19
+ from datetime import datetime, timedelta
20
+ from heapq import heappush
22
21
  from pickle import PickleError
23
22
  from queue import Queue
23
+ from textwrap import dedent
24
+ from threading import Event
24
25
  from typing import Optional
25
26
  from zoneinfo import ZoneInfo
26
27
 
27
28
  from pydantic import BaseModel, Field
28
- from pydantic.functional_validators import model_validator
29
+ from pydantic.functional_validators import field_validator, model_validator
29
30
  from typing_extensions import Self
30
31
 
31
- from .__types import DictData, DictStr, Matrix, MatrixExclude, MatrixInclude
32
+ from .__types import (
33
+ DictData,
34
+ DictStr,
35
+ Matrix,
36
+ MatrixExclude,
37
+ MatrixInclude,
38
+ TupleStr,
39
+ )
40
+ from .cron import CronRunner
32
41
  from .exceptions import (
33
42
  JobException,
34
43
  PipelineException,
@@ -36,17 +45,26 @@ from .exceptions import (
36
45
  UtilException,
37
46
  )
38
47
  from .loader import Loader
48
+ from .log import FileLog, Log
39
49
  from .on import On
40
- from .scheduler import CronRunner
41
50
  from .stage import Stage
42
51
  from .utils import (
43
52
  Param,
44
53
  Result,
45
54
  cross_product,
46
55
  dash2underscore,
56
+ delay,
47
57
  filter_func,
48
58
  gen_id,
49
59
  get_diff_sec,
60
+ has_template,
61
+ param2template,
62
+ )
63
+
64
+ __all__: TupleStr = (
65
+ "Strategy",
66
+ "Job",
67
+ "Pipeline",
50
68
  )
51
69
 
52
70
 
@@ -166,9 +184,15 @@ class Job(BaseModel):
166
184
  ... }
167
185
  """
168
186
 
169
- id: Optional[str] = Field(default=None)
170
- desc: Optional[str] = Field(default=None)
171
- runs_on: Optional[str] = Field(default=None)
187
+ id: Optional[str] = Field(default=None, description="A job ID.")
188
+ desc: Optional[str] = Field(
189
+ default=None,
190
+ description="A job description that can be string of markdown content.",
191
+ )
192
+ runs_on: Optional[str] = Field(
193
+ default=None,
194
+ description="A target executor node for this job use to execution.",
195
+ )
172
196
  stages: list[Stage] = Field(
173
197
  default_factory=list,
174
198
  description="A list of Stage of this job.",
@@ -182,7 +206,9 @@ class Job(BaseModel):
182
206
  description="A strategy matrix that want to generate.",
183
207
  )
184
208
  run_id: Optional[str] = Field(
185
- default=None, description="A running job ID.", repr=False
209
+ default=None,
210
+ description="A running job ID.",
211
+ repr=False,
186
212
  )
187
213
 
188
214
  @model_validator(mode="before")
@@ -193,12 +219,31 @@ class Job(BaseModel):
193
219
  dash2underscore("runs-on", values)
194
220
  return values
195
221
 
222
+ @field_validator("desc", mode="after")
223
+ def ___prepare_desc(cls, value: str) -> str:
224
+ """Prepare description string that was created on a template."""
225
+ return dedent(value)
226
+
196
227
  @model_validator(mode="after")
197
228
  def __prepare_running_id(self):
198
229
  if self.run_id is None:
199
230
  self.run_id = gen_id(self.id or "", unique=True)
231
+
232
+ # VALIDATE: The job ID must be static; it must not contain a params template.
233
+ if has_template(self.id):
234
+ raise ValueError("Job ID should not has any template.")
235
+
200
236
  return self
201
237
 
238
+ def get_running_id(self, run_id: str) -> Self:
239
+ """Return Job model object that changing job running ID with an
240
+ input running ID.
241
+
242
+ :param run_id: A replace job running ID.
243
+ :rtype: Self
244
+ """
245
+ return self.model_copy(update={"run_id": run_id})
246
+
202
247
  def stage(self, stage_id: str) -> Stage:
203
248
  """Return stage model that match with an input stage ID."""
204
249
  for stage in self.stages:
@@ -209,7 +254,6 @@ class Job(BaseModel):
209
254
  def set_outputs(self, output: DictData) -> DictData:
210
255
  if len(output) > 1 and self.strategy.is_set():
211
256
  return {"strategies": output}
212
-
213
257
  return output[next(iter(output))]
214
258
 
215
259
  def strategy_execute(
@@ -262,7 +306,7 @@ class Job(BaseModel):
262
306
  for stage in self.stages:
263
307
 
264
308
  # IMPORTANT: Change any stage running IDs to this job running ID.
265
- stage.run_id = self.run_id
309
+ stage: Stage = stage.get_running_id(self.run_id)
266
310
 
267
311
  _st_name: str = stage.id or stage.name
268
312
 
@@ -303,7 +347,11 @@ class Job(BaseModel):
303
347
  context={
304
348
  gen_id(strategy): {
305
349
  "matrix": strategy,
306
- "stages": filter_func(context.pop("stages", {})),
350
+ # NOTE: If job strategy executor use multithreading,
351
+ # it will not filter function object from context.
352
+ # ---
353
+ # "stages": filter_func(context.pop("stages", {})),
354
+ "stages": context.pop("stages", {}),
307
355
  "error": {
308
356
  "message": (
309
357
  "Process Event stopped before execution"
@@ -314,7 +362,7 @@ class Job(BaseModel):
314
362
  )
315
363
  try:
316
364
  rs: Result = stage.execute(params=context)
317
- stage.set_outputs(rs.context, params=context)
365
+ stage.set_outputs(rs.context, to=context)
318
366
  except (StageException, UtilException) as err:
319
367
  logging.error(
320
368
  f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
@@ -323,6 +371,11 @@ class Job(BaseModel):
323
371
  f"Get stage execution error: {err.__class__.__name__}: "
324
372
  f"{err}"
325
373
  ) from None
374
+
375
+ # NOTE: Remove new stage object that was created from
376
+ # ``get_running_id`` method.
377
+ del stage
378
+
326
379
  return Result(
327
380
  status=0,
328
381
  context={
@@ -359,53 +412,74 @@ class Job(BaseModel):
359
412
  context=strategy_context,
360
413
  )
361
414
 
362
- # WARNING: (WF001) I got error that raise when use
363
- # ``ProcessPoolExecutor``;
364
- # ---
365
- # _pickle.PicklingError: Can't pickle
366
- # <function ??? at 0x000001F0BE80F160>: attribute lookup ???
367
- # on ddeutil.workflow.stage failed
415
+ # # WARNING: (WF001) I got error that raise when use
416
+ # # ``ProcessPoolExecutor``;
417
+ # # ---
418
+ # # _pickle.PicklingError: Can't pickle
419
+ # # <function ??? at 0x000001F0BE80F160>: attribute lookup ???
420
+ # # on ddeutil.workflow.stage failed
421
+ # #
422
+ # # from multiprocessing import Event, Manager
423
+ # with Manager() as manager:
424
+ # event: Event = manager.Event()
368
425
  #
369
- with Manager() as manager:
370
- event: Event = manager.Event()
371
-
372
- # NOTE: Start process pool executor for running strategy executor in
373
- # parallel mode.
374
- with ProcessPoolExecutor(
375
- max_workers=self.strategy.max_parallel
376
- ) as executor:
377
- features: list[Future] = [
378
- executor.submit(
379
- self.strategy_execute,
380
- strategy,
381
- params=copy.deepcopy(params),
382
- event=event,
383
- )
384
- for strategy in self.strategy.make()
385
- ]
386
- if self.strategy.fail_fast:
387
- rs = self.__catch_fail_fast(event, features)
388
- else:
389
- rs = self.__catch_all_completed(features)
426
+ # # NOTE: Start process pool executor for running strategy executor
427
+ # # in parallel mode.
428
+ # with ProcessPoolExecutor(
429
+ # max_workers=self.strategy.max_parallel
430
+ # ) as executor:
431
+ # futures: list[Future] = [
432
+ # executor.submit(
433
+ # self.strategy_execute,
434
+ # strategy,
435
+ # params=copy.deepcopy(params),
436
+ # event=event,
437
+ # )
438
+ # for strategy in self.strategy.make()
439
+ # ]
440
+ # if self.strategy.fail_fast:
441
+ # rs = self.__catch_fail_fast(event, futures)
442
+ # else:
443
+ # rs = self.__catch_all_completed(futures)
444
+
445
+ # NOTE: Create an event that signals the strategy executions to stop running.
446
+ event: Event = Event()
447
+
448
+ with ThreadPoolExecutor(
449
+ max_workers=self.strategy.max_parallel
450
+ ) as executor:
451
+ futures: list[Future] = [
452
+ executor.submit(
453
+ self.strategy_execute,
454
+ strategy,
455
+ params=copy.deepcopy(params),
456
+ event=event,
457
+ )
458
+ for strategy in self.strategy.make()
459
+ ]
460
+ if self.strategy.fail_fast:
461
+ rs: Result = self.__catch_fail_fast(event, futures)
462
+ else:
463
+ rs: Result = self.__catch_all_completed(futures)
390
464
  return Result(
391
465
  status=0,
392
466
  context=rs.context,
393
467
  )
394
468
 
395
- def __catch_fail_fast(self, event: Event, features: list[Future]) -> Result:
396
- """Job parallel pool features catching with fail-fast mode. That will
397
- stop all not done features if it receive the first exception from all
398
- running features.
469
+ def __catch_fail_fast(self, event: Event, futures: list[Future]) -> Result:
470
+ """Job parallel pool futures catching with fail-fast mode. That will
471
+ stop all not done futures if it receive the first exception from all
472
+ running futures.
399
473
 
400
474
  :param event:
401
- :param features: A list of features.
475
+ :param futures: A list of futures.
402
476
  :rtype: Result
403
477
  """
404
478
  strategy_context: DictData = {}
405
479
  # NOTE: Get results from a collection of tasks with a
406
480
  # timeout that has the first exception.
407
481
  done, not_done = wait(
408
- features, timeout=1800, return_when=FIRST_EXCEPTION
482
+ futures, timeout=1800, return_when=FIRST_EXCEPTION
409
483
  )
410
484
  nd: str = (
411
485
  f", the strategies do not run is {not_done}" if not_done else ""
@@ -416,37 +490,38 @@ class Job(BaseModel):
416
490
  event.set()
417
491
 
418
492
  # NOTE: Cancel any scheduled tasks
419
- for future in features:
493
+ for future in futures:
420
494
  future.cancel()
421
495
 
422
496
  status: int = 0
423
- for f in done:
424
- if f.exception():
497
+ for future in done:
498
+ if future.exception():
425
499
  status = 1
426
500
  logging.error(
427
501
  f"({self.run_id}) [JOB]: One stage failed with: "
428
- f"{f.exception()}, shutting down this feature."
502
+ f"{future.exception()}, shutting down this future."
429
503
  )
430
- elif f.cancelled():
504
+ elif future.cancelled():
431
505
  continue
432
506
  else:
433
- rs: Result = f.result(timeout=60)
507
+ rs: Result = future.result(timeout=60)
434
508
  strategy_context.update(rs.context)
435
509
  return Result(
436
510
  status=status,
437
511
  context=strategy_context,
438
512
  )
439
513
 
440
- def __catch_all_completed(self, features: list[Future]) -> Result:
441
- """Job parallel pool features catching with all-completed mode.
514
+ def __catch_all_completed(self, futures: list[Future]) -> Result:
515
+ """Job parallel pool futures catching with all-completed mode.
442
516
 
443
- :param features: A list of features.
517
+ :param futures: A list of futures.
518
+ :rtype: Result
444
519
  """
445
520
  strategy_context: DictData = {}
446
521
  status: int = 0
447
- for feature in as_completed(features):
522
+ for future in as_completed(futures):
448
523
  try:
449
- rs: Result = feature.result(timeout=60)
524
+ rs: Result = future.result(timeout=60)
450
525
  strategy_context.update(rs.context)
451
526
  except PickleError as err:
452
527
  # NOTE: (WF001) I do not want to fix this issue because
@@ -459,8 +534,8 @@ class Job(BaseModel):
459
534
  except TimeoutError:
460
535
  status = 1
461
536
  logging.warning("Task is hanging. Attempting to kill.")
462
- feature.cancel()
463
- if not feature.cancelled():
537
+ future.cancel()
538
+ if not future.cancelled():
464
539
  logging.warning("Failed to cancel the task.")
465
540
  else:
466
541
  logging.warning("Task canceled successfully.")
@@ -475,7 +550,7 @@ class Job(BaseModel):
475
550
 
476
551
 
477
552
  class Pipeline(BaseModel):
478
- """Pipeline Model this is the main feature of this project because it use to
553
+ """Pipeline Model this is the main feature of this project because it is used to
479
554
  be workflow data for running everywhere that you want. It use lightweight
480
555
  coding line to execute it.
481
556
  """
@@ -484,8 +559,7 @@ class Pipeline(BaseModel):
484
559
  desc: Optional[str] = Field(
485
560
  default=None,
486
561
  description=(
487
- "A pipeline description that is able to be string of markdown "
488
- "content."
562
+ "A pipeline description that can be string of markdown content."
489
563
  ),
490
564
  )
491
565
  params: dict[str, Param] = Field(
@@ -501,20 +575,30 @@ class Pipeline(BaseModel):
501
575
  description="A mapping of job ID and job model that already loaded.",
502
576
  )
503
577
  run_id: Optional[str] = Field(
504
- default=None, description="A running job ID.", repr=False
578
+ default=None,
579
+ description="A running pipeline ID.",
580
+ repr=False,
505
581
  )
506
582
 
583
+ @property
584
+ def new_run_id(self) -> str:
585
+ """Running ID of this pipeline that always generates a new unique value."""
586
+ return gen_id(self.name, unique=True)
587
+
507
588
  @classmethod
508
589
  def from_loader(
509
590
  cls,
510
591
  name: str,
511
592
  externals: DictData | None = None,
512
593
  ) -> Self:
513
- """Create Pipeline instance from the Loader object.
594
+ """Create Pipeline instance from the Loader object that only receive
595
+ an input pipeline name. The loader object will use this pipeline name to
596
+ search for the configuration data of this pipeline model in the conf path.
514
597
 
515
598
  :param name: A pipeline name that want to pass to Loader object.
516
599
  :param externals: An external parameters that want to pass to Loader
517
600
  object.
601
+ :rtype: Self
518
602
  """
519
603
  loader: Loader = Loader(name, externals=(externals or {}))
520
604
  loader_data: DictData = copy.deepcopy(loader.data)
@@ -537,6 +621,8 @@ class Pipeline(BaseModel):
537
621
  on = [on]
538
622
  if any(not isinstance(i, (dict, str)) for i in on):
539
623
  raise TypeError("The ``on`` key should be list of str or dict")
624
+
625
+ # NOTE: Pass on value to Loader and keep on model object to on field
540
626
  data["on"] = [
541
627
  (
542
628
  Loader(n, externals=(externals or {})).data
@@ -562,25 +648,48 @@ class Pipeline(BaseModel):
562
648
  }
563
649
  return values
564
650
 
651
+ @field_validator("desc", mode="after")
652
+ def ___prepare_desc(cls, value: str) -> str:
653
+ """Prepare description string that was created on a template."""
654
+ return dedent(value)
655
+
565
656
  @model_validator(mode="after")
566
657
  def __validate_jobs_need_and_prepare_running_id(self):
658
+ """Validate that every job listed in a job's needs exists in this pipeline."""
567
659
  for job in self.jobs:
568
660
  if not_exist := [
569
661
  need for need in self.jobs[job].needs if need not in self.jobs
570
662
  ]:
571
663
  raise PipelineException(
572
664
  f"This needed jobs: {not_exist} do not exist in this "
573
- f"pipeline."
665
+ f"pipeline, {self.name!r}"
574
666
  )
575
667
 
576
668
  # NOTE: update a job id with its job id from pipeline template
577
669
  self.jobs[job].id = job
578
670
 
579
671
  if self.run_id is None:
580
- self.run_id = gen_id(self.name, unique=True)
672
+ self.run_id = self.new_run_id
673
+
674
+ # VALIDATE: The pipeline name must be static; it must not contain a
675
+ # params template.
676
+ if has_template(self.name):
677
+ raise ValueError(
678
+ f"Pipeline name should not has any template, please check, "
679
+ f"{self.name!r}."
680
+ )
581
681
 
582
682
  return self
583
683
 
684
+ def get_running_id(self, run_id: str) -> Self:
685
+ """Return Pipeline model object that changing pipeline running ID with
686
+ an input running ID.
687
+
688
+ :param run_id: A replace pipeline running ID.
689
+ :rtype: Self
690
+ """
691
+ return self.model_copy(update={"run_id": run_id})
692
+
584
693
  def job(self, name: str) -> Job:
585
694
  """Return Job model that exists on this pipeline.
586
695
 
@@ -591,7 +700,10 @@ class Pipeline(BaseModel):
591
700
  :returns: A job model that exists on this pipeline by input name.
592
701
  """
593
702
  if name not in self.jobs:
594
- raise ValueError(f"Job {name!r} does not exists")
703
+ raise ValueError(
704
+ f"A Job {name!r} does not exists in this pipeline, "
705
+ f"{self.name!r}"
706
+ )
595
707
  return self.jobs[name]
596
708
 
597
709
  def parameterize(self, params: DictData) -> DictData:
@@ -629,52 +741,146 @@ class Pipeline(BaseModel):
629
741
  def release(
630
742
  self,
631
743
  on: On,
632
- params: DictData | None = None,
744
+ params: DictData,
633
745
  *,
634
- waiting_sec: int = 600,
635
- sleep_interval: int = 10,
636
- ) -> str:
746
+ waiting_sec: int = 55,
747
+ sleep_interval: int = 15,
748
+ log: Log = None,
749
+ lq: list[datetime] = None,
750
+ ) -> Result:
637
751
  """Start running pipeline with the on schedule in period of 30 minutes.
638
752
  That mean it will still running at background 30 minutes until the
639
753
  schedule matching with its time.
754
+
755
+ This method allows the pipeline to use a log object to save the execution
756
+ result to a log destination, such as a file log in the local /logs directory.
757
+
758
+ :rtype: Result
640
759
  """
641
- params: DictData = params or {}
642
- logging.info(f"[CORE] Start release: {self.name!r} : {on.cronjob}")
760
+ delay()
761
+ log: Log = log or FileLog
762
+ current_running_time = datetime.now()
763
+ if not (
764
+ latest_running_time := log.latest_point(name=self.name, queue=lq)
765
+ ) or (
766
+ latest_running_time.replace(tzinfo=ZoneInfo(on.tz))
767
+ < current_running_time.replace(tzinfo=ZoneInfo(on.tz))
768
+ ):
769
+ latest_running_time: datetime = current_running_time.replace(
770
+ tzinfo=ZoneInfo(on.tz)
771
+ )
772
+ else:
773
+ latest_running_time: datetime = latest_running_time.replace(
774
+ tzinfo=ZoneInfo(on.tz)
775
+ )
643
776
 
644
- gen: CronRunner = on.generate(datetime.now())
777
+ gen: CronRunner = on.generate(
778
+ latest_running_time + timedelta(seconds=1)
779
+ )
645
780
  tz: ZoneInfo = gen.tz
781
+
782
+ # NOTE: get next schedule time that generate from now.
646
783
  next_running_time: datetime = gen.next
647
784
 
648
- if get_diff_sec(next_running_time, tz=tz) < waiting_sec:
785
+ # NOTE: Skip every schedule time that the log reports as already pointed.
786
+ # while log.is_pointed(self.name, next_running_time, queue=lq):
787
+ # next_running_time: datetime = gen.next
788
+ while log.is_pointed(self.name, next_running_time, queue=lq):
789
+ next_running_time: datetime = gen.next
790
+
791
+ heappush(lq, next_running_time)
792
+
793
+ # VALIDATE: Check the different time between the next schedule time and
794
+ # now that less than waiting period (second unit).
795
+ if get_diff_sec(next_running_time, tz=tz) <= waiting_sec:
649
796
  logging.debug(
650
- f"[CORE]: {self.name} closely to run >> "
651
- f"{next_running_time:%Y-%m-%d %H:%M:%S}"
797
+ f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
798
+ f"Closely to run >> {next_running_time:%Y-%m-%d %H:%M:%S}"
652
799
  )
653
800
 
654
801
  # NOTE: Release when the time is nearly to schedule time.
655
- while (duration := get_diff_sec(next_running_time, tz=tz)) > 15:
656
- time.sleep(sleep_interval)
802
+ while (duration := get_diff_sec(next_running_time, tz=tz)) > (
803
+ sleep_interval + 5
804
+ ):
657
805
  logging.debug(
658
- f"[CORE]: {self.name!r} : Sleep until: {duration}"
806
+ f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
807
+ f"Sleep until: {duration}"
659
808
  )
809
+ time.sleep(sleep_interval)
660
810
 
661
- time.sleep(1)
662
- rs: Result = self.execute(params=params)
663
- logging.debug(f"{rs.context}")
811
+ time.sleep(0.5)
664
812
 
665
- return f"[CORE]: Start Execute: {self.name}"
666
- return f"[CORE]: {self.name} does not closely to run yet."
813
+ # NOTE: Release parameter that use to change if params has
814
+ # templating.
815
+ release_params: DictData = {
816
+ "release": {
817
+ "logical_date": next_running_time,
818
+ },
819
+ }
820
+
821
+ # WARNING: Re-create pipeline object that use new running pipeline
822
+ # ID.
823
+ pipeline: Self = self.get_running_id(run_id=self.new_run_id)
824
+ rs: Result = pipeline.execute(
825
+ params=param2template(params, release_params),
826
+ )
827
+ logging.debug(
828
+ f"({pipeline.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
829
+ f"End release"
830
+ )
667
831
 
668
- def poke(self, params: DictData | None = None):
669
- """Poke pipeline threading task for executing with its schedules that
670
- was set on the `on`.
832
+ del pipeline
833
+
834
+ rs.set_parent_run_id(self.run_id)
835
+ rs_log: Log = log.model_validate(
836
+ {
837
+ "name": self.name,
838
+ "on": str(on.cronjob),
839
+ "release": next_running_time,
840
+ "context": rs.context,
841
+ "parent_run_id": rs.run_id,
842
+ "run_id": rs.run_id,
843
+ }
844
+ )
845
+ rs_log.save()
846
+ else:
847
+ logging.debug(
848
+ f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
849
+ f"Does not closely >> {next_running_time:%Y-%m-%d %H:%M:%S}"
850
+ )
851
+ rs = Result(status=0, context={"params": params})
852
+
853
+ if lq is None:
854
+ return rs
855
+
856
+ lq.remove(next_running_time)
857
+ time.sleep(0.25)
858
+ return rs
859
+
860
+ def poke(
861
+ self,
862
+ params: DictData | None = None,
863
+ *,
864
+ log: Log | None = None,
865
+ ) -> list[Result]:
866
+ """Poke pipeline with threading executor pool for executing with all its
867
+ schedules that was set on the `on` value. This method will observe its
868
+ schedule that nearing to run with the ``self.release()`` method.
869
+
870
+ :param params: A parameters that want to pass to the release method.
871
+ :param log: A log object that want to use on this poking process.
872
+ :rtype: list[Result]
671
873
  """
672
874
  params: DictData = params or {}
673
- logging.info(
674
- f"[CORE]: Start Poking: {self.name!r} :"
675
- f"{gen_id(self.name, unique=True)}"
676
- )
677
- results = []
875
+ logging.info(f"({self.run_id}) [CORE]: Start Poking: {self.name!r} ...")
876
+ results: list[Result] = []
877
+ log_queue: list[datetime] = []
878
+
879
+ # NOTE: If this pipeline does not set schedule, it will return empty
880
+ # result.
881
+ if len(self.on) == 0:
882
+ return results
883
+
678
884
  with ThreadPoolExecutor(
679
885
  max_workers=int(
680
886
  os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING", "4")
@@ -685,13 +891,22 @@ class Pipeline(BaseModel):
685
891
  self.release,
686
892
  on,
687
893
  params=params,
894
+ log=log,
895
+ lq=log_queue,
688
896
  )
689
897
  for on in self.on
690
898
  ]
691
899
  for future in as_completed(futures):
692
- rs = future.result()
693
- logging.info(rs)
900
+ rs: Result = future.result()
901
+ logging.info(rs.context.get("params", {}))
694
902
  results.append(rs)
903
+
904
+ if len(log_queue) > 0:
905
+ logging.error(
906
+ f"({self.run_id}) [CORE]: Log Queue does empty when poke "
907
+ f"is finishing."
908
+ )
909
+
695
910
  return results
696
911
 
697
912
  def job_execute(
@@ -700,6 +915,7 @@ class Pipeline(BaseModel):
700
915
  params: DictData,
701
916
  ) -> Result:
702
917
  """Job Executor that use on pipeline executor.
918
+
703
919
  :param job: A job ID that want to execute.
704
920
  :param params: A params that was parameterized from pipeline execution.
705
921
  """
@@ -708,14 +924,17 @@ class Pipeline(BaseModel):
708
924
  raise PipelineException(
709
925
  f"The job ID: {job} does not exists on {self.name!r} pipeline."
710
926
  )
711
-
712
927
  try:
713
928
  logging.info(f"({self.run_id}) [PIPELINE]: Start execute: {job!r}")
714
- job_obj: Job = self.jobs[job]
929
+
930
+ # IMPORTANT:
931
+ # Change any job running IDs to this pipeline running ID.
932
+ job_obj: Job = self.jobs[job].get_running_id(self.run_id)
715
933
  j_rs: Result = job_obj.execute(params=params)
934
+
716
935
  except JobException as err:
717
936
  raise PipelineException(
718
- f"The job ID: {job} get raise error: {err.__class__.__name__}:"
937
+ f"The job ID: {job} get error: {err.__class__.__name__}:"
719
938
  f"\n{err}"
720
939
  ) from None
721
940
  return Result(
@@ -738,9 +957,8 @@ class Pipeline(BaseModel):
738
957
  for limit time of execution and waiting job dependency.
739
958
  :rtype: Result
740
959
 
741
- ---
742
-
743
960
  See Also:
961
+ ---
744
962
 
745
963
  The result of execution process for each jobs and stages on this
746
964
  pipeline will keeping in dict which able to catch out with all jobs and
@@ -752,10 +970,7 @@ class Pipeline(BaseModel):
752
970
  ... ${job-name}.stages.${stage-id}.outputs.${key}
753
971
 
754
972
  """
755
- logging.info(
756
- f"[CORE]: Start Execute: {self.name}:"
757
- f"{gen_id(self.name, unique=True)}"
758
- )
973
+ logging.info(f"({self.run_id}) [CORE]: Start Execute: {self.name} ...")
759
974
  params: DictData = params or {}
760
975
 
761
976
  # NOTE: It should not do anything if it does not have job.
@@ -777,15 +992,15 @@ class Pipeline(BaseModel):
777
992
  rs: Result = Result(context=self.parameterize(params))
778
993
  try:
779
994
  rs.receive(
780
- self.__exec_non_threading(rs, jq, ts, timeout=timeout)
995
+ self.__exec_non_threading(rs, ts, timeout=timeout)
781
996
  if (
782
997
  worker := int(
783
- os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "1")
998
+ os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2")
784
999
  )
785
1000
  )
786
1001
  == 1
787
1002
  else self.__exec_threading(
788
- rs, jq, ts, worker=worker, timeout=timeout
1003
+ rs, ts, worker=worker, timeout=timeout
789
1004
  )
790
1005
  )
791
1006
  return rs
@@ -797,14 +1012,30 @@ class Pipeline(BaseModel):
797
1012
  def __exec_threading(
798
1013
  self,
799
1014
  rs: Result,
800
- job_queue: Queue,
801
1015
  ts: float,
802
1016
  *,
803
- worker: int = 1,
1017
+ worker: int = 2,
804
1018
  timeout: int = 600,
805
1019
  ) -> Result:
806
- """Pipeline threading execution."""
1020
+ """Pipeline threading execution.
1021
+
1022
+ :param rs:
1023
+ :param ts:
1024
+ :param timeout: A second value unit that bounding running time.
1025
+ :param worker: A number of threading executor pool size.
1026
+ :rtype: Result
1027
+ """
807
1028
  not_time_out_flag: bool = True
1029
+ logging.debug(
1030
+ f"({self.run_id}): [CORE]: Run {self.name} with threading job "
1031
+ f"executor"
1032
+ )
1033
+
1034
+ # NOTE: Create a job queue that keep the job that want to running after
1035
+ # it dependency condition.
1036
+ job_queue: Queue = Queue()
1037
+ for job_id in self.jobs:
1038
+ job_queue.put(job_id)
808
1039
 
809
1040
  # IMPORTANT: The job execution can run parallel and waiting by
810
1041
  # needed.
@@ -816,10 +1047,6 @@ class Pipeline(BaseModel):
816
1047
  job_id: str = job_queue.get()
817
1048
  job: Job = self.jobs[job_id]
818
1049
 
819
- # IMPORTANT:
820
- # Change any job running IDs to this pipeline running ID.
821
- job.run_id = self.run_id
822
-
823
1050
  if any(need not in rs.context["jobs"] for need in job.needs):
824
1051
  job_queue.put(job_id)
825
1052
  time.sleep(0.5)
@@ -832,6 +1059,10 @@ class Pipeline(BaseModel):
832
1059
  params=copy.deepcopy(rs.context),
833
1060
  ),
834
1061
  )
1062
+ job_queue.task_done()
1063
+
1064
+ # NOTE: Wait for all items to finish processing
1065
+ job_queue.join()
835
1066
 
836
1067
  for future in as_completed(futures):
837
1068
  if err := future.exception():
@@ -841,37 +1072,49 @@ class Pipeline(BaseModel):
841
1072
  # NOTE: Update job result to pipeline result.
842
1073
  rs.receive_jobs(future.result(timeout=20))
843
1074
 
844
- if not not_time_out_flag:
845
- logging.warning(
846
- f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
847
- )
848
- raise PipelineException(
849
- f"Execution of pipeline: {self.name} was timeout"
850
- )
851
- rs.status = 0
852
- return rs
1075
+ if not_time_out_flag:
1076
+ rs.status = 0
1077
+ return rs
1078
+
1079
+ # NOTE: Raise timeout error.
1080
+ logging.warning(
1081
+ f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
1082
+ )
1083
+ raise PipelineException(
1084
+ f"Execution of pipeline: {self.name} was timeout"
1085
+ )
853
1086
 
854
1087
  def __exec_non_threading(
855
1088
  self,
856
1089
  rs: Result,
857
- job_queue: Queue,
858
1090
  ts: float,
859
1091
  *,
860
1092
  timeout: int = 600,
861
1093
  ) -> Result:
862
- """Pipeline non-threading execution."""
1094
+ """Pipeline non-threading execution.
1095
+
1096
+ :param rs:
1097
+ :param ts:
1098
+ :param timeout: A limit, in seconds, that bounds the running time.
1099
+ :rtype: Result
1100
+ """
863
1101
  not_time_out_flag: bool = True
864
- logging.info(f"[CORE]: Run {self.name} with non-threading job executor")
1102
+ logging.debug(
1103
+ f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
1104
+ f"executor"
1105
+ )
1106
+ # NOTE: Create a job queue that keep the job that want to running after
1107
+ # it dependency condition.
1108
+ job_queue: Queue = Queue()
1109
+ for job_id in self.jobs:
1110
+ job_queue.put(job_id)
1111
+
865
1112
  while not job_queue.empty() and (
866
1113
  not_time_out_flag := ((time.monotonic() - ts) < timeout)
867
1114
  ):
868
1115
  job_id: str = job_queue.get()
869
1116
  job: Job = self.jobs[job_id]
870
1117
 
871
- # IMPORTANT:
872
- # Change any job running IDs to this pipeline running ID.
873
- job.run_id = self.run_id
874
-
875
1118
  # NOTE:
876
1119
  if any(need not in rs.context["jobs"] for need in job.needs):
877
1120
  job_queue.put(job_id)
@@ -881,13 +1124,19 @@ class Pipeline(BaseModel):
881
1124
  # NOTE: Start job execution.
882
1125
  job_rs = self.job_execute(job_id, params=copy.deepcopy(rs.context))
883
1126
  rs.context["jobs"].update(job_rs.context)
1127
+ job_queue.task_done()
884
1128
 
885
- if not not_time_out_flag:
886
- logging.warning(
887
- f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
888
- )
889
- raise PipelineException(
890
- f"Execution of pipeline: {self.name} was timeout"
891
- )
892
- rs.status = 0
893
- return rs
1129
+ # NOTE: Wait for all items to finish processing
1130
+ job_queue.join()
1131
+
1132
+ if not_time_out_flag:
1133
+ rs.status = 0
1134
+ return rs
1135
+
1136
+ # NOTE: Raise timeout error.
1137
+ logging.warning(
1138
+ f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
1139
+ )
1140
+ raise PipelineException(
1141
+ f"Execution of pipeline: {self.name} was timeout"
1142
+ )