ddeutil-workflow 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,20 +7,47 @@ from __future__ import annotations
  import copy
  import logging
+ import os
  import time
+ from concurrent.futures import (
+     FIRST_EXCEPTION,
+     Future,
+     ProcessPoolExecutor,
+     ThreadPoolExecutor,
+     as_completed,
+     wait,
+ )
+ from datetime import datetime
+ from multiprocessing import Event, Manager
+ from pickle import PickleError
  from queue import Queue
  from typing import Optional
+ from zoneinfo import ZoneInfo

  from pydantic import BaseModel, Field
  from pydantic.functional_validators import model_validator
  from typing_extensions import Self

  from .__types import DictData, DictStr, Matrix, MatrixExclude, MatrixInclude
- from .exceptions import JobException, PipelineException
+ from .exceptions import (
+     JobException,
+     PipelineException,
+     StageException,
+     UtilException,
+ )
  from .loader import Loader
  from .on import On
+ from .scheduler import CronRunner
  from .stage import Stage
- from .utils import Param, Result, cross_product, dash2underscore, gen_id
+ from .utils import (
+     Param,
+     Result,
+     cross_product,
+     dash2underscore,
+     filter_func,
+     gen_id,
+     get_diff_sec,
+ )


  class Strategy(BaseModel):
@@ -29,9 +56,11 @@ class Strategy(BaseModel):

      Data Validate:
          >>> strategy = {
+         ...     'max-parallel': 1,
+         ...     'fail-fast': False,
          ...     'matrix': {
          ...         'first': [1, 2, 3],
-         ...         'second': ['foo', 'bar']
+         ...         'second': ['foo', 'bar'],
          ...     },
          ...     'include': [{'first': 4, 'second': 'foo'}],
          ...     'exclude': [{'first': 1, 'second': 'bar'}],
@@ -39,7 +68,7 @@ class Strategy(BaseModel):
      """

      fail_fast: bool = Field(default=False)
-     max_parallel: int = Field(default=-1)
+     max_parallel: int = Field(default=1, gt=0)
      matrix: Matrix = Field(default_factory=dict)
      include: MatrixInclude = Field(
          default_factory=list,
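Reviewer note on the `max_parallel` change above: moving the default from `-1` to `1` with `gt=0` means pydantic now rejects non-positive worker counts at validation time. A minimal sketch of that behavior, assuming pydantic v2 and a hypothetical `StrategyDemo` stand-in model:

```python
from pydantic import BaseModel, Field, ValidationError

class StrategyDemo(BaseModel):
    # Mirrors the new field: non-positive worker counts are rejected
    # at validation time instead of being passed to the pool executor.
    max_parallel: int = Field(default=1, gt=0)

StrategyDemo()                    # ok: max_parallel == 1
StrategyDemo(max_parallel=4)      # ok
try:
    StrategyDemo(max_parallel=-1)  # the old 0.0.6 default now fails
except ValidationError as err:
    print(err.errors()[0]["type"])  # "greater_than"
```

This matters because `max_parallel` is fed straight into `ProcessPoolExecutor(max_workers=...)` later in this diff, and the executor itself raises `ValueError` for non-positive values.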
@@ -59,6 +88,10 @@ class Strategy(BaseModel):
          dash2underscore("fail-fast", values)
          return values

+     def is_set(self) -> bool:
+         """Return True if this strategy was set from the YAML template."""
+         return len(self.matrix) > 0
+
      def make(self) -> list[DictStr]:
          """Return a list of products of the matrix values that is already
          filtered with exclude and extended by include.
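The new `is_set()` helper distinguishes an empty strategy from a real matrix, and `make()` (whose body continues past this hunk) builds the strategy list. As a reading aid, here is a sketch of GitHub-Actions-style matrix semantics, cross product minus `exclude` plus `include`; this is an assumption, since `cross_product` lives in `.utils` and its exact behavior is not shown in this diff:

```python
from itertools import product

def make_demo(matrix, include=(), exclude=()):
    # Cross product of every matrix key, e.g. first x second.
    combos = [
        dict(zip(matrix, values)) for values in product(*matrix.values())
    ]
    # Drop excluded combinations, then append the include entries.
    combos = [c for c in combos if c not in list(exclude)]
    return combos + [i for i in include if i not in combos]

print(make_demo(
    matrix={"first": [1, 2, 3], "second": ["foo", "bar"]},
    include=[{"first": 4, "second": "foo"}],
    exclude=[{"first": 1, "second": "bar"}],
))
# 6 products - 1 excluded + 1 included = 6 strategies
```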
@@ -115,18 +148,25 @@ class Job(BaseModel):
      Data Validate:
          >>> job = {
          ...     "runs-on": None,
-         ...     "strategy": {},
+         ...     "strategy": {
+         ...         "max-parallel": 1,
+         ...         "matrix": {
+         ...             "first": [1, 2, 3],
+         ...             "second": ['foo', 'bar'],
+         ...         },
+         ...     },
          ...     "needs": [],
          ...     "stages": [
          ...         {
          ...             "name": "Some stage",
          ...             "run": "print('Hello World')",
          ...         },
+         ...         ...
          ...     ],
          ... }
      """

-     name: Optional[str] = Field(default=None)
+     id: Optional[str] = Field(default=None)
      desc: Optional[str] = Field(default=None)
      runs_on: Optional[str] = Field(default=None)
      stages: list[Stage] = Field(
@@ -141,6 +181,9 @@ class Job(BaseModel):
          default_factory=Strategy,
          description="A strategy matrix that this job wants to generate.",
      )
+     run_id: Optional[str] = Field(
+         default=None, description="A running job ID.", repr=False
+     )

      @model_validator(mode="before")
      def __prepare_keys(cls, values: DictData) -> DictData:
@@ -150,6 +193,12 @@ class Job(BaseModel):
          dash2underscore("runs-on", values)
          return values

+     @model_validator(mode="after")
+     def __prepare_running_id(self):
+         if self.run_id is None:
+             self.run_id = gen_id(self.id or "", unique=True)
+         return self
+
      def stage(self, stage_id: str) -> Stage:
          """Return the stage model that matches the input stage ID."""
          for stage in self.stages:
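The `mode="after"` validator above backfills `run_id` only when the caller did not pass one. `gen_id` is imported from `.utils` and its body is not part of this diff, so the sketch below uses a hypothetical `gen_id_demo` to show the pattern:

```python
import time
from hashlib import md5
from typing import Optional

from pydantic import BaseModel, model_validator

def gen_id_demo(value: str, *, unique: bool = False) -> str:
    # Hypothetical stand-in for .utils.gen_id: hash the name, optionally
    # salted with a timestamp so every run gets a fresh ID.
    salt = f".{time.time_ns()}" if unique else ""
    return md5(f"{value}{salt}".encode()).hexdigest()

class JobDemo(BaseModel):
    id: Optional[str] = None
    run_id: Optional[str] = None

    @model_validator(mode="after")
    def __prepare_running_id(self):
        # Runs after field validation, so self.id is already resolved.
        if self.run_id is None:
            self.run_id = gen_id_demo(self.id or "", unique=True)
        return self

print(JobDemo(id="first-job").run_id)  # a fresh hash on every call
```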
@@ -157,40 +206,133 @@ class Job(BaseModel):
              return stage
          raise ValueError(f"Stage ID {stage_id} does not exist")

-     @staticmethod
-     def set_outputs(output: DictData) -> DictData:
-         if len(output) > 1:
+     def set_outputs(self, output: DictData) -> DictData:
+         if len(output) > 1 and self.strategy.is_set():
              return {"strategies": output}

          return output[next(iter(output))]

-     def strategy_execute(self, strategy: DictData, params: DictData) -> Result:
-         context: DictData = {}
-         context.update(params)
+     def strategy_execute(
+         self,
+         strategy: DictData,
+         params: DictData,
+         *,
+         event: Event | None = None,
+     ) -> Result:
+         """Job strategy execution that passes dynamic parameters from the
+         pipeline execution down to the strategy matrix.
+
+         This execution is the minimum-level execution of the job model.
+
+         :param strategy: A matrix strategy value.
+         :param params: A dynamic parameter mapping.
+         :param event: A manager event that is passed into the pool executor.
+         :rtype: Result
+
+         :raise JobException: If it gets any error from StageException or
+             UtilException.
+         """
+         if event and event.is_set():
+             return Result(
+                 status=1,
+                 context={
+                     gen_id(strategy): {
+                         "matrix": strategy,
+                         "stages": {},
+                         "error": {
+                             "message": "Process Event stopped before execution"
+                         },
+                     },
+                 },
+             )
+
+         # NOTE: Create the strategy execution context and update it with the
+         #   matrix and a copy of params. So, the context value will have a
+         #   structure like;
+         #   ---
+         #   {
+         #       "params": { ... },  <== Current input params
+         #       "jobs": { ... },    <== Current job results
+         #       "matrix": { ... }   <== Current strategy value
+         #   }
+         #
+         context: DictData = params
          context.update({"matrix": strategy})

+         # IMPORTANT: The stage execution only runs sequentially, one by one.
          for stage in self.stages:
+
+             # IMPORTANT: Change any stage running IDs to this job running ID.
+             stage.run_id = self.run_id
+
              _st_name: str = stage.id or stage.name

-             if stage.is_skip(params=context):
-                 logging.info(f"[JOB]: Skip the stage: {_st_name!r}")
+             if stage.is_skipped(params=context):
+                 logging.info(
+                     f"({self.run_id}) [JOB]: Skip the stage: {_st_name!r}"
+                 )
                  continue
-             logging.info(f"[JOB]: Start execute the stage: {_st_name!r}")

-             rs: Result = stage.execute(params=context)
-             if rs.status == 0:
+             logging.info(
+                 f"({self.run_id}) [JOB]: Start execute the stage: {_st_name!r}"
+             )
+
+             # NOTE: Log the matrix that passes to this stage execution.
+             if strategy:
+                 logging.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")
+
+             # NOTE:
+             #   I do not use the syntax below because the `params` dict is a
+             #   shared memory reference, so it changes whenever anything
+             #   updates or re-constructs it.
+             #
+             #   ... params |= stage.execute(params=params)
+             #
+             #   This step adds the stage result to the ``stages`` key under
+             #   that stage id. It will have a structure like;
+             #   ---
+             #   {
+             #       "params": { ... },
+             #       "jobs": { ... },
+             #       "matrix": { ... },
+             #       "stages": { "stage-id-1": ..., ... }
+             #   }
+             #
+             if event and event.is_set():
+                 return Result(
+                     status=1,
+                     context={
+                         gen_id(strategy): {
+                             "matrix": strategy,
+                             "stages": filter_func(context.pop("stages", {})),
+                             "error": {
+                                 "message": (
+                                     "Process Event stopped before execution"
+                                 ),
+                             },
+                         },
+                     },
+                 )
+             try:
+                 rs: Result = stage.execute(params=context)
                  stage.set_outputs(rs.context, params=context)
-             else:
-                 raise JobException(
-                     f"Getting status does not equal zero on stage: "
-                     f"{stage.name}."
+             except (StageException, UtilException) as err:
+                 logging.error(
+                     f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
                  )
+                 raise JobException(
+                     f"Stage execution error: {err.__class__.__name__}: "
+                     f"{err}"
+                 ) from None
          return Result(
              status=0,
              context={
                  gen_id(strategy): {
                      "matrix": strategy,
-                     "stages": context.pop("stages", {}),
+                     # NOTE: (WF001) Filter out locally-created functions from
+                     #   the stages value because they do not dump with pickle
+                     #   when executing with multiprocessing.
+                     #
+                     "stages": filter_func(context.pop("stages", {})),
                  },
              },
          )
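Note how `strategy_execute` now checks the shared event twice, once on entry and once before each stage, so a fail-fast signal stops a matrix cell at the next safe point instead of mid-stage. A self-contained sketch of this cooperative-cancellation pattern with a `Manager` event and a `ProcessPoolExecutor`, using a stand-in `run_cell` worker in place of the strategy executor:

```python
import time
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager

def run_cell(cell: int, event) -> str:
    # Check the shared event at each safe point, mirroring the
    # entry check and per-stage check in strategy_execute.
    for stage in range(3):
        if event.is_set():
            return f"cell {cell}: stopped before stage {stage}"
        time.sleep(0.1)  # stand-in for stage.execute()
    return f"cell {cell}: done"

if __name__ == "__main__":
    with Manager() as manager:
        # Manager event proxies pickle cleanly, unlike a bare
        # multiprocessing.Event, so they survive executor.submit().
        event = manager.Event()
        with ProcessPoolExecutor(max_workers=2) as pool:
            futures = [pool.submit(run_cell, c, event) for c in range(4)]
            event.set()  # simulate a fail-fast signal from the parent
            for f in futures:
                print(f.result())  # mix of "done" and "stopped" by timing
```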
@@ -204,71 +346,132 @@ class Job(BaseModel):
          :rtype: Result
          """
          strategy_context: DictData = {}
-         for strategy in self.strategy.make():
-
-             # NOTE: Create strategy context and update matrix and params to
-             #   this context. So, the context will have structure like;
-             #   ---
-             #   {
-             #       "params": { ... },  <== Current input params
-             #       "jobs": { ... },
-             #       "matrix": { ... }   <== Current strategy value
-             #   }
-             #
-             context: DictData = {}
-             context.update(params)
-             context.update({"matrix": strategy})

-             # TODO: we should add option for ``wait_as_complete`` for release
-             #   a stage execution to run on background (multi-thread).
-             # ---
-             # >>> from concurrency
-             #
-             # IMPORTANT: The stage execution only run sequentially one-by-one.
-             for stage in self.stages:
-                 _st_name: str = stage.id or stage.name
+         # NOTE: Normal Job execution.
+         if (not self.strategy.is_set()) or self.strategy.max_parallel == 1:
+             for strategy in self.strategy.make():
+                 rs: Result = self.strategy_execute(
+                     strategy, params=copy.deepcopy(params)
+                 )
+                 strategy_context.update(rs.context)
+             return Result(
+                 status=0,
+                 context=strategy_context,
+             )

-                 if stage.is_skip(params=context):
-                     logging.info(f"[JOB]: Skip the stage: {_st_name!r}")
-                     continue
-                 logging.info(f"[JOB]: Start execute the stage: {_st_name!r}")
-
-                 # NOTE: Logging a matrix that pass on this stage execution.
-                 if strategy:
-                     logging.info(f"[...]: Matrix: {strategy}")
-
-                 # NOTE:
-                 #   I do not use below syntax because `params` dict be the
-                 #   reference memory pointer and it was changed when I action
-                 #   anything like update or re-construct this.
-                 #
-                 #   ... params |= stage.execute(params=params)
-                 #
-                 #   This step will add the stage result to ``stages`` key in
-                 #   that stage id. It will have structure like;
-                 #   ---
-                 #   {
-                 #       "params": { ... },
-                 #       "jobs": { ... },
-                 #       "matrix": { ... },
-                 #       "stages": { { "stage-id-1": ... }, ... }
-                 #   }
-                 #
-                 rs: Result = stage.execute(params=context)
-                 if rs.status == 0:
-                     stage.set_outputs(rs.context, params=context)
-                 else:
-                     raise JobException(
-                         f"Getting status does not equal zero on stage: "
-                         f"{stage.name}."
+         # WARNING: (WF001) I got an error that raises when using
+         #   ``ProcessPoolExecutor``;
+         #   ---
+         #   _pickle.PicklingError: Can't pickle
+         #       <function ??? at 0x000001F0BE80F160>: attribute lookup ???
+         #       on ddeutil.workflow.stage failed
+         #
+         with Manager() as manager:
+             event: Event = manager.Event()
+
+             # NOTE: Start a process pool executor that runs the strategy
+             #   executor in parallel mode.
+             with ProcessPoolExecutor(
+                 max_workers=self.strategy.max_parallel
+             ) as executor:
+                 features: list[Future] = [
+                     executor.submit(
+                         self.strategy_execute,
+                         strategy,
+                         params=copy.deepcopy(params),
+                         event=event,
                      )
+                     for strategy in self.strategy.make()
+                 ]
+                 if self.strategy.fail_fast:
+                     rs = self.__catch_fail_fast(event, features)
+                 else:
+                     rs = self.__catch_all_completed(features)
+         return Result(
+             status=0,
+             context=rs.context,
+         )

-         strategy_context[gen_id(strategy)] = {
-             "matrix": strategy,
-             "stages": context.pop("stages", {}),
-         }
+     def __catch_fail_fast(self, event: Event, features: list[Future]) -> Result:
+         """Catch the job parallel pool features with fail-fast mode. It stops
+         all features that are not done if it receives the first exception
+         from any running feature.
+
+         :param event: A manager event for stopping the running features.
+         :param features: A list of features.
+         :rtype: Result
+         """
+         strategy_context: DictData = {}
+         # NOTE: Wait on the collection of tasks with a timeout, returning
+         #   when the first exception is raised.
+         done, not_done = wait(
+             features, timeout=1800, return_when=FIRST_EXCEPTION
+         )
+         nd: str = (
+             f", the strategies that do not run are {not_done}"
+             if not_done
+             else ""
+         )
+         logging.debug(f"[JOB]: Strategy is set Fail Fast{nd}")
+
+         # NOTE: Stop all running tasks.
+         event.set()
+
+         # NOTE: Cancel any scheduled tasks.
+         for future in features:
+             future.cancel()
+
+         status: int = 0
+         for f in done:
+             if f.exception():
+                 status = 1
+                 logging.error(
+                     f"({self.run_id}) [JOB]: One stage failed with: "
+                     f"{f.exception()}, shutting down this feature."
+                 )
+             elif f.cancelled():
+                 continue
+             else:
+                 rs: Result = f.result(timeout=60)
+                 strategy_context.update(rs.context)
+         return Result(
+             status=status,
+             context=strategy_context,
+         )

-         return Result(status=0, context=strategy_context)
+     def __catch_all_completed(self, features: list[Future]) -> Result:
+         """Catch the job parallel pool features with all-completed mode.
+
+         :param features: A list of features.
+         :rtype: Result
+         """
+         strategy_context: DictData = {}
+         status: int = 0
+         for feature in as_completed(features):
+             try:
+                 rs: Result = feature.result(timeout=60)
+                 strategy_context.update(rs.context)
+             except PickleError as err:
+                 # NOTE: (WF001) I do not want to fix this issue because it
+                 #   does not make sense, and this bug-fix process would be
+                 #   over-engineering.
+                 raise JobException(
+                     f"PyStage that creates objects in locals cannot be used "
+                     f"with parallel strategy execution;\n\t{err}"
+                 ) from None
+             except TimeoutError:
+                 status = 1
+                 logging.warning("Task is hanging. Attempting to kill.")
+                 feature.cancel()
+                 if not feature.cancelled():
+                     logging.warning("Failed to cancel the task.")
+                 else:
+                     logging.warning("Task canceled successfully.")
+             except JobException as err:
+                 status = 1
+                 logging.error(
+                     f"({self.run_id}) [JOB]: Got a stage exception while "
+                     f"fail-fast was not set;\n{err.__class__.__name__}:\n\t"
+                     f"{err}"
+                 )
+         return Result(status=status, context=strategy_context)


  class Pipeline(BaseModel):
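`__catch_fail_fast` sequences three primitives: `wait(..., return_when=FIRST_EXCEPTION)` to unblock on the first failure, `event.set()` so running cells stop cooperatively, and `Future.cancel()` for cells the pool has not started. A reduced sketch of that sequencing; it uses a `ThreadPoolExecutor` and a plain `work` function so it runs without the workflow models (the real code uses a process pool plus the manager event):

```python
from concurrent.futures import FIRST_EXCEPTION, ThreadPoolExecutor, wait

def work(n: int) -> int:
    if n == 2:
        raise RuntimeError("boom")  # the first failure
    return n * n

with ThreadPoolExecutor(max_workers=2) as pool:
    futures = [pool.submit(work, n) for n in range(6)]
    # Unblock as soon as any future raises, not when all finish.
    done, not_done = wait(futures, return_when=FIRST_EXCEPTION)
    for f in not_done:
        f.cancel()  # only queued, never-started futures can be cancelled
    status = 1 if any(f.exception() for f in done) else 0
    print(status, len(done), len(not_done))  # counts vary with timing
```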
@@ -297,6 +500,9 @@ class Pipeline(BaseModel):
          default_factory=dict,
          description="A mapping of job ID to job model that is already loaded.",
      )
+     run_id: Optional[str] = Field(
+         default=None, description="A running pipeline ID.", repr=False
+     )

      @classmethod
      def from_loader(
@@ -356,6 +562,25 @@ class Pipeline(BaseModel):
          }
          return values

+     @model_validator(mode="after")
+     def __validate_jobs_need_and_prepare_running_id(self):
+         for job in self.jobs:
+             if not_exist := [
+                 need for need in self.jobs[job].needs if need not in self.jobs
+             ]:
+                 raise PipelineException(
+                     f"The needed jobs: {not_exist} do not exist in this "
+                     f"pipeline."
+                 )
+
+             # NOTE: Update each job id with its key from the pipeline
+             #   template.
+             self.jobs[job].id = job
+
+         if self.run_id is None:
+             self.run_id = gen_id(self.name, unique=True)
+
+         return self
+
      def job(self, name: str) -> Job:
          """Return the Job model that exists in this pipeline.

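The needs check above fails fast at model-validation time; without it, a dangling reference would make the executor requeue the job forever (see the queue loops later in this diff). A tiny sketch of the same check over a plain jobs-to-needs mapping:

```python
jobs: dict[str, list[str]] = {
    "extract": [],
    "load": ["extract", "transform"],  # "transform" is never defined
}

for job, needs in jobs.items():
    # Same check as the validator: collect references that point to
    # job IDs that do not exist in this pipeline.
    if not_exist := [need for need in needs if need not in jobs]:
        raise ValueError(
            f"The needed jobs: {not_exist} do not exist in this pipeline."
        )
```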
@@ -375,6 +600,7 @@ class Pipeline(BaseModel):
          job execution.

          :param params: A parameter mapping that is received from the pipeline
              execution.
+         :rtype: DictData
          """
          # VALIDATE: Incoming params should have the keys that are set on this
          #   pipeline.
          if check_key := tuple(
@@ -382,7 +608,7 @@ class Pipeline(BaseModel):
              for k in self.params
              if (k not in params and self.params[k].required)
          ):
-             raise ValueError(
+             raise PipelineException(
                  f"Required Param on this pipeline setting was not set: "
                  f"{', '.join(check_key)}."
              )
@@ -400,6 +626,103 @@ class Pipeline(BaseModel):
              "jobs": {},
          }

+     def release(
+         self,
+         on: On,
+         params: DictData | None = None,
+         *,
+         waiting_sec: int = 600,
+         sleep_interval: int = 10,
+     ) -> str:
+         """Start running the pipeline on its schedule within the waiting
+         period. That means it keeps running in the background until the
+         schedule matches its time.
+         """
+         params: DictData = params or {}
+         logging.info(f"[CORE] Start release: {self.name!r} : {on.cronjob}")
+
+         gen: CronRunner = on.generate(datetime.now())
+         tz: ZoneInfo = gen.tz
+         next_running_time: datetime = gen.next
+
+         if get_diff_sec(next_running_time, tz=tz) < waiting_sec:
+             logging.debug(
+                 f"[CORE]: {self.name} is close to run >> "
+                 f"{next_running_time:%Y-%m-%d %H:%M:%S}"
+             )
+
+             # NOTE: Release when the time is near the schedule time.
+             while (duration := get_diff_sec(next_running_time, tz=tz)) > 15:
+                 time.sleep(sleep_interval)
+                 logging.debug(
+                     f"[CORE]: {self.name!r} : Sleep until: {duration}"
+                 )
+
+             time.sleep(1)
+             rs: Result = self.execute(params=params)
+             logging.debug(f"{rs.context}")
+
+             return f"[CORE]: Start Execute: {self.name}"
+         return f"[CORE]: {self.name} is not close to run yet."
+
+     def poke(self, params: DictData | None = None):
+         """Poke this pipeline with threading tasks that execute it on the
+         schedules that were set in `on`.
+         """
+         params: DictData = params or {}
+         logging.info(
+             f"[CORE]: Start Poking: {self.name!r} :"
+             f"{gen_id(self.name, unique=True)}"
+         )
+         results = []
+         with ThreadPoolExecutor(
+             max_workers=int(
+                 os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING", "4")
+             ),
+         ) as executor:
+             futures: list[Future] = [
+                 executor.submit(
+                     self.release,
+                     on,
+                     params=params,
+                 )
+                 for on in self.on
+             ]
+             for future in as_completed(futures):
+                 rs = future.result()
+                 logging.info(rs)
+                 results.append(rs)
+         return results
+
+     def job_execute(
+         self,
+         job: str,
+         params: DictData,
+     ) -> Result:
+         """Job executor that is used by the pipeline executor.
+
+         :param job: A job ID that wants to execute.
+         :param params: The params that were parameterized from the pipeline
+             execution.
+         """
+         # VALIDATE: Check that the job ID exists in this pipeline.
+         if job not in self.jobs:
+             raise PipelineException(
+                 f"The job ID: {job} does not exist on {self.name!r} pipeline."
+             )
+
+         try:
+             logging.info(f"({self.run_id}) [PIPELINE]: Start execute: {job!r}")
+             job_obj: Job = self.jobs[job]
+             j_rs: Result = job_obj.execute(params=params)
+         except JobException as err:
+             raise PipelineException(
+                 f"The job ID: {job} got a raised error: "
+                 f"{err.__class__.__name__}:\n{err}"
+             ) from None
+         return Result(
+             status=j_rs.status,
+             context={job: job_obj.set_outputs(j_rs.context)},
+         )
+
      def execute(
          self,
          params: DictData | None = None,
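`release` polls until the next cron tick is under 15 seconds away, sleeping `sleep_interval` seconds per round. `CronRunner` and `get_diff_sec` are package internals, so this sketch substitutes a hypothetical `get_diff_sec_demo` to show the countdown shape:

```python
import time
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo

def get_diff_sec_demo(dt: datetime, tz: ZoneInfo) -> int:
    # Hypothetical stand-in for .utils.get_diff_sec: seconds from
    # now until dt, rounded to the nearest whole second.
    return round((dt - datetime.now(tz=tz)).total_seconds())

def release_demo(next_running_time: datetime, tz: ZoneInfo,
                 waiting_sec: int = 600, sleep_interval: int = 10) -> str:
    if get_diff_sec_demo(next_running_time, tz) < waiting_sec:
        # Sleep in coarse steps until the tick is ~15s away.
        while (duration := get_diff_sec_demo(next_running_time, tz)) > 15:
            time.sleep(sleep_interval)
            print(f"sleep until: {duration}")
        time.sleep(1)
        return "execute now"
    return "not close to run yet"

tz = ZoneInfo("UTC")
print(release_demo(datetime.now(tz=tz) + timedelta(seconds=16), tz,
                   sleep_interval=1))
```

`poke` then simply fans `release` out over every `on` schedule with a thread pool, so several schedules can count down concurrently.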
@@ -430,7 +753,7 @@ class Pipeline(BaseModel):

          """
          logging.info(
-             f"[CORE]: Start Pipeline {self.name}:"
+             f"[CORE]: Start Execute: {self.name}:"
              f"{gen_id(self.name, unique=True)}"
          )
          params: DictData = params or {}
@@ -440,58 +763,131 @@ class Pipeline(BaseModel):
              logging.warning("[PIPELINE]: This pipeline does not have any jobs")
              return Result(status=0, context=params)

-         # NOTE: create a job queue that keep the job that want to running after
+         # NOTE: Create a job queue that keeps the jobs that want to run after
          # their dependency conditions.
          jq: Queue = Queue()
          for job_id in self.jobs:
              jq.put(job_id)

+         # NOTE: Create the start timestamp.
          ts: float = time.monotonic()
-         not_time_out_flag: bool = True

          # NOTE: Create result context that will pass this context to any
          # execution dependency.
          rs: Result = Result(context=self.parameterize(params))
+         try:
+             rs.receive(
+                 self.__exec_non_threading(rs, jq, ts, timeout=timeout)
+                 if (
+                     worker := int(
+                         os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "1")
+                     )
+                 )
+                 == 1
+                 else self.__exec_threading(
+                     rs, jq, ts, worker=worker, timeout=timeout
+                 )
+             )
+             return rs
+         except PipelineException as err:
+             rs.context.update({"error": {"message": str(err)}})
+             rs.status = 1
+             return rs

-         # IMPORTANT: The job execution can run parallel and waiting by needed.
-         while not jq.empty() and (
+     def __exec_threading(
+         self,
+         rs: Result,
+         job_queue: Queue,
+         ts: float,
+         *,
+         worker: int = 1,
+         timeout: int = 600,
+     ) -> Result:
+         """Pipeline threading execution."""
+         not_time_out_flag: bool = True
+
+         # IMPORTANT: The job execution can run in parallel and wait for any
+         #   needed jobs.
+         with ThreadPoolExecutor(max_workers=worker) as executor:
+             futures: list[Future] = []
+             while not job_queue.empty() and (
+                 not_time_out_flag := ((time.monotonic() - ts) < timeout)
+             ):
+                 job_id: str = job_queue.get()
+                 job: Job = self.jobs[job_id]
+
+                 # IMPORTANT:
+                 #   Change any job running IDs to this pipeline running ID.
+                 job.run_id = self.run_id
+
+                 if any(need not in rs.context["jobs"] for need in job.needs):
+                     job_queue.put(job_id)
+                     time.sleep(0.5)
+                     continue
+
+                 futures.append(
+                     executor.submit(
+                         self.job_execute,
+                         job_id,
+                         params=copy.deepcopy(rs.context),
+                     ),
+                 )
+
+             for future in as_completed(futures):
+                 if err := future.exception():
+                     logging.error(f"{err}")
+                     raise PipelineException(f"{err}")
+
+                 # NOTE: Update the job result to the pipeline result.
+                 rs.receive_jobs(future.result(timeout=20))
+
+         if not not_time_out_flag:
+             logging.warning(
+                 f"({self.run_id}) [PIPELINE]: Execution of pipeline timed out"
+             )
+             raise PipelineException(
+                 f"Execution of pipeline: {self.name} timed out"
+             )
+         rs.status = 0
+         return rs
+
+     def __exec_non_threading(
+         self,
+         rs: Result,
+         job_queue: Queue,
+         ts: float,
+         *,
+         timeout: int = 600,
+     ) -> Result:
+         """Pipeline non-threading execution."""
+         not_time_out_flag: bool = True
+         logging.info(f"[CORE]: Run {self.name} with non-threading job executor")
+         while not job_queue.empty() and (
              not_time_out_flag := ((time.monotonic() - ts) < timeout)
          ):
-             job_id: str = jq.get()
-             logging.info(f"[PIPELINE]: Start execute the job: {job_id!r}")
+             job_id: str = job_queue.get()
              job: Job = self.jobs[job_id]

-             # TODO: Condition on ``needs`` of this job was set. It should create
-             #   multithreading process on this step.
-             #   But, I don't know how to handle changes params between each job
-             #   execution while its use them together.
-             # ---
-             # >>> import multiprocessing
-             # >>> with multiprocessing.Pool(processes=3) as pool:
-             # ...     results = pool.starmap(merge_names, ('', '', ...))
-             # ---
-             # This case we use multi-process because I want to split usage of
-             # data in this level, that mean the data that push to parallel job
-             # should not use across another job.
-             #
-             if any(rs.context["jobs"].get(need) for need in job.needs):
-                 jq.put(job_id)
-
-             # NOTE: copy current the result context for reference other job
-             #   context.
-             job_context: DictData = copy.deepcopy(rs.context)
-             job_rs: Result = job.execute(params=job_context)
-             if job_rs.status == 0:
-                 # NOTE: Receive output of job execution.
-                 rs.context["jobs"][job_id] = job.set_outputs(job_rs.context)
-             else:
-                 raise PipelineException(
-                     f"Getting status does not equal zero on job: {job_id}."
-                 )
+             # IMPORTANT:
+             #   Change any job running IDs to this pipeline running ID.
+             job.run_id = self.run_id
+
+             # NOTE: Requeue this job when any of its needed jobs has not
+             #   finished yet.
+             if any(need not in rs.context["jobs"] for need in job.needs):
+                 job_queue.put(job_id)
+                 time.sleep(0.5)
+                 continue
+
+             # NOTE: Start job execution.
+             job_rs = self.job_execute(job_id, params=copy.deepcopy(rs.context))
+             rs.context["jobs"].update(job_rs.context)

          if not not_time_out_flag:
-             logging.warning("Execution of pipeline was time out")
-             rs.status = 1
-             return rs
+             logging.warning(
+                 f"({self.run_id}) [PIPELINE]: Execution of pipeline timed out"
+             )
+             raise PipelineException(
+                 f"Execution of pipeline: {self.name} timed out"
+             )
          rs.status = 0
          return rs
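Both `__exec_threading` and `__exec_non_threading` resolve `needs` the same way: pop a job ID off the queue, and if any dependency has not landed in `rs.context["jobs"]` yet, push the ID back and sleep briefly. A minimal sketch of that requeue loop with plain dicts in place of the Job models:

```python
import time
from queue import Queue

jobs = {
    "extract": [],
    "transform": ["extract"],
    "load": ["transform"],
}

job_queue: Queue = Queue()
for job_id in jobs:
    job_queue.put(job_id)

finished: dict[str, dict] = {}  # stand-in for rs.context["jobs"]
while not job_queue.empty():
    job_id = job_queue.get()
    # Requeue until every needed job has produced its output.
    if any(need not in finished for need in jobs[job_id]):
        job_queue.put(job_id)
        time.sleep(0.01)
        continue
    finished[job_id] = {"status": 0}  # stand-in for job_execute(...)
    print(f"executed: {job_id}")
```

An unresolvable dependency would spin here until the `(time.monotonic() - ts) < timeout` guard trips in the real code, which is exactly the situation the `needs` validator earlier in this diff prevents.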