ddeutil-workflow 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,47 +6,67 @@
  from __future__ import annotations

  import copy
- import logging
  import os
  import time
  from concurrent.futures import (
      FIRST_EXCEPTION,
      Future,
-     ProcessPoolExecutor,
      ThreadPoolExecutor,
      as_completed,
      wait,
  )
- from datetime import datetime
- from multiprocessing import Event, Manager
+ from datetime import datetime, timedelta
+ from heapq import heappush
  from pickle import PickleError
  from queue import Queue
+ from textwrap import dedent
+ from threading import Event
  from typing import Optional
  from zoneinfo import ZoneInfo

  from pydantic import BaseModel, Field
- from pydantic.functional_validators import model_validator
+ from pydantic.functional_validators import field_validator, model_validator
  from typing_extensions import Self

- from .__types import DictData, DictStr, Matrix, MatrixExclude, MatrixInclude
+ from .__types import (
+     DictData,
+     DictStr,
+     Matrix,
+     MatrixExclude,
+     MatrixInclude,
+     TupleStr,
+ )
+ from .cron import CronRunner
  from .exceptions import (
      JobException,
      PipelineException,
      StageException,
      UtilException,
  )
- from .loader import Loader
+ from .log import FileLog, Log, get_logger
  from .on import On
- from .scheduler import CronRunner
  from .stage import Stage
  from .utils import (
+     Loader,
      Param,
      Result,
      cross_product,
      dash2underscore,
+     delay,
      filter_func,
      gen_id,
      get_diff_sec,
+     has_template,
+     param2template,
+ )
+
+ logger = get_logger("ddeutil.workflow")
+
+
+ __all__: TupleStr = (
+     "Strategy",
+     "Job",
+     "Pipeline",
  )


@@ -67,9 +87,25 @@ class Strategy(BaseModel):
      ... }
      """

-     fail_fast: bool = Field(default=False)
-     max_parallel: int = Field(default=1, gt=0)
-     matrix: Matrix = Field(default_factory=dict)
+     fail_fast: bool = Field(
+         default=False,
+         serialization_alias="fail-fast",
+     )
+     max_parallel: int = Field(
+         default=1,
+         gt=0,
+         description=(
+             "The maximum number of executor thread pool that want to run "
+             "parallel"
+         ),
+         serialization_alias="max-parallel",
+     )
+     matrix: Matrix = Field(
+         default_factory=dict,
+         description=(
+             "A matrix values that want to cross product to possible strategies."
+         ),
+     )
      include: MatrixInclude = Field(
          default_factory=list,
          description="A list of additional matrix that want to adds-in.",
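The new `serialization_alias` values only affect dumped output; the model still populates from dash-style config keys via the `dash2underscore` pre-validator seen later in this file. A minimal self-contained sketch of how that pair behaves in Pydantic v2 (a stand-in class, not the package's own):

```python
from pydantic import BaseModel, Field, model_validator


class Strategy(BaseModel):
    # Stand-in for ddeutil-workflow's Strategy; only the alias mechanics shown.
    fail_fast: bool = Field(default=False, serialization_alias="fail-fast")
    max_parallel: int = Field(default=1, gt=0, serialization_alias="max-parallel")

    @model_validator(mode="before")
    def __prepare_keys(cls, values: dict) -> dict:
        # Accept dash-style keys from a YAML config (what dash2underscore does).
        for key in ("fail-fast", "max-parallel"):
            if key in values:
                values[key.replace("-", "_")] = values.pop(key)
        return values


s = Strategy.model_validate({"fail-fast": True, "max-parallel": 4})
print(s.model_dump(by_alias=True))  # {'fail-fast': True, 'max-parallel': 4}
```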
@@ -166,9 +202,22 @@ class Job(BaseModel):
      ... }
      """

-     id: Optional[str] = Field(default=None)
-     desc: Optional[str] = Field(default=None)
-     runs_on: Optional[str] = Field(default=None)
+     id: Optional[str] = Field(
+         default=None,
+         description=(
+             "A job ID, this value will add from pipeline after validation "
+             "process."
+         ),
+     )
+     desc: Optional[str] = Field(
+         default=None,
+         description="A job description that can be string of markdown content.",
+     )
+     runs_on: Optional[str] = Field(
+         default=None,
+         description="A target executor node for this job use to execution.",
+         serialization_alias="runs-on",
+     )
      stages: list[Stage] = Field(
          default_factory=list,
          description="A list of Stage of this job.",
@@ -182,7 +231,10 @@ class Job(BaseModel):
          description="A strategy matrix that want to generate.",
      )
      run_id: Optional[str] = Field(
-         default=None, description="A running job ID.", repr=False
+         default=None,
+         description="A running job ID.",
+         repr=False,
+         exclude=True,
      )

      @model_validator(mode="before")
@@ -193,12 +245,31 @@ class Job(BaseModel):
          dash2underscore("runs-on", values)
          return values

+     @field_validator("desc", mode="after")
+     def ___prepare_desc(cls, value: str) -> str:
+         """Prepare a description string that was created on a template."""
+         return dedent(value)
+
      @model_validator(mode="after")
      def __prepare_running_id(self):
          if self.run_id is None:
              self.run_id = gen_id(self.id or "", unique=True)
+
+         # VALIDATE: A job ID should not be dynamic with a params template.
+         if has_template(self.id):
+             raise ValueError("Job ID should not have any template.")
+
          return self

+     def get_running_id(self, run_id: str) -> Self:
+         """Return a Job model object with its job running ID changed to an
+         input running ID.
+
+         :param run_id: A replacement job running ID.
+         :rtype: Self
+         """
+         return self.model_copy(update={"run_id": run_id})
+
      def stage(self, stage_id: str) -> Stage:
          """Return the stage model that matches an input stage ID."""
          for stage in self.stages:
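`get_running_id` relies on Pydantic's `model_copy(update=...)`, which returns a copy with the given fields overridden instead of mutating the shared instance; this is what lets one loaded `Job` or `Pipeline` serve many concurrent runs. A small demonstration with a stand-in model:

```python
from typing import Optional

from pydantic import BaseModel


class Job(BaseModel):
    # Stand-in for the package's Job model.
    id: Optional[str] = None
    run_id: Optional[str] = None

    def get_running_id(self, run_id: str) -> "Job":
        # Copy-on-write: the original (possibly shared) model is untouched.
        return self.model_copy(update={"run_id": run_id})


template = Job(id="transform", run_id="original")
runner = template.get_running_id("run-20240101")
print(template.run_id, runner.run_id)  # original run-20240101
```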
@@ -207,12 +278,12 @@ class Job(BaseModel):
          raise ValueError(f"Stage ID {stage_id} does not exist")

      def set_outputs(self, output: DictData) -> DictData:
+         """Set the outputs of a job execution."""
          if len(output) > 1 and self.strategy.is_set():
              return {"strategies": output}
-
          return output[next(iter(output))]

-     def strategy_execute(
+     def execute_strategy(
          self,
          strategy: DictData,
          params: DictData,
@@ -232,6 +303,7 @@ class Job(BaseModel):
          :raise JobException: If it has any error from StageException or
              UtilException.
          """
+         # NOTE: Force stop this execution if event was set from main execution.
          if event and event.is_set():
              return Result(
                  status=1,
@@ -239,7 +311,7 @@ class Job(BaseModel):
                      gen_id(strategy): {
                          "matrix": strategy,
                          "stages": {},
-                         "error": {
+                         "error_message": {
                              "message": "Process Event stopped before execution"
                          },
                      },
@@ -262,23 +334,23 @@ class Job(BaseModel):
          for stage in self.stages:

              # IMPORTANT: Change any stage running IDs to this job running ID.
-             stage.run_id = self.run_id
+             stage: Stage = stage.get_running_id(self.run_id)

              _st_name: str = stage.id or stage.name

              if stage.is_skipped(params=context):
-                 logging.info(
+                 logger.info(
                      f"({self.run_id}) [JOB]: Skip the stage: {_st_name!r}"
                  )
                  continue

-             logging.info(
+             logger.info(
                  f"({self.run_id}) [JOB]: Start execute the stage: {_st_name!r}"
              )

              # NOTE: Logging a matrix that pass on this stage execution.
              if strategy:
-                 logging.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")
+                 logger.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")

              # NOTE:
              # I do not use below syntax because `params` dict be the
@@ -303,8 +375,12 @@ class Job(BaseModel):
                      context={
                          gen_id(strategy): {
                              "matrix": strategy,
-                             "stages": filter_func(context.pop("stages", {})),
-                             "error": {
+                             # NOTE: If the job strategy executor uses
+                             #   multithreading, it does not filter function
+                             #   objects from the context.
+                             # ---
+                             # "stages": filter_func(context.pop("stages", {})),
+                             "stages": context.pop("stages", {}),
+                             "error_message": {
                                  "message": (
                                      "Process Event stopped before execution"
                                  ),
@@ -314,15 +390,20 @@ class Job(BaseModel):
              )
              try:
                  rs: Result = stage.execute(params=context)
-                 stage.set_outputs(rs.context, params=context)
+                 stage.set_outputs(rs.context, to=context)
              except (StageException, UtilException) as err:
-                 logging.error(
+                 logger.error(
                      f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
                  )
                  raise JobException(
                      f"Get stage execution error: {err.__class__.__name__}: "
                      f"{err}"
                  ) from None
+
+             # NOTE: Remove the new stage object that was created from the
+             #   ``get_running_id`` method.
+             del stage
+
          return Result(
              status=0,
              context={
@@ -345,109 +426,132 @@ class Job(BaseModel):
          :param params: An input parameters that use on job execution.
          :rtype: Result
          """
-         strategy_context: DictData = {}
+         context: DictData = {}

          # NOTE: Normal Job execution.
          if (not self.strategy.is_set()) or self.strategy.max_parallel == 1:
              for strategy in self.strategy.make():
-                 rs: Result = self.strategy_execute(
+                 rs: Result = self.execute_strategy(
                      strategy, params=copy.deepcopy(params)
                  )
-                 strategy_context.update(rs.context)
+                 context.update(rs.context)
              return Result(
                  status=0,
-                 context=strategy_context,
+                 context=context,
              )

-         # WARNING: (WF001) I got error that raise when use
-         # ``ProcessPoolExecutor``;
-         # ---
-         # _pickle.PicklingError: Can't pickle
-         # <function ??? at 0x000001F0BE80F160>: attribute lookup ???
-         # on ddeutil.workflow.stage failed
+         # # WARNING: (WF001) I got an error that raises when using
+         # # ``ProcessPoolExecutor``;
+         # # ---
+         # # _pickle.PicklingError: Can't pickle
+         # # <function ??? at 0x000001F0BE80F160>: attribute lookup ???
+         # # on ddeutil.workflow.stage failed
+         # #
+         # # from multiprocessing import Event, Manager
+         # with Manager() as manager:
+         #     event: Event = manager.Event()
          #
-         with Manager() as manager:
-             event: Event = manager.Event()
-
-             # NOTE: Start process pool executor for running strategy executor in
-             # parallel mode.
-             with ProcessPoolExecutor(
-                 max_workers=self.strategy.max_parallel
-             ) as executor:
-                 features: list[Future] = [
-                     executor.submit(
-                         self.strategy_execute,
-                         strategy,
-                         params=copy.deepcopy(params),
-                         event=event,
-                     )
-                     for strategy in self.strategy.make()
-                 ]
-                 if self.strategy.fail_fast:
-                     rs = self.__catch_fail_fast(event, features)
-                 else:
-                     rs = self.__catch_all_completed(features)
+         #     # NOTE: Start process pool executor for running strategy executor
+         #     # in parallel mode.
+         #     with ProcessPoolExecutor(
+         #         max_workers=self.strategy.max_parallel
+         #     ) as executor:
+         #         futures: list[Future] = [
+         #             executor.submit(
+         #                 self.execute_strategy,
+         #                 strategy,
+         #                 params=copy.deepcopy(params),
+         #                 event=event,
+         #             )
+         #             for strategy in self.strategy.make()
+         #         ]
+         #         if self.strategy.fail_fast:
+         #             rs = self.__catch_fail_fast(event, futures)
+         #         else:
+         #             rs = self.__catch_all_completed(futures)
+
+         # NOTE: Create an event for canceling the running executors.
+         event: Event = Event()
+
+         with ThreadPoolExecutor(
+             max_workers=self.strategy.max_parallel
+         ) as executor:
+             futures: list[Future] = [
+                 executor.submit(
+                     self.execute_strategy,
+                     strategy,
+                     params=copy.deepcopy(params),
+                     event=event,
+                 )
+                 for strategy in self.strategy.make()
+             ]
+
+             # NOTE: Dynamically catch the futures with the fail-fast flag.
+             if self.strategy.fail_fast:
+                 rs: Result = self.__catch_fail_fast(event, futures)
+             else:
+                 rs: Result = self.__catch_all_completed(futures)
          return Result(
              status=0,
              context=rs.context,
          )

-     def __catch_fail_fast(self, event: Event, features: list[Future]) -> Result:
-         """Job parallel pool features catching with fail-fast mode. That will
-         stop all not done features if it receive the first exception from all
-         running features.
+     def __catch_fail_fast(self, event: Event, futures: list[Future]) -> Result:
+         """Job parallel pool futures catching with fail-fast mode. That will
+         stop all not-done futures if it receives the first exception from all
+         running futures.

          :param event:
-         :param features: A list of features.
+         :param futures: A list of futures.
          :rtype: Result
          """
-         strategy_context: DictData = {}
+         context: DictData = {}
          # NOTE: Get results from a collection of tasks with a
          # timeout that has the first exception.
          done, not_done = wait(
-             features, timeout=1800, return_when=FIRST_EXCEPTION
+             futures, timeout=1800, return_when=FIRST_EXCEPTION
          )
          nd: str = (
              f", the strategies do not run is {not_done}" if not_done else ""
          )
-         logging.debug(f"[JOB]: Strategy is set Fail Fast{nd}")
+         logger.debug(f"({self.run_id}) [JOB]: Strategy is set Fail Fast{nd}")
+
+         if len(done) != len(futures):

-         # NOTE: Stop all running tasks
-         event.set()
+             # NOTE: Stop all running tasks
+             event.set()

-         # NOTE: Cancel any scheduled tasks
-         for future in features:
-             future.cancel()
+             # NOTE: Cancel any scheduled tasks
+             for future in futures:
+                 future.cancel()

          status: int = 0
-         for f in done:
-             if f.exception():
+         for future in done:
+             if future.exception():
                  status = 1
-                 logging.error(
+                 logger.error(
                      f"({self.run_id}) [JOB]: One stage failed with: "
-                     f"{f.exception()}, shutting down this feature."
+                     f"{future.exception()}, shutting down this future."
                  )
-             elif f.cancelled():
+             elif future.cancelled():
                  continue
              else:
-                 rs: Result = f.result(timeout=60)
-                 strategy_context.update(rs.context)
-         return Result(
-             status=status,
-             context=strategy_context,
-         )
+                 rs: Result = future.result(timeout=60)
+                 context.update(rs.context)
+         return Result(status=status, context=context)

-     def __catch_all_completed(self, features: list[Future]) -> Result:
-         """Job parallel pool features catching with all-completed mode.
+     def __catch_all_completed(self, futures: list[Future]) -> Result:
+         """Job parallel pool futures catching with all-completed mode.

-         :param features: A list of features.
+         :param futures: A list of futures.
+         :rtype: Result
          """
-         strategy_context: DictData = {}
+         context: DictData = {}
          status: int = 0
-         for feature in as_completed(features):
+         for future in as_completed(futures):
              try:
-                 rs: Result = feature.result(timeout=60)
-                 strategy_context.update(rs.context)
+                 rs: Result = future.result(timeout=60)
+                 context.update(rs.context)
              except PickleError as err:
                  # NOTE: (WF001) I do not want to fix this issue because
                  # it does not make sense and over-engineering with
@@ -458,34 +562,42 @@ class Job(BaseModel):
                  ) from None
              except TimeoutError:
                  status = 1
-                 logging.warning("Task is hanging. Attempting to kill.")
-                 feature.cancel()
-                 if not feature.cancelled():
-                     logging.warning("Failed to cancel the task.")
+                 logger.warning(
+                     f"({self.run_id}) [JOB]: Task is hanging. Attempting to "
+                     f"kill."
+                 )
+                 future.cancel()
+                 time.sleep(0.1)
+                 if not future.cancelled():
+                     logger.warning(
+                         f"({self.run_id}) [JOB]: Failed to cancel the task."
+                     )
                  else:
-                     logging.warning("Task canceled successfully.")
+                     logger.warning(
+                         f"({self.run_id}) [JOB]: Task canceled successfully."
+                     )
              except JobException as err:
                  status = 1
-                 logging.error(
+                 logger.error(
                      f"({self.run_id}) [JOB]: Get stage exception with "
                      f"fail-fast does not set;\n{err.__class__.__name__}:\n\t"
                      f"{err}"
                  )
-         return Result(status=status, context=strategy_context)
+         return Result(status=status, context=context)


  class Pipeline(BaseModel):
-     """Pipeline Model, the main feature of this project: it is the workflow
-     data for running anywhere that you want. It uses lightweight code to
-     execute it.
+     """Pipeline Model, the main feature of this project: it is the workflow
+     data for running anywhere that you want, or for scheduling tasks in the
+     background. It uses lightweight code on top of a Pydantic model and
+     enhances it with an execute method.
      """

      name: str = Field(description="A pipeline name.")
      desc: Optional[str] = Field(
          default=None,
          description=(
-             "A pipeline description that is able to be string of markdown "
-             "content."
+             "A pipeline description that can be string of markdown content."
          ),
      )
      params: dict[str, Param] = Field(
@@ -501,33 +613,46 @@ class Pipeline(BaseModel):
          description="A mapping of job ID and job model that already loaded.",
      )
      run_id: Optional[str] = Field(
-         default=None, description="A running job ID.", repr=False
+         default=None,
+         description="A running pipeline ID.",
+         repr=False,
+         exclude=True,
      )

+     @property
+     def new_run_id(self) -> str:
+         """Running ID of this pipeline that always generates a new unique value."""
+         return gen_id(self.name, unique=True)
+
      @classmethod
      def from_loader(
          cls,
          name: str,
          externals: DictData | None = None,
      ) -> Self:
-         """Create Pipeline instance from the Loader object.
+         """Create a Pipeline instance from the Loader object that only receives
+         an input pipeline name. The loader object will use this pipeline name
+         to search for the configuration data of this pipeline model in the
+         conf path.

          :param name: A pipeline name that want to pass to Loader object.
          :param externals: An external parameters that want to pass to Loader
              object.
+         :rtype: Self
          """
          loader: Loader = Loader(name, externals=(externals or {}))
+
+         # NOTE: Validate that the config type matches the current model.
+         if loader.type != cls:
+             raise ValueError(f"Type {loader.type} does not match with {cls}")
+
          loader_data: DictData = copy.deepcopy(loader.data)

          # NOTE: Add name to loader data
          loader_data["name"] = name.replace(" ", "_")

-         if "jobs" not in loader_data:
-             raise ValueError("Config does not set ``jobs`` value")
-
          # NOTE: Prepare `on` data
          cls.__bypass_on(loader_data)
-         return cls.model_validate(loader_data)
+         return cls.model_validate(obj=loader_data)

      @classmethod
      def __bypass_on(cls, data: DictData, externals: DictData | None = None):
@@ -537,6 +662,8 @@ class Pipeline(BaseModel):
              on = [on]
          if any(not isinstance(i, (dict, str)) for i in on):
              raise TypeError("The ``on`` key should be list of str or dict")
+
+         # NOTE: Pass on value to Loader and keep the on model object in the on field
          data["on"] = [
              (
                  Loader(n, externals=(externals or {})).data
@@ -562,25 +689,48 @@ class Pipeline(BaseModel):
              }
          return values

+     @field_validator("desc", mode="after")
+     def ___prepare_desc(cls, value: str) -> str:
+         """Prepare a description string that was created on a template."""
+         return dedent(value)
+
      @model_validator(mode="after")
      def __validate_jobs_need_and_prepare_running_id(self):
+         """Validate that every needed job in any job exists."""
          for job in self.jobs:
              if not_exist := [
                  need for need in self.jobs[job].needs if need not in self.jobs
              ]:
                  raise PipelineException(
                      f"This needed jobs: {not_exist} do not exist in this "
-                     f"pipeline."
+                     f"pipeline, {self.name!r}"
                  )

              # NOTE: update a job id with its job id from pipeline template
              self.jobs[job].id = job

          if self.run_id is None:
-             self.run_id = gen_id(self.name, unique=True)
+             self.run_id = self.new_run_id
+
+         # VALIDATE: A pipeline name should not be dynamic with a params
+         #   template.
+         if has_template(self.name):
+             raise ValueError(
+                 f"Pipeline name should not have any template, please check, "
+                 f"{self.name!r}."
+             )

          return self

+     def get_running_id(self, run_id: str) -> Self:
+         """Return a Pipeline model object with its pipeline running ID changed
+         to an input running ID.
+
+         :param run_id: A replacement pipeline running ID.
+         :rtype: Self
+         """
+         return self.model_copy(update={"run_id": run_id})
+
      def job(self, name: str) -> Job:
          """Return a Job model that exists in this pipeline.

@@ -591,7 +741,10 @@ class Pipeline(BaseModel):
          :returns: A job model that exists on this pipeline by input name.
          """
          if name not in self.jobs:
-             raise ValueError(f"Job {name!r} does not exists")
+             raise ValueError(
+                 f"A Job {name!r} does not exist in this pipeline, "
+                 f"{self.name!r}"
+             )
          return self.jobs[name]

      def parameterize(self, params: DictData) -> DictData:
@@ -629,95 +782,213 @@ class Pipeline(BaseModel):
      def release(
          self,
          on: On,
-         params: DictData | None = None,
+         params: DictData,
+         queue: list[datetime],
          *,
-         waiting_sec: int = 600,
-         sleep_interval: int = 10,
-     ) -> str:
+         waiting_sec: int = 60,
+         sleep_interval: int = 15,
+         log: Log = None,
+     ) -> Result:
          """Start running the pipeline with the on schedule in a period of 30
          minutes. That means it will still run in the background for 30 minutes
          until the schedule matches with its time.
+
+         This method allows the pipeline to use a log object to save the
+         execution result to a log destination, like a file log in the local
+         `/logs` directory.
+
+         :param on: An on schedule value.
+         :param params: A pipeline parameter that passes to the execute method.
+         :param queue: A list of release times that are already running.
+         :param waiting_sec: A second period value that allows the pipeline to
+             execute.
+         :param sleep_interval: A second value to wait until the time to
+             execute.
+         :param log: A log object that saves the execution result.
+         :rtype: Result
          """
-         params: DictData = params or {}
-         logging.info(f"[CORE] Start release: {self.name!r} : {on.cronjob}")
+         log: Log = log or FileLog
+         tz: ZoneInfo = ZoneInfo(os.getenv("WORKFLOW_CORE_TIMEZONE", "UTC"))
+         gen: CronRunner = on.generate(
+             datetime.now(tz=tz).replace(second=0, microsecond=0)
+             + timedelta(seconds=1)
+         )
+         cron_tz: ZoneInfo = gen.tz

-         gen: CronRunner = on.generate(datetime.now())
-         tz: ZoneInfo = gen.tz
-         next_running_time: datetime = gen.next
+         # NOTE: Get the next schedule time that generates from now.
+         next_time: datetime = gen.next

-         if get_diff_sec(next_running_time, tz=tz) < waiting_sec:
-             logging.debug(
-                 f"[CORE]: {self.name} closely to run >> "
-                 f"{next_running_time:%Y-%m-%d %H:%M:%S}"
+         # NOTE: Get the next time until it is not already pointed in the log.
+         while log.is_pointed(self.name, next_time, queue=queue):
+             next_time: datetime = gen.next
+
+         # NOTE: Push this next running time to the log queue
+         heappush(queue, next_time)
+
+         # VALIDATE: Check that the difference between the next schedule time
+         #   and now is less than the waiting period (second unit).
+         if get_diff_sec(next_time, tz=cron_tz) > waiting_sec:
+             logger.debug(
+                 f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                 f"Does not closely >> {next_time:%Y-%m-%d %H:%M:%S}"
              )

-             # NOTE: Release when the time is nearly to schedule time.
-             while (duration := get_diff_sec(next_running_time, tz=tz)) > 15:
-                 time.sleep(sleep_interval)
-                 logging.debug(
-                     f"[CORE]: {self.name!r} : Sleep until: {duration}"
-                 )
+             # NOTE: Remove the next datetime from the queue.
+             queue.remove(next_time)
+
+             time.sleep(0.15)
+             return Result(
+                 status=0,
+                 context={
+                     "params": params,
+                     "poking": {"skipped": [str(on.cronjob)], "run": []},
+                 },
+             )
+
+         logger.debug(
+             f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+             f"Closely to run >> {next_time:%Y-%m-%d %H:%M:%S}"
+         )
+
+         # NOTE: Release when the time is nearly the schedule time.
+         while (duration := get_diff_sec(next_time, tz=cron_tz)) > (
+             sleep_interval + 5
+         ):
+             logger.debug(
+                 f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                 f"Sleep until: {duration}"
+             )
+             time.sleep(sleep_interval)
+
+         time.sleep(0.5)

-             time.sleep(1)
-             rs: Result = self.execute(params=params)
-             logging.debug(f"{rs.context}")
+         # NOTE: Release parameters that are used for templating if params has
+         #   a template.
+         release_params: DictData = {
+             "release": {
+                 "logical_date": next_time,
+             },
+         }
+
+         # WARNING: Re-create the pipeline object with a new running pipeline
+         #   ID.
+         runner: Self = self.get_running_id(run_id=self.new_run_id)
+         rs: Result = runner.execute(
+             params=param2template(params, release_params),
+         )
+         logger.debug(
+             f"({runner.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+             f"End release {next_time:%Y-%m-%d %H:%M:%S}"
+         )

-             return f"[CORE]: Start Execute: {self.name}"
-         return f"[CORE]: {self.name} does not closely to run yet."
+         # NOTE: Delete the copied pipeline instance for saving memory.
+         del runner
+
+         rs.set_parent_run_id(self.run_id)
+         rs_log: Log = log.model_validate(
+             {
+                 "name": self.name,
+                 "on": str(on.cronjob),
+                 "release": next_time,
+                 "context": rs.context,
+                 "parent_run_id": rs.run_id,
+                 "run_id": rs.run_id,
+             }
+         )
+         # NOTE: Save the execution result to the destination of the input log
+         #   object.
+         rs_log.save(excluded=None)
+
+         queue.remove(next_time)
+         time.sleep(0.05)
+         return Result(
+             status=0,
+             context={
+                 "params": params,
+                 "poking": {"skipped": [], "run": [str(on.cronjob)]},
+             },
+         )

-     def poke(self, params: DictData | None = None):
-         """Poke pipeline threading task for executing with its schedules that
-         was set on the `on`.
+     def poke(
+         self,
+         params: DictData | None = None,
+         *,
+         log: Log | None = None,
+     ) -> list[Result]:
+         """Poke the pipeline with a threading executor pool for executing with
+         all its schedules that were set on the `on` value. This method will
+         observe the schedules that are nearing their run time with the
+         ``self.release()`` method.
+
+         :param params: A parameters that want to pass to the release method.
+         :param log: A log object that want to use on this poking process.
+         :rtype: list[Result]
          """
-         params: DictData = params or {}
-         logging.info(
-             f"[CORE]: Start Poking: {self.name!r} :"
-             f"{gen_id(self.name, unique=True)}"
+         logger.info(
+             f"({self.run_id}) [POKING]: Start Poking: {self.name!r} ..."
          )
-         results = []
-         with ThreadPoolExecutor(
-             max_workers=int(
-                 os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING", "4")
-             ),
-         ) as executor:
-             futures: list[Future] = [
-                 executor.submit(
-                     self.release,
-                     on,
-                     params=params,
+
+         # NOTE: If this pipeline does not set the on schedule, it will return
+         #   an empty result.
+         if len(self.on) == 0:
+             return []
+
+         params: DictData = params or {}
+         queue: list[datetime] = []
+         results: list[Result] = []
+
+         wk: int = int(os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING") or "4")
+         with ThreadPoolExecutor(max_workers=wk) as executor:
+             # TODO: If I want to run infinite loop.
+             futures: list[Future] = []
+             for on in self.on:
+                 futures.append(
+                     executor.submit(
+                         self.release,
+                         on,
+                         params=params,
+                         log=log,
+                         queue=queue,
+                     )
                  )
-                 for on in self.on
-             ]
+                 delay()
+
+             # WARNING: This poking method does not allow fail-fast logic
+             #   for catching the parallel execution results.
              for future in as_completed(futures):
-                 rs = future.result()
-                 logging.info(rs)
-                 results.append(rs)
+                 results.append(future.result(timeout=60))
+
+         if len(queue) > 0:
+             logger.error(
+                 f"({self.run_id}) [POKING]: Log Queue is not empty when the "
+                 f"poking process finished."
+             )
+
          return results

-     def job_execute(
+     def execute_job(
          self,
          job: str,
          params: DictData,
      ) -> Result:
          """Job Executor that use on pipeline executor.
+
          :param job: A job ID that want to execute.
          :param params: A params that was parameterized from pipeline execution.
+         :rtype: Result
          """
          # VALIDATE: check a job ID that exists in this pipeline or not.
          if job not in self.jobs:
              raise PipelineException(
                  f"The job ID: {job} does not exist in {self.name!r} pipeline."
              )
-
          try:
-             logging.info(f"({self.run_id}) [PIPELINE]: Start execute: {job!r}")
-             job_obj: Job = self.jobs[job]
+             logger.info(f"({self.run_id}) [PIPELINE]: Start execute: {job!r}")
+
+             # IMPORTANT:
+             #   Change any job running IDs to this pipeline running ID.
+             job_obj: Job = self.jobs[job].get_running_id(self.run_id)
              j_rs: Result = job_obj.execute(params=params)
+
          except JobException as err:
-             raise PipelineException(
-                 f"The job ID: {job} get raise error: {err.__class__.__name__}:"
-                 f"\n{err}"
-             ) from None
+             raise PipelineException(f"{job}: JobException: {err}") from None
+
          return Result(
              status=j_rs.status,
              context={job: job_obj.set_outputs(j_rs.context)},
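Taken together, `poke()` fans one `release()` thread out per `On` schedule; each thread dedupes its next fire time against the shared heap `queue`, sleeps until close to the cron time, then executes a fresh `get_running_id` copy and persists a log record. A hypothetical usage sketch, assuming a matching `run_python_etl` YAML config exists in the conf path (both the name and the params are illustrative):

```python
from ddeutil.workflow.pipeline import Pipeline

# Hypothetical config name; from_loader looks it up in the conf path.
pipeline = Pipeline.from_loader("run_python_etl", externals={})

# Blocks until each nearby schedule fires (or is skipped), one thread per `on`.
results = pipeline.poke(params={"asat-dt": "2024-01-01"})
for rs in results:
    print(rs.status, rs.context["poking"])
```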
@@ -738,9 +1009,8 @@ class Pipeline(BaseModel):
              for limit time of execution and waiting job dependency.
          :rtype: Result

-         ---
-
          See Also:
+         ---

              The result of execution process for each jobs and stages on this
              pipeline will keeping in dict which able to catch out with all jobs and
@@ -752,15 +1022,16 @@ class Pipeline(BaseModel):
              ... ${job-name}.stages.${stage-id}.outputs.${key}

          """
-         logging.info(
-             f"[CORE]: Start Execute: {self.name}:"
-             f"{gen_id(self.name, unique=True)}"
-         )
+         logger.info(f"({self.run_id}) [CORE]: Start Execute: {self.name!r} ...")
          params: DictData = params or {}
+         ts: float = time.monotonic()

          # NOTE: It should not do anything if it does not have job.
          if not self.jobs:
-             logging.warning("[PIPELINE]: This pipeline does not have any jobs")
+             logger.warning(
+                 f"({self.run_id}) [PIPELINE]: This pipeline: {self.name!r} "
+                 f"does not have any jobs"
+             )
              return Result(status=0, context=params)

          # NOTE: Create a job queue that keep the job that want to running after
@@ -769,125 +1040,147 @@ class Pipeline(BaseModel):
          for job_id in self.jobs:
              jq.put(job_id)

-         # NOTE: Create start timestamp
-         ts: float = time.monotonic()
-
          # NOTE: Create result context that will pass this context to any
          # execution dependency.
-         rs: Result = Result(context=self.parameterize(params))
+         context: DictData = self.parameterize(params)
          try:
-             rs.receive(
-                 self.__exec_non_threading(rs, jq, ts, timeout=timeout)
-                 if (
-                     worker := int(
-                         os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "1")
-                     )
-                 )
-                 == 1
+             worker: int = int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2"))
+             (
+                 self.__exec_non_threading(context, ts, jq, timeout=timeout)
+                 if worker == 1
                  else self.__exec_threading(
-                     rs, jq, ts, worker=worker, timeout=timeout
+                     context, ts, jq, worker=worker, timeout=timeout
                  )
              )
-             return rs
+             return Result(status=0, context=context)
          except PipelineException as err:
-             rs.context.update({"error": {"message": str(err)}})
-             rs.status = 1
-             return rs
+             context.update(
+                 {"error_message": f"{err.__class__.__name__}: {err}"}
+             )
+             return Result(status=1, context=context)

      def __exec_threading(
          self,
-         rs: Result,
-         job_queue: Queue,
+         context: DictData,
          ts: float,
+         job_queue: Queue,
          *,
-         worker: int = 1,
+         worker: int = 2,
          timeout: int = 600,
-     ) -> Result:
-         """Pipeline threading execution."""
+     ) -> DictData:
+         """Pipeline threading execution.
+
+         :param context: A context pipeline data that want to downstream passing.
+         :param ts: A start timestamp that use for checking execute time should
+             timeout.
+         :param timeout: A second value unit that bounding running time.
+         :param worker: A number of threading executor pool size.
+         :rtype: DictData
+         """
          not_time_out_flag: bool = True
+         logger.debug(
+             f"({self.run_id}) [CORE]: Run {self.name} with threading job "
+             f"executor"
+         )

          # IMPORTANT: The job execution can run parallel and waiting by
          # needed.
          with ThreadPoolExecutor(max_workers=worker) as executor:
              futures: list[Future] = []
+
              while not job_queue.empty() and (
                  not_time_out_flag := ((time.monotonic() - ts) < timeout)
              ):
                  job_id: str = job_queue.get()
                  job: Job = self.jobs[job_id]

-                 # IMPORTANT:
-                 # Change any job running IDs to this pipeline running ID.
-                 job.run_id = self.run_id
-
-                 if any(need not in rs.context["jobs"] for need in job.needs):
+                 if any(need not in context["jobs"] for need in job.needs):
                      job_queue.put(job_id)
-                     time.sleep(0.5)
+                     time.sleep(0.25)
                      continue

                  futures.append(
                      executor.submit(
-                         self.job_execute,
+                         self.execute_job,
                          job_id,
-                         params=copy.deepcopy(rs.context),
+                         params=copy.deepcopy(context),
                      ),
                  )
+                 job_queue.task_done()
+
+             # NOTE: Wait for all items to finish processing
+             job_queue.join()

              for future in as_completed(futures):
                  if err := future.exception():
-                     logging.error(f"{err}")
+                     logger.error(f"{err}")
                      raise PipelineException(f"{err}")

                  # NOTE: Update job result to pipeline result.
-                 rs.receive_jobs(future.result(timeout=20))
+                 context["jobs"].update(future.result(timeout=20).context)

-         if not not_time_out_flag:
-             logging.warning(
-                 f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
-             )
-             raise PipelineException(
-                 f"Execution of pipeline: {self.name} was timeout"
-             )
-         rs.status = 0
-         return rs
+         if not_time_out_flag:
+             return context
+
+         # NOTE: Raise timeout error.
+         logger.warning(
+             f"({self.run_id}) [PIPELINE]: Execution of pipeline, {self.name!r} "
+             f", was timeout"
+         )
+         raise PipelineException(
+             f"Execution of pipeline: {self.name} was timeout"
+         )

      def __exec_non_threading(
          self,
-         rs: Result,
-         job_queue: Queue,
+         context: DictData,
          ts: float,
+         job_queue: Queue,
          *,
          timeout: int = 600,
-     ) -> Result:
-         """Pipeline non-threading execution."""
+     ) -> DictData:
+         """Pipeline non-threading execution that uses sequential job running
+         and waits for the previous run to succeed.
+
+         :param context: A context pipeline data that want to downstream passing.
+         :param ts: A start timestamp that use for checking execute time should
+             timeout.
+         :param timeout: A second value unit that bounding running time.
+         :rtype: DictData
+         """
          not_time_out_flag: bool = True
-         logging.info(f"[CORE]: Run {self.name} with non-threading job executor")
+         logger.debug(
+             f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
+             f"executor"
+         )
+
          while not job_queue.empty() and (
              not_time_out_flag := ((time.monotonic() - ts) < timeout)
          ):
              job_id: str = job_queue.get()
              job: Job = self.jobs[job_id]

-             # IMPORTANT:
-             # Change any job running IDs to this pipeline running ID.
-             job.run_id = self.run_id
-
              # NOTE:
-             if any(need not in rs.context["jobs"] for need in job.needs):
+             if any(need not in context["jobs"] for need in job.needs):
                  job_queue.put(job_id)
-                 time.sleep(0.5)
+                 time.sleep(0.25)
                  continue

              # NOTE: Start job execution.
-             job_rs = self.job_execute(job_id, params=copy.deepcopy(rs.context))
-             rs.context["jobs"].update(job_rs.context)
+             job_rs = self.execute_job(job_id, params=copy.deepcopy(context))
+             context["jobs"].update(job_rs.context)
+             job_queue.task_done()

-         if not not_time_out_flag:
-             logging.warning(
-                 f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
-             )
-             raise PipelineException(
-                 f"Execution of pipeline: {self.name} was timeout"
-             )
-         rs.status = 0
-         return rs
+         # NOTE: Wait for all items to finish processing
+         job_queue.join()
+
+         if not_time_out_flag:
+             return context
+
+         # NOTE: Raise timeout error.
+         logger.warning(
+             f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
+         )
+         raise PipelineException(
+             f"Execution of pipeline: {self.name} was timeout"
+         )
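The requeue-until-needs-met loop above is a simple way to order a job DAG without computing a topological sort up front: a job whose `needs` are not yet in `context["jobs"]` goes back on the queue to be retried later. A self-contained sketch of the pattern (job names and needs are illustrative); note that `queue.Queue.join()` only returns once every `get` has been balanced by a `task_done`, including on the requeue path:

```python
import queue

# Illustrative DAG: each job lists the jobs that must finish before it.
jobs = {"load": ["transform"], "extract": [], "transform": ["extract"]}

jq: queue.Queue = queue.Queue()
for job_id in jobs:
    jq.put(job_id)

finished: dict[str, dict] = {}  # stands in for context["jobs"]
while not jq.empty():
    job_id = jq.get()
    if any(need not in finished for need in jobs[job_id]):
        jq.put(job_id)   # dependencies not ready: requeue and retry later
        jq.task_done()   # balance this get so join() can complete
        continue
    finished[job_id] = {"status": 0}  # "execute" the job
    jq.task_done()

jq.join()                # returns immediately: all gets were marked done
print(list(finished))    # ['extract', 'transform', 'load']
```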