ddeutil-workflow 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,35 +12,59 @@ import time
 from concurrent.futures import (
     FIRST_EXCEPTION,
     Future,
-    ProcessPoolExecutor,
     ThreadPoolExecutor,
     as_completed,
     wait,
 )
-from datetime import datetime
-from multiprocessing import Event, Manager
+from datetime import datetime, timedelta
+from heapq import heappush
 from pickle import PickleError
 from queue import Queue
+from textwrap import dedent
+from threading import Event
 from typing import Optional
 from zoneinfo import ZoneInfo
 
 from pydantic import BaseModel, Field
-from pydantic.functional_validators import model_validator
+from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self
 
-from .__types import DictData, DictStr, Matrix, MatrixExclude, MatrixInclude
-from .exceptions import JobException, PipelineException, StageException
+from .__types import (
+    DictData,
+    DictStr,
+    Matrix,
+    MatrixExclude,
+    MatrixInclude,
+    TupleStr,
+)
+from .cron import CronRunner
+from .exceptions import (
+    JobException,
+    PipelineException,
+    StageException,
+    UtilException,
+)
 from .loader import Loader
+from .log import FileLog, Log
 from .on import On
-from .scheduler import CronRunner
 from .stage import Stage
 from .utils import (
     Param,
     Result,
     cross_product,
     dash2underscore,
+    delay,
+    filter_func,
     gen_id,
     get_diff_sec,
+    has_template,
+    param2template,
+)
+
+__all__: TupleStr = (
+    "Strategy",
+    "Job",
+    "Pipeline",
 )
 
 
@@ -54,7 +78,7 @@ class Strategy(BaseModel):
         ...     'fail-fast': False,
         ...     'matrix': {
         ...         'first': [1, 2, 3],
-        ...         'second': ['foo', 'bar']
+        ...         'second': ['foo', 'bar'],
         ...     },
         ...     'include': [{'first': 4, 'second': 'foo'}],
         ...     'exclude': [{'first': 1, 'second': 'bar'}],
@@ -82,6 +106,10 @@ class Strategy(BaseModel):
         dash2underscore("fail-fast", values)
         return values
 
+    def is_set(self) -> bool:
+        """Return True if this strategy was set from yaml template."""
+        return len(self.matrix) > 0
+
     def make(self) -> list[DictStr]:
         """Return List of product of matrix values that already filter with
         exclude and add include.
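
Editor's note: the doctest above and the new is_set()/make() pair describe a matrix cross-product with include/exclude filtering. A rough standalone sketch of that expansion, using itertools rather than the package's own cross_product helper, for the matrix shown in the Strategy docstring:

    from itertools import product

    matrix = {"first": [1, 2, 3], "second": ["foo", "bar"]}
    include = [{"first": 4, "second": "foo"}]
    exclude = [{"first": 1, "second": "bar"}]

    # Cross product of every matrix key, drop excluded combos, append includes.
    combos = [dict(zip(matrix, values)) for values in product(*matrix.values())]
    combos = [c for c in combos if c not in exclude] + include
    print(len(combos))  # 6: five surviving cross-product combos plus one include

The real Strategy.make() may order or de-duplicate entries differently; this only illustrates the intent.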
@@ -138,20 +166,33 @@ class Job(BaseModel):
     Data Validate:
         >>> job = {
         ...     "runs-on": None,
-        ...     "strategy": {},
+        ...     "strategy": {
+        ...         "max-parallel": 1,
+        ...         "matrix": {
+        ...             "first": [1, 2, 3],
+        ...             "second": ['foo', 'bar'],
+        ...         },
+        ...     },
         ...     "needs": [],
         ...     "stages": [
         ...         {
         ...             "name": "Some stage",
         ...             "run": "print('Hello World')",
         ...         },
+        ...         ...
         ...     ],
         ... }
     """
 
-    name: Optional[str] = Field(default=None)
-    desc: Optional[str] = Field(default=None)
-    runs_on: Optional[str] = Field(default=None)
+    id: Optional[str] = Field(default=None, description="A job ID.")
+    desc: Optional[str] = Field(
+        default=None,
+        description="A job description that can be string of markdown content.",
+    )
+    runs_on: Optional[str] = Field(
+        default=None,
+        description="A target executor node for this job use to execution.",
+    )
     stages: list[Stage] = Field(
         default_factory=list,
         description="A list of Stage of this job.",
@@ -164,6 +205,11 @@ class Job(BaseModel):
         default_factory=Strategy,
         description="A strategy matrix that want to generate.",
     )
+    run_id: Optional[str] = Field(
+        default=None,
+        description="A running job ID.",
+        repr=False,
+    )
 
     @model_validator(mode="before")
     def __prepare_keys(cls, values: DictData) -> DictData:
@@ -173,6 +219,31 @@ class Job(BaseModel):
         dash2underscore("runs-on", values)
         return values
 
+    @field_validator("desc", mode="after")
+    def ___prepare_desc(cls, value: str) -> str:
+        """Prepare description string that was created on a template."""
+        return dedent(value)
+
+    @model_validator(mode="after")
+    def __prepare_running_id(self):
+        if self.run_id is None:
+            self.run_id = gen_id(self.id or "", unique=True)
+
+        # VALIDATE: Validate job id should not dynamic with params template.
+        if has_template(self.id):
+            raise ValueError("Job ID should not has any template.")
+
+        return self
+
+    def get_running_id(self, run_id: str) -> Self:
+        """Return Job model object that changing job running ID with an
+        input running ID.
+
+        :param run_id: A replace job running ID.
+        :rtype: Self
+        """
+        return self.model_copy(update={"run_id": run_id})
+
     def stage(self, stage_id: str) -> Stage:
         """Return stage model that match with an input stage ID."""
         for stage in self.stages:
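
Editor's note: a minimal usage sketch of the new running-ID plumbing, assuming a Job built directly rather than loaded from YAML; the IDs below are placeholders. get_running_id() returns a model copy, so the original keeps its own generated run_id:

    job = Job(id="demo-job")                     # run_id is generated by the validator
    replaced = job.get_running_id("pipeline-run-id-1234")
    assert replaced.run_id == "pipeline-run-id-1234"
    assert job.run_id != replaced.run_id         # the original model is untouched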
@@ -180,11 +251,9 @@ class Job(BaseModel):
                 return stage
         raise ValueError(f"Stage ID {stage_id} does not exists")
 
-    @staticmethod
-    def set_outputs(output: DictData) -> DictData:
-        if len(output) > 1:
+    def set_outputs(self, output: DictData) -> DictData:
+        if len(output) > 1 and self.strategy.is_set():
             return {"strategies": output}
-
         return output[next(iter(output))]
 
     def strategy_execute(
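
Editor's note: set_outputs() is now an instance method so it can consult strategy.is_set(). A sketch of the two shapes it distinguishes; the keys are placeholder strategy hashes, not real gen_id() output:

    single = {"strategy-0": {"matrix": {}, "stages": {"echo": {"outputs": {}}}}}
    multi = {
        "strategy-1": {"matrix": {"first": 1}, "stages": {}},
        "strategy-2": {"matrix": {"first": 2}, "stages": {}},
    }
    # job.set_outputs(single) -> unwraps the single entry
    # job.set_outputs(multi)  -> {"strategies": multi} once a matrix is configured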
@@ -194,26 +263,32 @@ class Job(BaseModel):
         *,
         event: Event | None = None,
     ) -> Result:
-        """Strategy execution with passing dynamic parameters from the pipeline
-        stage execution.
+        """Job Strategy execution with passing dynamic parameters from the
+        pipeline execution to strategy matrix.
 
-        :param strategy:
-        :param params:
+        This execution is the minimum level execution of job model.
+
+        :param strategy: A metrix strategy value.
+        :param params: A dynamic parameters.
         :param event: An manger event that pass to the PoolThreadExecutor.
         :rtype: Result
+
+        :raise JobException: If it has any error from StageException or
+            UtilException.
         """
-        _stop_rs: Result = Result(
-            status=1,
-            context={
-                gen_id(strategy): {
-                    "matrix": strategy,
-                    "stages": {},
-                    "error": "Event stopped",
-                },
-            },
-        )
         if event and event.is_set():
-            return _stop_rs
+            return Result(
+                status=1,
+                context={
+                    gen_id(strategy): {
+                        "matrix": strategy,
+                        "stages": {},
+                        "error": {
+                            "message": "Process Event stopped before execution"
+                        },
+                    },
+                },
+            )
 
         # NOTE: Create strategy execution context and update a matrix and copied
         # of params. So, the context value will have structure like;
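
Editor's note: with the switch from multiprocessing to threading, the cancellation handshake is a plain threading.Event. A sketch of the contract the early return above implements; the matrix and params are placeholders:

    from threading import Event

    stop = Event()
    stop.set()
    # rs = job.strategy_execute({"first": 1}, params={}, event=stop)
    # rs.status == 1 and rs.context carries the "Process Event stopped" message
    # without any stage being executed.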
@@ -229,16 +304,25 @@ class Job(BaseModel):
 
         # IMPORTANT: The stage execution only run sequentially one-by-one.
         for stage in self.stages:
+
+            # IMPORTANT: Change any stage running IDs to this job running ID.
+            stage: Stage = stage.get_running_id(self.run_id)
+
             _st_name: str = stage.id or stage.name
 
-            if stage.is_skip(params=context):
-                logging.info(f"[JOB]: Skip the stage: {_st_name!r}")
+            if stage.is_skipped(params=context):
+                logging.info(
+                    f"({self.run_id}) [JOB]: Skip the stage: {_st_name!r}"
+                )
                 continue
-            logging.info(f"[JOB]: Start execute the stage: {_st_name!r}")
+
+            logging.info(
+                f"({self.run_id}) [JOB]: Start execute the stage: {_st_name!r}"
+            )
 
             # NOTE: Logging a matrix that pass on this stage execution.
             if strategy:
-                logging.info(f"[...]: Matrix: {strategy}")
+                logging.info(f"({self.run_id}) [JOB]: Matrix: {strategy}")
 
             # NOTE:
             # I do not use below syntax because `params` dict be the
@@ -258,23 +342,50 @@ class Job(BaseModel):
             # }
             #
             if event and event.is_set():
-                return _stop_rs
-            rs: Result = stage.execute(params=context)
-            if rs.status == 0:
-                stage.set_outputs(rs.context, params=context)
-            else:
-                raise JobException(
-                    f"Getting status does not equal zero on stage: "
-                    f"{stage.name}."
+                return Result(
+                    status=1,
+                    context={
+                        gen_id(strategy): {
+                            "matrix": strategy,
+                            # NOTE: If job strategy executor use multithreading,
+                            #   it will not filter function object from context.
+                            # ---
+                            # "stages": filter_func(context.pop("stages", {})),
+                            "stages": context.pop("stages", {}),
+                            "error": {
+                                "message": (
+                                    "Process Event stopped before execution"
+                                ),
+                            },
+                        },
+                    },
                 )
-        # TODO: Filter and warning if it pass any objects to context between
-        # strategy job executor like function, etc.
+            try:
+                rs: Result = stage.execute(params=context)
+                stage.set_outputs(rs.context, to=context)
+            except (StageException, UtilException) as err:
+                logging.error(
+                    f"({self.run_id}) [JOB]: {err.__class__.__name__}: {err}"
+                )
+                raise JobException(
+                    f"Get stage execution error: {err.__class__.__name__}: "
+                    f"{err}"
+                ) from None
+
+            # NOTE: Remove new stage object that was created from
+            #   ``get_running_id`` method.
+            del stage
+
         return Result(
             status=0,
             context={
                 gen_id(strategy): {
                     "matrix": strategy,
-                    "stages": context.pop("stages", {}),
+                    # NOTE: (WF001) filter own created function from stages
+                    #   value, because it does not dump with pickle when you
+                    #   execute with multiprocess.
+                    #
+                    "stages": filter_func(context.pop("stages", {})),
                 },
             },
         )
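
Editor's note: filter_func() is applied because a PyStage can leave locally defined functions in the stage outputs, and those cannot be pickled if the result ever crosses a process boundary (the WF001 note above). A rough stand-in showing the idea, not the package's actual implementation:

    def drop_callables(values: dict) -> dict:
        """Stand-in for filter_func: strip function objects from an output dict."""
        return {k: v for k, v in values.items() if not callable(v)}

    outputs = {"records": 42, "post_process": lambda x: x + 1}
    print(drop_callables(outputs))  # {'records': 42}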
@@ -288,108 +399,158 @@ class Job(BaseModel):
         :rtype: Result
         """
         strategy_context: DictData = {}
-        rs = Result(context=strategy_context)
 
-        if self.strategy.max_parallel == 1:
+        # NOTE: Normal Job execution.
+        if (not self.strategy.is_set()) or self.strategy.max_parallel == 1:
             for strategy in self.strategy.make():
                 rs: Result = self.strategy_execute(
                     strategy, params=copy.deepcopy(params)
                 )
                 strategy_context.update(rs.context)
-            return rs
+            return Result(
+                status=0,
+                context=strategy_context,
+            )
 
-        # FIXME: (WF001) I got error that raise when use
-        #   ``ProcessPoolExecutor``;
-        # ---
-        #   _pickle.PicklingError: Can't pickle
-        #       <function ??? at 0x000001F0BE80F160>: attribute lookup ???
-        #       on ddeutil.workflow.stage failed
+        # # WARNING: (WF001) I got error that raise when use
+        # #   ``ProcessPoolExecutor``;
+        # # ---
+        # #   _pickle.PicklingError: Can't pickle
+        # #       <function ??? at 0x000001F0BE80F160>: attribute lookup ???
+        # #       on ddeutil.workflow.stage failed
+        # #
+        # # from multiprocessing import Event, Manager
+        # with Manager() as manager:
+        #     event: Event = manager.Event()
         #
-        with Manager() as manager:
-            event: Event = manager.Event()
-
-            with ProcessPoolExecutor(
-                max_workers=self.strategy.max_parallel
-            ) as pool:
-                pool_result: list[Future] = [
-                    pool.submit(
-                        self.strategy_execute,
-                        st,
-                        params=copy.deepcopy(params),
-                        event=event,
-                    )
-                    for st in self.strategy.make()
-                ]
-                if self.strategy.fail_fast:
-
-                    # NOTE: Get results from a collection of tasks with a
-                    # timeout that has the first exception.
-                    done, not_done = wait(
-                        pool_result, timeout=60, return_when=FIRST_EXCEPTION
-                    )
-                    nd: str = (
-                        f", the strategies do not run is {not_done}"
-                        if not_done
-                        else ""
-                    )
-                    logging.warning(f"[JOB]: Strategy is set Fail Fast{nd}")
-
-                    # NOTE: Stop all running tasks
-                    event.set()
-
-                    # NOTE: Cancel any scheduled tasks
-                    for future in pool_result:
-                        future.cancel()
-
-                    rs.status = 0
-                    for f in done:
-                        if f.exception():
-                            rs.status = 1
-                            logging.error(
-                                f"One task failed with: {f.exception()}, "
-                                f"shutting down"
-                            )
-                        elif f.cancelled():
-                            continue
-                        else:
-                            rs: Result = f.result(timeout=60)
-                            strategy_context.update(rs.context)
-                    rs.context = strategy_context
-                    return rs
-
-                for pool_rs in as_completed(pool_result):
-                    try:
-                        rs: Result = pool_rs.result(timeout=60)
-                        strategy_context.update(rs.context)
-                    except PickleError as err:
-                        # NOTE: I do not want to fix this issue because it does
-                        #   not make sense and over-engineering with this bug
-                        #   fix process.
-                        raise JobException(
-                            f"PyStage that create object on locals does use "
-                            f"parallel in strategy;\n\t{err}"
-                        ) from None
-                    except TimeoutError:
-                        rs.status = 1
-                        logging.warning("Task is hanging. Attempting to kill.")
-                        pool_rs.cancel()
-                        if not pool_rs.cancelled():
-                            logging.warning("Failed to cancel the task.")
-                        else:
-                            logging.warning("Task canceled successfully.")
-                    except StageException as err:
-                        rs.status = 1
-                        logging.warning(
-                            f"Get stage exception with fail-fast does not set;"
-                            f"\n\t{err}"
-                        )
-        rs.status = 0
-        rs.context = strategy_context
-        return rs
+        # # NOTE: Start process pool executor for running strategy executor
+        # #   in parallel mode.
+        # with ProcessPoolExecutor(
+        #     max_workers=self.strategy.max_parallel
+        # ) as executor:
+        #     futures: list[Future] = [
+        #         executor.submit(
+        #             self.strategy_execute,
+        #             strategy,
+        #             params=copy.deepcopy(params),
+        #             event=event,
+        #         )
+        #         for strategy in self.strategy.make()
+        #     ]
+        #     if self.strategy.fail_fast:
+        #         rs = self.__catch_fail_fast(event, futures)
+        #     else:
+        #         rs = self.__catch_all_completed(futures)
+
+        # NOTE: Create event for cancel executor stop running.
+        event: Event = Event()
+
+        with ThreadPoolExecutor(
+            max_workers=self.strategy.max_parallel
+        ) as executor:
+            futures: list[Future] = [
+                executor.submit(
+                    self.strategy_execute,
+                    strategy,
+                    params=copy.deepcopy(params),
+                    event=event,
+                )
+                for strategy in self.strategy.make()
+            ]
+            if self.strategy.fail_fast:
+                rs: Result = self.__catch_fail_fast(event, futures)
+            else:
+                rs: Result = self.__catch_all_completed(futures)
+        return Result(
+            status=0,
+            context=rs.context,
+        )
+
+    def __catch_fail_fast(self, event: Event, futures: list[Future]) -> Result:
+        """Job parallel pool futures catching with fail-fast mode. That will
+        stop all not done futures if it receive the first exception from all
+        running futures.
+
+        :param event:
+        :param futures: A list of futures.
+        :rtype: Result
+        """
+        strategy_context: DictData = {}
+        # NOTE: Get results from a collection of tasks with a
+        #   timeout that has the first exception.
+        done, not_done = wait(
+            futures, timeout=1800, return_when=FIRST_EXCEPTION
+        )
+        nd: str = (
+            f", the strategies do not run is {not_done}" if not_done else ""
+        )
+        logging.debug(f"[JOB]: Strategy is set Fail Fast{nd}")
+
+        # NOTE: Stop all running tasks
+        event.set()
+
+        # NOTE: Cancel any scheduled tasks
+        for future in futures:
+            future.cancel()
+
+        status: int = 0
+        for future in done:
+            if future.exception():
+                status = 1
+                logging.error(
+                    f"({self.run_id}) [JOB]: One stage failed with: "
+                    f"{future.exception()}, shutting down this future."
+                )
+            elif future.cancelled():
+                continue
+            else:
+                rs: Result = future.result(timeout=60)
+                strategy_context.update(rs.context)
+        return Result(
+            status=status,
+            context=strategy_context,
+        )
+
+    def __catch_all_completed(self, futures: list[Future]) -> Result:
+        """Job parallel pool futures catching with all-completed mode.
+
+        :param futures: A list of futures.
+        :rtype: Result
+        """
+        strategy_context: DictData = {}
+        status: int = 0
+        for future in as_completed(futures):
+            try:
+                rs: Result = future.result(timeout=60)
+                strategy_context.update(rs.context)
+            except PickleError as err:
+                # NOTE: (WF001) I do not want to fix this issue because
+                #   it does not make sense and over-engineering with
+                #   this bug fix process.
+                raise JobException(
+                    f"PyStage that create object on locals does use "
+                    f"parallel in strategy execution;\n\t{err}"
+                ) from None
+            except TimeoutError:
+                status = 1
+                logging.warning("Task is hanging. Attempting to kill.")
+                future.cancel()
+                if not future.cancelled():
+                    logging.warning("Failed to cancel the task.")
+                else:
+                    logging.warning("Task canceled successfully.")
+            except JobException as err:
+                status = 1
+                logging.error(
+                    f"({self.run_id}) [JOB]: Get stage exception with "
+                    f"fail-fast does not set;\n{err.__class__.__name__}:\n\t"
+                    f"{err}"
+                )
+        return Result(status=status, context=strategy_context)
 
 
 class Pipeline(BaseModel):
-    """Pipeline Model this is the main feature of this project because it use to
+    """Pipeline Model this is the main future of this project because it use to
     be workflow data for running everywhere that you want. It use lightweight
     coding line to execute it.
     """
@@ -398,8 +559,7 @@ class Pipeline(BaseModel):
     desc: Optional[str] = Field(
         default=None,
         description=(
-            "A pipeline description that is able to be string of markdown "
-            "content."
+            "A pipeline description that can be string of markdown content."
         ),
     )
     params: dict[str, Param] = Field(
@@ -414,6 +574,16 @@ class Pipeline(BaseModel):
         default_factory=dict,
         description="A mapping of job ID and job model that already loaded.",
     )
+    run_id: Optional[str] = Field(
+        default=None,
+        description="A running pipeline ID.",
+        repr=False,
+    )
+
+    @property
+    def new_run_id(self) -> str:
+        """Running ID of this pipeline that always generate new unique value."""
+        return gen_id(self.name, unique=True)
 
     @classmethod
     def from_loader(
@@ -421,11 +591,14 @@ class Pipeline(BaseModel):
         name: str,
         externals: DictData | None = None,
     ) -> Self:
-        """Create Pipeline instance from the Loader object.
+        """Create Pipeline instance from the Loader object that only receive
+        an input pipeline name. The loader object will use this pipeline name to
+        searching configuration data of this pipeline model in conf path.
 
         :param name: A pipeline name that want to pass to Loader object.
         :param externals: An external parameters that want to pass to Loader
             object.
+        :rtype: Self
         """
         loader: Loader = Loader(name, externals=(externals or {}))
         loader_data: DictData = copy.deepcopy(loader.data)
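
Editor's note: a usage sketch for the loader entry point described above; the import path and pipeline name are assumptions, and it only works when a matching YAML config exists under the configured conf path:

    from ddeutil.workflow.pipeline import Pipeline

    pipeline = Pipeline.from_loader(name="pipe-hello-world", externals={})
    print(pipeline.name, list(pipeline.jobs))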
@@ -448,6 +621,8 @@ class Pipeline(BaseModel):
             on = [on]
         if any(not isinstance(i, (dict, str)) for i in on):
             raise TypeError("The ``on`` key should be list of str or dict")
+
+        # NOTE: Pass on value to Loader and keep on model object to on field
         data["on"] = [
             (
                 Loader(n, externals=(externals or {})).data
@@ -473,18 +648,48 @@ class Pipeline(BaseModel):
         }
         return values
 
+    @field_validator("desc", mode="after")
+    def ___prepare_desc(cls, value: str) -> str:
+        """Prepare description string that was created on a template."""
+        return dedent(value)
+
     @model_validator(mode="after")
-    def __validate_jobs_need(self):
+    def __validate_jobs_need_and_prepare_running_id(self):
+        """Validate each need job in any jobs should exists."""
         for job in self.jobs:
             if not_exist := [
                 need for need in self.jobs[job].needs if need not in self.jobs
             ]:
                 raise PipelineException(
                     f"This needed jobs: {not_exist} do not exist in this "
-                    f"pipeline."
+                    f"pipeline, {self.name!r}"
                 )
+
+            # NOTE: update a job id with its job id from pipeline template
+            self.jobs[job].id = job
+
+        if self.run_id is None:
+            self.run_id = self.new_run_id
+
+        # VALIDATE: Validate pipeline name should not dynamic with params
+        #   template.
+        if has_template(self.name):
+            raise ValueError(
+                f"Pipeline name should not has any template, please check, "
+                f"{self.name!r}."
+            )
+
         return self
 
+    def get_running_id(self, run_id: str) -> Self:
+        """Return Pipeline model object that changing pipeline running ID with
+        an input running ID.
+
+        :param run_id: A replace pipeline running ID.
+        :rtype: Self
+        """
+        return self.model_copy(update={"run_id": run_id})
+
     def job(self, name: str) -> Job:
         """Return Job model that exists on this pipeline.
 
@@ -495,7 +700,10 @@ class Pipeline(BaseModel):
         :returns: A job model that exists on this pipeline by input name.
         """
         if name not in self.jobs:
-            raise ValueError(f"Job {name!r} does not exists")
+            raise ValueError(
+                f"A Job {name!r} does not exists in this pipeline, "
+                f"{self.name!r}"
+            )
         return self.jobs[name]
 
     def parameterize(self, params: DictData) -> DictData:
@@ -533,52 +741,146 @@ class Pipeline(BaseModel):
     def release(
         self,
         on: On,
-        params: DictData | None = None,
+        params: DictData,
         *,
-        waiting_sec: int = 600,
-        sleep_interval: int = 10,
-    ) -> str:
+        waiting_sec: int = 55,
+        sleep_interval: int = 15,
+        log: Log = None,
+        lq: list[datetime] = None,
+    ) -> Result:
         """Start running pipeline with the on schedule in period of 30 minutes.
         That mean it will still running at background 30 minutes until the
         schedule matching with its time.
+
+        This method allow pipeline use log object to save the execution
+        result to log destination like file log to local /logs directory.
+
+        :rtype: Result
         """
-        params: DictData = params or {}
-        logging.info(f"[CORE] Start release: {self.name!r} : {on.cronjob}")
+        delay()
+        log: Log = log or FileLog
+        current_running_time = datetime.now()
+        if not (
+            latest_running_time := log.latest_point(name=self.name, queue=lq)
+        ) or (
+            latest_running_time.replace(tzinfo=ZoneInfo(on.tz))
+            < current_running_time.replace(tzinfo=ZoneInfo(on.tz))
+        ):
+            latest_running_time: datetime = current_running_time.replace(
+                tzinfo=ZoneInfo(on.tz)
+            )
+        else:
+            latest_running_time: datetime = latest_running_time.replace(
+                tzinfo=ZoneInfo(on.tz)
+            )
 
-        gen: CronRunner = on.generate(datetime.now())
+        gen: CronRunner = on.generate(
+            latest_running_time + timedelta(seconds=1)
+        )
         tz: ZoneInfo = gen.tz
+
+        # NOTE: get next schedule time that generate from now.
         next_running_time: datetime = gen.next
 
-        if get_diff_sec(next_running_time, tz=tz) < waiting_sec:
+        # NOTE: get next utils it does not logging.
+        # while log.is_pointed(self.name, next_running_time, queue=lq):
+        #     next_running_time: datetime = gen.next
+        while log.is_pointed(self.name, next_running_time, queue=lq):
+            next_running_time: datetime = gen.next
+
+        heappush(lq, next_running_time)
+
+        # VALIDATE: Check the different time between the next schedule time and
+        #   now that less than waiting period (second unit).
+        if get_diff_sec(next_running_time, tz=tz) <= waiting_sec:
             logging.debug(
-                f"[CORE]: {self.name} closely to run >> "
-                f"{next_running_time:%Y-%m-%d %H:%M:%S}"
+                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"Closely to run >> {next_running_time:%Y-%m-%d %H:%M:%S}"
            )
 
             # NOTE: Release when the time is nearly to schedule time.
-            while (duration := get_diff_sec(next_running_time, tz=tz)) > 15:
-                time.sleep(sleep_interval)
+            while (duration := get_diff_sec(next_running_time, tz=tz)) > (
+                sleep_interval + 5
+            ):
                 logging.debug(
-                    f"[CORE]: {self.name!r} : Sleep until: {duration}"
+                    f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                    f"Sleep until: {duration}"
                 )
+                time.sleep(sleep_interval)
 
-            time.sleep(1)
-            rs: Result = self.execute(params=params)
-            logging.debug(f"{rs.context}")
+            time.sleep(0.5)
 
-            return f"[CORE]: Start Execute: {self.name}"
-        return f"[CORE]: {self.name} does not closely to run yet."
+            # NOTE: Release parameter that use to change if params has
+            #   templating.
+            release_params: DictData = {
+                "release": {
+                    "logical_date": next_running_time,
+                },
+            }
 
-    def poke(self, params: DictData | None = None):
-        """Poke pipeline threading task for executing with its schedules that
-        was set on the `on`.
+            # WARNING: Re-create pipeline object that use new running pipeline
+            #   ID.
+            pipeline: Self = self.get_running_id(run_id=self.new_run_id)
+            rs: Result = pipeline.execute(
+                params=param2template(params, release_params),
+            )
+            logging.debug(
+                f"({pipeline.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"End release"
+            )
+
+            del pipeline
+
+            rs.set_parent_run_id(self.run_id)
+            rs_log: Log = log.model_validate(
+                {
+                    "name": self.name,
+                    "on": str(on.cronjob),
+                    "release": next_running_time,
+                    "context": rs.context,
+                    "parent_run_id": rs.run_id,
+                    "run_id": rs.run_id,
+                }
+            )
+            rs_log.save()
+        else:
+            logging.debug(
+                f"({self.run_id}) [CORE]: {self.name!r} : {on.cronjob} : "
+                f"Does not closely >> {next_running_time:%Y-%m-%d %H:%M:%S}"
+            )
+            rs = Result(status=0, context={"params": params})
+
+        if lq is None:
+            return rs
+
+        lq.remove(next_running_time)
+        time.sleep(0.25)
+        return rs
+
+    def poke(
+        self,
+        params: DictData | None = None,
+        *,
+        log: Log | None = None,
+    ) -> list[Result]:
+        """Poke pipeline with threading executor pool for executing with all its
+        schedules that was set on the `on` value. This method will observe its
+        schedule that nearing to run with the ``self.release()`` method.
+
+        :param params: A parameters that want to pass to the release method.
+        :param log: A log object that want to use on this poking process.
+        :rtype: list[Result]
         """
         params: DictData = params or {}
-        logging.info(
-            f"[CORE]: Start Poking: {self.name!r} :"
-            f"{gen_id(self.name, unique=True)}"
-        )
-        results = []
+        logging.info(f"({self.run_id}) [CORE]: Start Poking: {self.name!r} ...")
+        results: list[Result] = []
+        log_queue: list[datetime] = []
+
+        # NOTE: If this pipeline does not set schedule, it will return empty
+        #   result.
+        if len(self.on) == 0:
+            return results
+
         with ThreadPoolExecutor(
             max_workers=int(
                 os.getenv("WORKFLOW_CORE_MAX_PIPELINE_POKING", "4")
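
Editor's note: poke() fans every On schedule out to release() through a thread pool sized by WORKFLOW_CORE_MAX_PIPELINE_POKING. A usage sketch; the pipeline name, parameter key, and pool size are placeholders:

    import os

    os.environ["WORKFLOW_CORE_MAX_PIPELINE_POKING"] = "2"
    pipeline = Pipeline.from_loader(name="pipe-scheduling", externals={})
    results = pipeline.poke(params={"asat-dt": "2024-01-01"})
    for rs in results:
        print(rs.status, rs.context.get("params", {}))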
@@ -589,21 +891,31 @@ class Pipeline(BaseModel):
                     self.release,
                     on,
                     params=params,
+                    log=log,
+                    lq=log_queue,
                 )
                 for on in self.on
             ]
             for future in as_completed(futures):
-                rs = future.result()
-                logging.info(rs)
+                rs: Result = future.result()
+                logging.info(rs.context.get("params", {}))
                 results.append(rs)
+
+        if len(log_queue) > 0:
+            logging.error(
+                f"({self.run_id}) [CORE]: Log Queue does empty when poke "
+                f"is finishing."
+            )
+
         return results
 
     def job_execute(
         self,
         job: str,
         params: DictData,
-    ):
+    ) -> Result:
         """Job Executor that use on pipeline executor.
+
         :param job: A job ID that want to execute.
         :param params: A params that was parameterized from pipeline execution.
         """
@@ -612,19 +924,23 @@ class Pipeline(BaseModel):
             raise PipelineException(
                 f"The job ID: {job} does not exists on {self.name!r} pipeline."
             )
+        try:
+            logging.info(f"({self.run_id}) [PIPELINE]: Start execute: {job!r}")
 
-        job_obj: Job = self.jobs[job]
-
-        rs: Result = job_obj.execute(params=params)
-        if rs.status != 0:
-            logging.warning(
-                f"Getting status does not equal zero on job: {job}."
-            )
-            return Result(
-                status=1, context={job: job_obj.set_outputs(rs.context)}
-            )
+            # IMPORTANT:
+            #   Change any job running IDs to this pipeline running ID.
+            job_obj: Job = self.jobs[job].get_running_id(self.run_id)
+            j_rs: Result = job_obj.execute(params=params)
 
-        return Result(status=0, context={job: job_obj.set_outputs(rs.context)})
+        except JobException as err:
+            raise PipelineException(
+                f"The job ID: {job} get error: {err.__class__.__name__}:"
+                f"\n{err}"
+            ) from None
+        return Result(
+            status=j_rs.status,
+            context={job: job_obj.set_outputs(j_rs.context)},
+        )
 
     def execute(
         self,
@@ -641,9 +957,8 @@ class Pipeline(BaseModel):
             for limit time of execution and waiting job dependency.
         :rtype: Result
 
-        ---
-
         See Also:
+        ---
 
         The result of execution process for each jobs and stages on this
         pipeline will keeping in dict which able to catch out with all jobs and
@@ -655,10 +970,7 @@ class Pipeline(BaseModel):
            ... ${job-name}.stages.${stage-id}.outputs.${key}
 
         """
-        logging.info(
-            f"[CORE]: Start Execute: {self.name}:"
-            f"{gen_id(self.name, unique=True)}"
-        )
+        logging.info(f"({self.run_id}) [CORE]: Start Execute: {self.name} ...")
         params: DictData = params or {}
 
         # NOTE: It should not do anything if it does not have job.
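
Editor's note: the docstring above addresses stage outputs as ${job-name}.stages.${stage-id}.outputs.${key}, so the "jobs" context that execute() builds nests roughly like this; the job and stage IDs are placeholders:

    context = {
        "jobs": {
            "demo-job": {
                "stages": {
                    "echo-stage": {"outputs": {"message": "Hello World"}},
                },
            },
        },
    }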
@@ -666,68 +978,165 @@ class Pipeline(BaseModel):
             logging.warning("[PIPELINE]: This pipeline does not have any jobs")
             return Result(status=0, context=params)
 
-        # NOTE: create a job queue that keep the job that want to running after
+        # NOTE: Create a job queue that keep the job that want to running after
         #   it dependency condition.
         jq: Queue = Queue()
         for job_id in self.jobs:
             jq.put(job_id)
 
+        # NOTE: Create start timestamp
         ts: float = time.monotonic()
-        not_time_out_flag: bool = True
 
         # NOTE: Create result context that will pass this context to any
        #   execution dependency.
         rs: Result = Result(context=self.parameterize(params))
-        if (
-            worker := int(os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "1"))
-        ) > 1:
-            # IMPORTANT: The job execution can run parallel and waiting by
-            #   needed.
-            with ThreadPoolExecutor(max_workers=worker) as executor:
-                futures: list[Future] = []
-                while not jq.empty() and (
-                    not_time_out_flag := ((time.monotonic() - ts) < timeout)
-                ):
-                    job_id: str = jq.get()
-                    logging.info(
-                        f"[PIPELINE]: Start execute the job: {job_id!r}"
-                    )
-                    job: Job = self.jobs[job_id]
-                    if any(
-                        need not in rs.context["jobs"] for need in job.needs
-                    ):
-                        jq.put(job_id)
-                    futures.append(
-                        executor.submit(
-                            self.job_execute,
-                            job_id,
-                            params=copy.deepcopy(rs.context),
-                        ),
+        try:
+            rs.receive(
+                self.__exec_non_threading(rs, ts, timeout=timeout)
+                if (
+                    worker := int(
+                        os.getenv("WORKFLOW_CORE_MAX_JOB_PARALLEL", "2")
                     )
-                for future in as_completed(futures):
-                    job_rs: Result = future.result(timeout=20)
-                    rs.context["jobs"].update(job_rs.context)
-        else:
-            logging.info(
-                f"[CORE]: Run {self.name} with non-threading job executor"
+                )
+                == 1
+                else self.__exec_threading(
+                    rs, ts, worker=worker, timeout=timeout
+                )
             )
-            while not jq.empty() and (
+            return rs
+        except PipelineException as err:
+            rs.context.update({"error": {"message": str(err)}})
+            rs.status = 1
+            return rs
+
+    def __exec_threading(
+        self,
+        rs: Result,
+        ts: float,
+        *,
+        worker: int = 2,
+        timeout: int = 600,
+    ) -> Result:
+        """Pipeline threading execution.
+
+        :param rs:
+        :param ts:
+        :param timeout: A second value unit that bounding running time.
+        :param worker: A number of threading executor pool size.
+        :rtype: Result
+        """
+        not_time_out_flag: bool = True
+        logging.debug(
+            f"({self.run_id}): [CORE]: Run {self.name} with threading job "
+            f"executor"
+        )
+
+        # NOTE: Create a job queue that keep the job that want to running after
+        #   it dependency condition.
+        job_queue: Queue = Queue()
+        for job_id in self.jobs:
+            job_queue.put(job_id)
+
+        # IMPORTANT: The job execution can run parallel and waiting by
+        #   needed.
+        with ThreadPoolExecutor(max_workers=worker) as executor:
+            futures: list[Future] = []
+            while not job_queue.empty() and (
                 not_time_out_flag := ((time.monotonic() - ts) < timeout)
             ):
-                job_id: str = jq.get()
-                logging.info(f"[PIPELINE]: Start execute the job: {job_id!r}")
+                job_id: str = job_queue.get()
                 job: Job = self.jobs[job_id]
-                if any(need not in rs.context["jobs"] for need in job.needs):
-                    jq.put(job_id)
 
-                job_rs = self.job_execute(
-                    job_id, params=copy.deepcopy(rs.context)
+                if any(need not in rs.context["jobs"] for need in job.needs):
+                    job_queue.put(job_id)
+                    time.sleep(0.5)
+                    continue
+
+                futures.append(
+                    executor.submit(
+                        self.job_execute,
+                        job_id,
+                        params=copy.deepcopy(rs.context),
+                    ),
                 )
-                rs.context["jobs"].update(job_rs.context)
+                job_queue.task_done()
 
-        if not not_time_out_flag:
-            logging.warning("Execution of pipeline was time out")
-            rs.status = 1
+            # NOTE: Wait for all items to finish processing
+            job_queue.join()
+
+            for future in as_completed(futures):
+                if err := future.exception():
+                    logging.error(f"{err}")
+                    raise PipelineException(f"{err}")
+
+                # NOTE: Update job result to pipeline result.
+                rs.receive_jobs(future.result(timeout=20))
+
+        if not_time_out_flag:
+            rs.status = 0
             return rs
-        rs.status = 0
-        return rs
+
+        # NOTE: Raise timeout error.
+        logging.warning(
+            f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
+        )
+        raise PipelineException(
+            f"Execution of pipeline: {self.name} was timeout"
+        )
+
+    def __exec_non_threading(
+        self,
+        rs: Result,
+        ts: float,
+        *,
+        timeout: int = 600,
+    ) -> Result:
+        """Pipeline non-threading execution.
+
+        :param rs:
+        :param ts:
+        :param timeout: A second value unit that bounding running time.
+        :rtype: Result
+        """
+        not_time_out_flag: bool = True
+        logging.debug(
+            f"({self.run_id}) [CORE]: Run {self.name} with non-threading job "
+            f"executor"
+        )
+        # NOTE: Create a job queue that keep the job that want to running after
+        #   it dependency condition.
+        job_queue: Queue = Queue()
+        for job_id in self.jobs:
+            job_queue.put(job_id)
+
+        while not job_queue.empty() and (
+            not_time_out_flag := ((time.monotonic() - ts) < timeout)
+        ):
+            job_id: str = job_queue.get()
+            job: Job = self.jobs[job_id]
+
+            # NOTE:
+            if any(need not in rs.context["jobs"] for need in job.needs):
+                job_queue.put(job_id)
+                time.sleep(0.5)
+                continue
+
+            # NOTE: Start job execution.
+            job_rs = self.job_execute(job_id, params=copy.deepcopy(rs.context))
+            rs.context["jobs"].update(job_rs.context)
+            job_queue.task_done()
+
+        # NOTE: Wait for all items to finish processing
+        job_queue.join()
+
+        if not_time_out_flag:
+            rs.status = 0
+            return rs
+
+        # NOTE: Raise timeout error.
+        logging.warning(
+            f"({self.run_id}) [PIPELINE]: Execution of pipeline was timeout"
+        )
+        raise PipelineException(
+            f"Execution of pipeline: {self.name} was timeout"
+        )
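
Editor's note: both executors above share the same requeue-until-needs-met loop. A standalone sketch of that scheduling idea, stripped of the pipeline specifics; the job names and needs are invented for illustration:

    from queue import Queue

    needs = {"load": ["transform"], "transform": ["extract"], "extract": []}
    finished: set = set()

    q: Queue = Queue()
    for job_id in needs:
        q.put(job_id)

    while not q.empty():
        job_id = q.get()
        if any(need not in finished for need in needs[job_id]):
            q.put(job_id)        # dependency not ready yet: push back and retry
            q.task_done()
            continue
        finished.add(job_id)     # stand-in for self.job_execute(job_id, ...)
        q.task_done()

    print(finished)              # all three jobs complete in dependency order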