ddeutil-workflow 0.0.78__py3-none-any.whl → 0.0.80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. ddeutil/workflow/__about__.py +1 -1
  2. ddeutil/workflow/__init__.py +2 -6
  3. ddeutil/workflow/api/routes/job.py +2 -2
  4. ddeutil/workflow/api/routes/logs.py +5 -5
  5. ddeutil/workflow/api/routes/workflows.py +3 -3
  6. ddeutil/workflow/audits.py +547 -176
  7. ddeutil/workflow/cli.py +19 -1
  8. ddeutil/workflow/conf.py +10 -20
  9. ddeutil/workflow/event.py +15 -6
  10. ddeutil/workflow/job.py +147 -74
  11. ddeutil/workflow/params.py +172 -58
  12. ddeutil/workflow/plugins/__init__.py +0 -0
  13. ddeutil/workflow/plugins/providers/__init__.py +0 -0
  14. ddeutil/workflow/plugins/providers/aws.py +908 -0
  15. ddeutil/workflow/plugins/providers/az.py +1003 -0
  16. ddeutil/workflow/plugins/providers/container.py +703 -0
  17. ddeutil/workflow/plugins/providers/gcs.py +826 -0
  18. ddeutil/workflow/result.py +6 -4
  19. ddeutil/workflow/reusables.py +151 -95
  20. ddeutil/workflow/stages.py +28 -28
  21. ddeutil/workflow/traces.py +1697 -541
  22. ddeutil/workflow/utils.py +109 -67
  23. ddeutil/workflow/workflow.py +42 -30
  24. {ddeutil_workflow-0.0.78.dist-info → ddeutil_workflow-0.0.80.dist-info}/METADATA +39 -19
  25. ddeutil_workflow-0.0.80.dist-info/RECORD +36 -0
  26. ddeutil_workflow-0.0.78.dist-info/RECORD +0 -30
  27. {ddeutil_workflow-0.0.78.dist-info → ddeutil_workflow-0.0.80.dist-info}/WHEEL +0 -0
  28. {ddeutil_workflow-0.0.78.dist-info → ddeutil_workflow-0.0.80.dist-info}/entry_points.txt +0 -0
  29. {ddeutil_workflow-0.0.78.dist-info → ddeutil_workflow-0.0.80.dist-info}/licenses/LICENSE +0 -0
  30. {ddeutil_workflow-0.0.78.dist-info → ddeutil_workflow-0.0.80.dist-info}/top_level.txt +0 -0
ddeutil/workflow/cli.py CHANGED
@@ -73,6 +73,10 @@ def init() -> None:
       uses: tasks/say-hello-func@example
       with:
         name: ${{ params.name }}
+  second-job:
+
+    - name: "Hello Env"
+      echo: "Start say hi with ${ WORKFLOW_DEMO_HELLO }"
     """
     ).lstrip("\n")
 )
@@ -98,8 +102,22 @@ def init() -> None:
 
     init_path = task_path / "__init__.py"
     init_path.write_text("from .example import hello_world_task\n")
+
+    dotenv_file = Path(".env")
+    mode: str = "a" if dotenv_file.exists() else "w"
+    with dotenv_file.open(mode=mode) as f:
+        f.write("\n# Workflow env vars\n")
+        f.write(
+            "WORKFLOW_DEMO_HELLO=foo\n"
+            "WORKFLOW_CORE_DEBUG_MODE=true\n"
+            "WORKFLOW_LOG_TIMEZONE=Asia/Bangkok\n"
+            "WORKFLOW_LOG_TRACE_ENABLE_WRITE=false\n"
+            "WORKFLOW_LOG_AUDIT_ENABLE_WRITE=true\n"
+        )
+
+    typer.echo("Starter command:")
     typer.echo(
-        "Starter command: `workflow-cli workflows execute --name=wf-example`"
+        "> `source .env && workflow-cli workflows execute --name=wf-example`"
     )
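The demo `second-job` added above reads `WORKFLOW_DEMO_HELLO` from the environment, so the variables appended to `.env` must be exported (for example with `source .env`) before the starter command runs. A hypothetical Python equivalent of that export step, for illustration only:

    import os
    from pathlib import Path

    # Hypothetical stand-in for `source .env`: export the variables written by
    # `workflow-cli init` so ${ WORKFLOW_DEMO_HELLO } can resolve at runtime.
    for line in Path(".env").read_text().splitlines():
        if line and not line.startswith("#") and "=" in line:
            key, _, value = line.partition("=")
            os.environ.setdefault(key.strip(), value.strip())

    print(os.environ.get("WORKFLOW_DEMO_HELLO"))  # foo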
ddeutil/workflow/conf.py CHANGED
@@ -40,12 +40,12 @@ Note:
     ${VAR_NAME} syntax and provide extensive validation capabilities.
 """
 import copy
+import json
 import os
 from collections.abc import Iterator
 from functools import cached_property
 from pathlib import Path
-from typing import Final, Optional, TypeVar, Union
-from urllib.parse import ParseResult, urlparse
+from typing import Any, Final, Optional, TypeVar, Union
 from zoneinfo import ZoneInfo
 
 from ddeutil.core import str2bool
@@ -122,8 +122,8 @@ class Config:  # pragma: no cov
         return [r.strip() for r in regis_filter_str.split(",")]
 
     @property
-    def trace_url(self) -> ParseResult:
-        return urlparse(env("LOG_TRACE_URL", "file:./logs"))
+    def trace_handlers(self) -> list[dict[str, Any]]:
+        return json.loads(env("LOG_TRACE_HANDLERS", '[{"type": "console"}]'))
 
     @property
     def debug(self) -> bool:
@@ -155,23 +155,11 @@ class Config:  # pragma: no cov
         )
 
     @property
-    def log_format_file(self) -> str:
-        return env(
-            "LOG_FORMAT_FILE",
-            (
-                "{datetime} ({process:5d}, {thread:5d}) ({cut_id}) "
-                "{message:120s} ({filename}:{lineno})"
-            ),
+    def audit_conf(self) -> str:
+        return json.loads(
+            env("LOG_AUDIT_URL", '{"type": "file", "path": "./audits"}')
         )
 
-    @property
-    def enable_write_log(self) -> bool:
-        return str2bool(env("LOG_TRACE_ENABLE_WRITE", "false"))
-
-    @property
-    def audit_url(self) -> ParseResult:
-        return urlparse(env("LOG_AUDIT_URL", "file:./audits"))
-
     @property
     def enable_write_audit(self) -> bool:
         return str2bool(env("LOG_AUDIT_ENABLE_WRITE", "false"))
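Both logging settings above now take JSON values instead of URL strings. A minimal sketch of supplying and parsing them, assuming the `WORKFLOW_` prefix used by the `.env` that `workflow-cli init` writes; the defaults are the ones shown in this diff:

    import json
    import os

    # Assumed environment variable names (WORKFLOW_ prefix) with the defaults
    # from the diff above: a console trace handler and a file-based audit store.
    os.environ.setdefault("WORKFLOW_LOG_TRACE_HANDLERS", '[{"type": "console"}]')
    os.environ.setdefault(
        "WORKFLOW_LOG_AUDIT_URL", '{"type": "file", "path": "./audits"}'
    )

    handlers = json.loads(os.environ["WORKFLOW_LOG_TRACE_HANDLERS"])
    audit = json.loads(os.environ["WORKFLOW_LOG_AUDIT_URL"])
    print(handlers[0]["type"], audit["path"])  # console ./audits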
@@ -464,7 +452,9 @@ def dynamic(
     conf: Optional[T] = getattr(config, key, None) if f is None else f
     if extra is None:
         return conf
-    if not isinstance(extra, type(conf)):
+    # NOTE: Fix type checking for boolean value and int type like
+    # `isinstance(False, int)` which return True.
+    if type(extra) is not type(conf):
         raise TypeError(
             f"Type of config {key!r} from extras: {extra!r} does not valid "
             f"as config {type(conf)}."
ddeutil/workflow/event.py CHANGED
@@ -19,6 +19,9 @@ Classes:
     Crontab: Main cron-based event scheduler.
     CrontabYear: Enhanced cron scheduler with year constraints.
     ReleaseEvent: Release-based event triggers.
+    FileEvent: File system monitoring triggers.
+    WebhookEvent: API/webhook-based triggers.
+    DatabaseEvent: Database change monitoring triggers.
     SensorEvent: Sensor-based event monitoring.
 
 Example:
@@ -312,11 +315,9 @@ class CrontabYear(Crontab):
         cronjob: CronJobYear instance for year-aware schedule validation and generation.
     """
 
-    cronjob: CronJobYear = (
-        Field(
-            description=(
-                "A Cronjob object that use for validate and generate datetime."
-            ),
+    cronjob: CronJobYear = Field(
+        description=(
+            "A Cronjob object that use for validate and generate datetime."
         ),
     )
 
@@ -369,7 +370,15 @@ Cron = Annotated[
 
 
 class Event(BaseModel):
-    """Event model."""
+    """Event model with comprehensive trigger support.
+
+    Supports multiple types of event triggers including cron scheduling,
+    file monitoring, webhooks, database changes, sensor-based triggers,
+    polling-based triggers, message queue events, stream processing events,
+    batch processing events, data quality events, API rate limiting events,
+    data lineage events, ML pipeline events, data catalog events,
+    infrastructure events, compliance events, and business events.
+    """
 
     schedule: list[Cron] = Field(
         default_factory=list,
ddeutil/workflow/job.py CHANGED
@@ -72,7 +72,7 @@ from .result import (
 )
 from .reusables import has_template, param2template
 from .stages import Stage
-from .traces import Trace, get_trace
+from .traces import TraceManager, get_trace
 from .utils import cross_product, filter_func, gen_id
 
 MatrixFilter = list[dict[str, Union[str, int]]]
@@ -249,14 +249,21 @@ class RunsOn(str, Enum):
     SELF_HOSTED = "self_hosted"
     AZ_BATCH = "azure_batch"
     AWS_BATCH = "aws_batch"
+    GCP_BATCH = "gcp_batch"
     CLOUD_BATCH = "cloud_batch"
     DOCKER = "docker"
+    CONTAINER = "container"
 
 
+# Import constants for backward compatibility
 LOCAL = RunsOn.LOCAL
 SELF_HOSTED = RunsOn.SELF_HOSTED
 AZ_BATCH = RunsOn.AZ_BATCH
+AWS_BATCH = RunsOn.AWS_BATCH
+GCP_BATCH = RunsOn.GCP_BATCH
+CLOUD_BATCH = RunsOn.CLOUD_BATCH
 DOCKER = RunsOn.DOCKER
+CONTAINER = RunsOn.CONTAINER
 
 
 class BaseRunsOn(BaseModel):  # pragma: no cov
@@ -328,6 +335,98 @@ class OnDocker(BaseRunsOn):  # pragma: no cov
     args: DockerArgs = Field(default_factory=DockerArgs, alias="with")
 
 
+class ContainerArgs(BaseModel):
+    """Container arguments."""
+
+    image: str = Field(description="Docker image to use")
+    container_name: Optional[str] = Field(
+        default=None, description="Container name"
+    )
+    volumes: Optional[list[dict[str, str]]] = Field(
+        default=None, description="Volume mounts"
+    )
+    environment: Optional[dict[str, str]] = Field(
+        default=None, description="Environment variables"
+    )
+    network: Optional[dict[str, Any]] = Field(
+        default=None, description="Network configuration"
+    )
+    resources: Optional[dict[str, Any]] = Field(
+        default=None, description="Resource limits"
+    )
+    working_dir: Optional[str] = Field(
+        default="/app", description="Working directory"
+    )
+    user: Optional[str] = Field(default=None, description="User to run as")
+    command: Optional[str] = Field(
+        default=None, description="Override default command"
+    )
+    timeout: int = Field(
+        default=3600, description="Execution timeout in seconds"
+    )
+    remove: bool = Field(
+        default=True, description="Remove container after execution"
+    )
+    docker_host: Optional[str] = Field(
+        default=None, description="Docker host URL"
+    )
+
+
+class OnContainer(BaseRunsOn):  # pragma: no cov
+    """Runs-on Container."""
+
+    type: RunsOn = CONTAINER
+    args: ContainerArgs = Field(default_factory=ContainerArgs, alias="with")
+
+
+class AWSBatchArgs(BaseModel):
+    """AWS Batch arguments."""
+
+    job_queue_arn: str = Field(description="AWS Batch job queue ARN")
+    s3_bucket: str = Field(description="S3 bucket for file storage")
+    region_name: str = Field(default="us-east-1", description="AWS region")
+    aws_access_key_id: Optional[str] = Field(
+        default=None, description="AWS access key ID"
+    )
+    aws_secret_access_key: Optional[str] = Field(
+        default=None, description="AWS secret access key"
+    )
+    aws_session_token: Optional[str] = Field(
+        default=None, description="AWS session token"
+    )
+
+
+class OnAWSBatch(BaseRunsOn):  # pragma: no cov
+    """Runs-on AWS Batch."""
+
+    type: RunsOn = AWS_BATCH
+    args: AWSBatchArgs = Field(alias="with")
+
+
+class GCPBatchArgs(BaseModel):
+    """Google Cloud Batch arguments."""
+
+    project_id: str = Field(description="Google Cloud project ID")
+    region: str = Field(description="Google Cloud region")
+    gcs_bucket: str = Field(description="Google Cloud Storage bucket")
+    credentials_path: Optional[str] = Field(
+        default=None, description="Path to service account credentials"
+    )
+    machine_type: str = Field(
+        default="e2-standard-4", description="Machine type"
+    )
+    max_parallel_tasks: int = Field(
+        default=1, description="Maximum parallel tasks"
+    )
+
+
+class OnGCPBatch(BaseRunsOn):  # pragma: no cov
+    """Runs-on Google Cloud Batch."""
+
+    type: RunsOn = GCP_BATCH
+    args: GCPBatchArgs = Field(alias="with")
+
+
 def get_discriminator_runs_on(model: dict[str, Any]) -> RunsOn:
     """Get discriminator of the RunsOn models."""
     t: str = model.get("type")
@@ -339,6 +438,9 @@ RunsOnModel = Annotated[
         Annotated[OnSelfHosted, Tag(SELF_HOSTED)],
         Annotated[OnDocker, Tag(DOCKER)],
         Annotated[OnLocal, Tag(LOCAL)],
+        Annotated[OnContainer, Tag(CONTAINER)],
+        Annotated[OnAWSBatch, Tag(AWS_BATCH)],
+        Annotated[OnGCPBatch, Tag(GCP_BATCH)],
     ],
     Discriminator(get_discriminator_runs_on),
 ]
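As a rough sketch of how the extended union resolves a `runs-on` block: the `type` key drives the discriminator, and the `with` mapping is validated against the matching args model. The values below are invented, and the Pydantic v2 `model_validate` call is an assumption based on the models defined in this file:

    # Hypothetical runs-on block for the new container target; "with" is
    # aliased onto ContainerArgs by OnContainer above.
    runs_on_block = {
        "type": "container",
        "with": {"image": "python:3.11-slim", "timeout": 600},
    }
    on = OnContainer.model_validate(runs_on_block)
    print(on.type, on.args.image, on.args.timeout)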
@@ -482,7 +584,8 @@ class Job(BaseModel):
         return self
 
     @field_serializer("runs_on")
-    def __serialize_runs_on(self, value: RunsOnModel):
+    def __serialize_runs_on(self, value: RunsOnModel) -> DictData:
+        """Serialize the runs_on field."""
         return value.model_dump(by_alias=True)
 
     def stage(self, stage_id: str) -> Stage:
@@ -776,7 +879,7 @@ class Job(BaseModel):
         ts: float = time.monotonic()
         parent_run_id: str = run_id
         run_id: str = gen_id((self.id or "EMPTY"), unique=True)
-        trace: Trace = get_trace(
+        trace: TraceManager = get_trace(
             run_id, parent_run_id=parent_run_id, extras=self.extras
         )
         trace.info(
@@ -795,7 +898,14 @@ class Job(BaseModel):
         elif self.runs_on.type == SELF_HOSTED:  # pragma: no cov
             pass
         elif self.runs_on.type == AZ_BATCH:  # pragma: no cov
-            pass
+            from .plugins.providers.az import azure_batch_execute
+
+            return azure_batch_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
         elif self.runs_on.type == DOCKER:  # pragma: no cov
             return docker_execution(
                 self,
@@ -803,6 +913,33 @@ class Job(BaseModel):
                 run_id=parent_run_id,
                 event=event,
             ).make_info({"execution_time": time.monotonic() - ts})
+        elif self.runs_on.type == CONTAINER:  # pragma: no cov
+            from .plugins.providers.container import container_execute
+
+            return container_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
+        elif self.runs_on.type == AWS_BATCH:  # pragma: no cov
+            from .plugins.providers.aws import aws_batch_execute
+
+            return aws_batch_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
+        elif self.runs_on.type == GCP_BATCH:  # pragma: no cov
+            from .plugins.providers.gcs import gcp_batch_execute
+
+            return gcp_batch_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
 
         trace.error(
             f"[JOB]: Execution not support runs-on: {self.runs_on.type.value!r} "
@@ -879,7 +1016,7 @@ def local_execute_strategy(
 
     :rtype: tuple[Status, DictData]
     """
-    trace: Trace = get_trace(
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     if strategy:
@@ -1015,7 +1152,7 @@ def local_execute(
     ts: float = time.monotonic()
     parent_run_id: StrOrNone = run_id
     run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace: Trace = get_trace(
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     context: DictData = {"status": WAIT}
@@ -1158,7 +1295,7 @@ def self_hosted_execute(
     """
     parent_run_id: StrOrNone = run_id
     run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace: Trace = get_trace(
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     context: DictData = {"status": WAIT}
@@ -1221,71 +1358,8 @@ def self_hosted_execute(
     )
 
 
-def azure_batch_execute(
-    job: Job,
-    params: DictData,
-    *,
-    run_id: StrOrNone = None,
-    event: Optional[Event] = None,
-) -> Result:  # pragma: no cov
-    """Azure Batch job execution that will run all job's stages on the Azure
-    Batch Node and extract the result file to be returning context result.
-
-    Steps:
-        - Create a Batch account and a Batch pool.
-        - Create a Batch job and add tasks to the job. Each task represents a
-          command to run on a compute node.
-        - Specify the command to run the Python script in the task. You can use
-          the cmd /c command to run the script with the Python interpreter.
-        - Upload the Python script and any required input files to Azure Storage
-          Account.
-        - Configure the task to download the input files from Azure Storage to
-          the compute node before running the script.
-        - Monitor the job and retrieve the output files from Azure Storage.
-
-    References:
-        - https://docs.azure.cn/en-us/batch/tutorial-parallel-python
-
-    :param job:
-    :param params:
-    :param run_id:
-    :param event:
-
-    :rtype: Result
-    """
-    parent_run_id: StrOrNone = run_id
-    run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace: Trace = get_trace(
-        run_id, parent_run_id=parent_run_id, extras=job.extras
-    )
-    context: DictData = {"status": WAIT}
-    trace.info("[JOB]: Start Azure Batch executor.")
-
-    if event and event.is_set():
-        return Result(
-            run_id=run_id,
-            parent_run_id=parent_run_id,
-            status=CANCEL,
-            context=catch(
-                context,
-                status=CANCEL,
-                updated={
-                    "errors": JobCancelError(
-                        "Execution was canceled from the event before start "
-                        "self-hosted execution."
-                    ).to_dict()
-                },
-            ),
-            extras=job.extras,
-        )
-    print(params)
-    return Result(
-        run_id=run_id,
-        parent_run_id=parent_run_id,
-        status=SUCCESS,
-        context=catch(context, status=SUCCESS),
-        extras=job.extras,
-    )
+# Azure Batch execution is now handled by the Azure Batch provider
+# See src/ddeutil/workflow/plugins/providers/az.py for implementation
 
 
 def docker_execution(
@@ -1304,7 +1378,7 @@ def docker_execution(
     """
     parent_run_id: StrOrNone = run_id
    run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace: Trace = get_trace(
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     context: DictData = {"status": WAIT}
@@ -1327,7 +1401,6 @@ def docker_execution(
             ),
         extras=job.extras,
     )
-    print(params)
     return Result(
         run_id=run_id,
         parent_run_id=parent_run_id,