ddeutil-workflow 0.0.77__py3-none-any.whl → 0.0.79__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
ddeutil/workflow/job.py CHANGED
@@ -48,10 +48,11 @@ from enum import Enum
 from functools import lru_cache
 from textwrap import dedent
 from threading import Event
-from typing import Annotated, Any, Literal, Optional, Union
+from typing import Annotated, Any, Optional, Union

 from ddeutil.core import freeze_args
 from pydantic import BaseModel, Discriminator, Field, SecretStr, Tag
+from pydantic.functional_serializers import field_serializer
 from pydantic.functional_validators import field_validator, model_validator
 from typing_extensions import Self

@@ -71,7 +72,7 @@ from .result import (
 )
 from .reusables import has_template, param2template
 from .stages import Stage
-from .traces import Trace, get_trace
+from .traces import TraceManager, get_trace
 from .utils import cross_product, filter_func, gen_id

 MatrixFilter = list[dict[str, Union[str, int]]]
@@ -248,14 +249,21 @@ class RunsOn(str, Enum):
     SELF_HOSTED = "self_hosted"
     AZ_BATCH = "azure_batch"
     AWS_BATCH = "aws_batch"
+    GCP_BATCH = "gcp_batch"
     CLOUD_BATCH = "cloud_batch"
     DOCKER = "docker"
+    CONTAINER = "container"


+# Import constants for backward compatibility
 LOCAL = RunsOn.LOCAL
 SELF_HOSTED = RunsOn.SELF_HOSTED
 AZ_BATCH = RunsOn.AZ_BATCH
+AWS_BATCH = RunsOn.AWS_BATCH
+GCP_BATCH = RunsOn.GCP_BATCH
+CLOUD_BATCH = RunsOn.CLOUD_BATCH
 DOCKER = RunsOn.DOCKER
+CONTAINER = RunsOn.CONTAINER


 class BaseRunsOn(BaseModel):  # pragma: no cov
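0.0.79 extends the `RunsOn` enum with `gcp_batch` and `container` values and re-exports every member as a module-level constant. A minimal standalone sketch of why both spellings keep working (enum values taken from this diff; the snippet itself is not package code):

```python
from enum import Enum


class RunsOn(str, Enum):
    DOCKER = "docker"
    CONTAINER = "container"
    GCP_BATCH = "gcp_batch"


# Module-level aliases, as added in the diff, so existing call sites can
# keep comparing against bare names like DOCKER or CONTAINER.
DOCKER = RunsOn.DOCKER
CONTAINER = RunsOn.CONTAINER
GCP_BATCH = RunsOn.GCP_BATCH

assert RunsOn("container") is CONTAINER  # str-enum lookup by value
assert DOCKER == "docker"                # str mixin compares to the raw value
```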
@@ -263,24 +271,20 @@ class BaseRunsOn(BaseModel):  # pragma: no cov
     object and override execute method.
     """

-    type: RunsOn = Field(description="A runs-on type.")
+    type: RunsOn = LOCAL
     args: DictData = Field(
         default_factory=dict,
-        alias="with",
         description=(
             "An argument that pass to the runs-on execution function. This "
             "args will override by this child-model with specific args model."
         ),
+        alias="with",
     )


 class OnLocal(BaseRunsOn):  # pragma: no cov
     """Runs-on local."""

-    type: Literal[RunsOn.LOCAL] = Field(
-        default=RunsOn.LOCAL, validate_default=True
-    )
-

 class SelfHostedArgs(BaseModel):
     """Self-Hosted arguments."""
@@ -292,9 +296,7 @@ class SelfHostedArgs(BaseModel):
 class OnSelfHosted(BaseRunsOn):  # pragma: no cov
     """Runs-on self-hosted."""

-    type: Literal[RunsOn.SELF_HOSTED] = Field(
-        default=RunsOn.SELF_HOSTED, validate_default=True
-    )
+    type: RunsOn = SELF_HOSTED
     args: SelfHostedArgs = Field(alias="with")

@@ -310,9 +312,7 @@ class AzBatchArgs(BaseModel):

 class OnAzBatch(BaseRunsOn):  # pragma: no cov

-    type: Literal[RunsOn.AZ_BATCH] = Field(
-        default=RunsOn.AZ_BATCH, validate_default=True
-    )
+    type: RunsOn = AZ_BATCH
     args: AzBatchArgs = Field(alias="with")

@@ -331,23 +331,116 @@ class DockerArgs(BaseModel):
 class OnDocker(BaseRunsOn):  # pragma: no cov
     """Runs-on Docker container."""

-    type: Literal[RunsOn.DOCKER] = Field(
-        default=RunsOn.DOCKER, validate_default=True
+    type: RunsOn = DOCKER
+    args: DockerArgs = Field(default_factory=DockerArgs, alias="with")
+
+
+class ContainerArgs(BaseModel):
+    """Container arguments."""
+
+    image: str = Field(description="Docker image to use")
+    container_name: Optional[str] = Field(
+        default=None, description="Container name"
+    )
+    volumes: Optional[list[dict[str, str]]] = Field(
+        default=None, description="Volume mounts"
+    )
+    environment: Optional[dict[str, str]] = Field(
+        default=None, description="Environment variables"
+    )
+    network: Optional[dict[str, Any]] = Field(
+        default=None, description="Network configuration"
+    )
+    resources: Optional[dict[str, Any]] = Field(
+        default=None, description="Resource limits"
+    )
+    working_dir: Optional[str] = Field(
+        default="/app", description="Working directory"
+    )
+    user: Optional[str] = Field(default=None, description="User to run as")
+    command: Optional[str] = Field(
+        default=None, description="Override default command"
+    )
+    timeout: int = Field(
+        default=3600, description="Execution timeout in seconds"
+    )
+    remove: bool = Field(
+        default=True, description="Remove container after execution"
+    )
+    docker_host: Optional[str] = Field(
+        default=None, description="Docker host URL"
+    )
+
+
+class OnContainer(BaseRunsOn):  # pragma: no cov
+    """Runs-on Container."""
+
+    type: RunsOn = CONTAINER
+    args: ContainerArgs = Field(default_factory=ContainerArgs, alias="with")
+
+
+class AWSBatchArgs(BaseModel):
+    """AWS Batch arguments."""
+
+    job_queue_arn: str = Field(description="AWS Batch job queue ARN")
+    s3_bucket: str = Field(description="S3 bucket for file storage")
+    region_name: str = Field(default="us-east-1", description="AWS region")
+    aws_access_key_id: Optional[str] = Field(
+        default=None, description="AWS access key ID"
+    )
+    aws_secret_access_key: Optional[str] = Field(
+        default=None, description="AWS secret access key"
+    )
+    aws_session_token: Optional[str] = Field(
+        default=None, description="AWS session token"
+    )
+
+
+class OnAWSBatch(BaseRunsOn):  # pragma: no cov
+    """Runs-on AWS Batch."""
+
+    type: RunsOn = AWS_BATCH
+    args: AWSBatchArgs = Field(alias="with")
+
+
+class GCPBatchArgs(BaseModel):
+    """Google Cloud Batch arguments."""
+
+    project_id: str = Field(description="Google Cloud project ID")
+    region: str = Field(description="Google Cloud region")
+    gcs_bucket: str = Field(description="Google Cloud Storage bucket")
+    credentials_path: Optional[str] = Field(
+        default=None, description="Path to service account credentials"
     )
-    args: DockerArgs = Field(alias="with", default_factory=DockerArgs)
+    machine_type: str = Field(
+        default="e2-standard-4", description="Machine type"
+    )
+    max_parallel_tasks: int = Field(
+        default=1, description="Maximum parallel tasks"
+    )
+
+
+class OnGCPBatch(BaseRunsOn):  # pragma: no cov
+    """Runs-on Google Cloud Batch."""
+
+    type: RunsOn = GCP_BATCH
+    args: GCPBatchArgs = Field(alias="with")


 def get_discriminator_runs_on(model: dict[str, Any]) -> RunsOn:
     """Get discriminator of the RunsOn models."""
     t: str = model.get("type")
-    return RunsOn(t) if t else RunsOn.LOCAL
+    return RunsOn(t) if t else LOCAL


 RunsOnModel = Annotated[
     Union[
-        Annotated[OnSelfHosted, Tag(RunsOn.SELF_HOSTED)],
-        Annotated[OnDocker, Tag(RunsOn.DOCKER)],
-        Annotated[OnLocal, Tag(RunsOn.LOCAL)],
+        Annotated[OnSelfHosted, Tag(SELF_HOSTED)],
+        Annotated[OnDocker, Tag(DOCKER)],
+        Annotated[OnLocal, Tag(LOCAL)],
+        Annotated[OnContainer, Tag(CONTAINER)],
+        Annotated[OnAWSBatch, Tag(AWS_BATCH)],
+        Annotated[OnGCPBatch, Tag(GCP_BATCH)],
     ],
     Discriminator(get_discriminator_runs_on),
 ]
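The three new models plug into the same callable-discriminator pattern. A self-contained miniature of how `Discriminator` and `Tag` route a raw dict, assuming pydantic v2 (enum trimmed to two members for brevity; not the package's code):

```python
from enum import Enum
from typing import Annotated, Any, Union

from pydantic import BaseModel, Discriminator, Tag, TypeAdapter


class RunsOn(str, Enum):
    LOCAL = "local"
    CONTAINER = "container"


class OnLocal(BaseModel):
    type: RunsOn = RunsOn.LOCAL


class OnContainer(BaseModel):
    type: RunsOn = RunsOn.CONTAINER


def get_discriminator_runs_on(model: dict[str, Any]) -> RunsOn:
    # Same fallback as the diff: a missing `type` key selects LOCAL.
    t = model.get("type")
    return RunsOn(t) if t else RunsOn.LOCAL


RunsOnModel = Annotated[
    Union[
        Annotated[OnLocal, Tag(RunsOn.LOCAL)],
        Annotated[OnContainer, Tag(RunsOn.CONTAINER)],
    ],
    Discriminator(get_discriminator_runs_on),
]

adapter = TypeAdapter(RunsOnModel)
assert isinstance(adapter.validate_python({"type": "container"}), OnContainer)
assert isinstance(adapter.validate_python({}), OnLocal)
```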
@@ -490,6 +583,11 @@ class Job(BaseModel):

         return self

+    @field_serializer("runs_on")
+    def __serialize_runs_on(self, value: RunsOnModel) -> DictData:
+        """Serialize the runs_on field."""
+        return value.model_dump(by_alias=True)
+
     def stage(self, stage_id: str) -> Stage:
         """Return stage instance that exists in this job via passing an input
         stage ID.
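The new `field_serializer` makes `Job.model_dump()` re-emit the nested runs-on model through its aliases, so a dumped job can be re-validated. A round-trip sketch under the same pydantic v2 assumption (toy models, not the package's):

```python
from typing import Any

from pydantic import BaseModel, Field, field_serializer


class OnToy(BaseModel):
    type: str = "local"
    args: dict[str, Any] = Field(default_factory=dict, alias="with")


class Job(BaseModel):
    runs_on: OnToy = Field(default_factory=OnToy)

    @field_serializer("runs_on")
    def _serialize_runs_on(self, value: OnToy) -> dict[str, Any]:
        # Dump by alias so the reserved-word key `with` survives the round
        # trip instead of the attribute name `args`.
        return value.model_dump(by_alias=True)


job = Job.model_validate({"runs_on": {"type": "local", "with": {"a": 1}}})
assert Job.model_validate(job.model_dump()).runs_on.args == {"a": 1}
```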
@@ -781,7 +879,7 @@ class Job(BaseModel):
         ts: float = time.monotonic()
         parent_run_id: str = run_id
         run_id: str = gen_id((self.id or "EMPTY"), unique=True)
-        trace: Trace = get_trace(
+        trace: TraceManager = get_trace(
             run_id, parent_run_id=parent_run_id, extras=self.extras
         )
         trace.info(
@@ -800,7 +898,14 @@ class Job(BaseModel):
         elif self.runs_on.type == SELF_HOSTED:  # pragma: no cov
             pass
         elif self.runs_on.type == AZ_BATCH:  # pragma: no cov
-            pass
+            from .plugins.providers.az import azure_batch_execute
+
+            return azure_batch_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
         elif self.runs_on.type == DOCKER:  # pragma: no cov
             return docker_execution(
                 self,
@@ -808,6 +913,33 @@ class Job(BaseModel):
                 run_id=parent_run_id,
                 event=event,
             ).make_info({"execution_time": time.monotonic() - ts})
+        elif self.runs_on.type == CONTAINER:  # pragma: no cov
+            from .plugins.providers.container import container_execute
+
+            return container_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
+        elif self.runs_on.type == AWS_BATCH:  # pragma: no cov
+            from .plugins.providers.aws import aws_batch_execute
+
+            return aws_batch_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})
+        elif self.runs_on.type == GCP_BATCH:  # pragma: no cov
+            from .plugins.providers.gcs import gcp_batch_execute
+
+            return gcp_batch_execute(
+                self,
+                params,
+                run_id=parent_run_id,
+                event=event,
+            ).make_info({"execution_time": time.monotonic() - ts})

         trace.error(
             f"[JOB]: Execution not support runs-on: {self.runs_on.type.value!r} "
@@ -884,7 +1016,7 @@ def local_execute_strategy(

     :rtype: tuple[Status, DictData]
     """
-    trace: Trace = get_trace(
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     if strategy:
@@ -1020,7 +1152,7 @@ def local_execute(
     ts: float = time.monotonic()
     parent_run_id: StrOrNone = run_id
     run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace: Trace = get_trace(
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     context: DictData = {"status": WAIT}
@@ -1163,7 +1295,7 @@ def self_hosted_execute(
     """
     parent_run_id: StrOrNone = run_id
     run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace: Trace = get_trace(
+    trace: TraceManager = get_trace(
        run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     context: DictData = {"status": WAIT}
@@ -1226,71 +1358,8 @@ def self_hosted_execute(
     )


-def azure_batch_execute(
-    job: Job,
-    params: DictData,
-    *,
-    run_id: StrOrNone = None,
-    event: Optional[Event] = None,
-) -> Result:  # pragma: no cov
-    """Azure Batch job execution that will run all job's stages on the Azure
-    Batch Node and extract the result file to be returning context result.
-
-    Steps:
-        - Create a Batch account and a Batch pool.
-        - Create a Batch job and add tasks to the job. Each task represents a
-          command to run on a compute node.
-        - Specify the command to run the Python script in the task. You can use
-          the cmd /c command to run the script with the Python interpreter.
-        - Upload the Python script and any required input files to Azure Storage
-          Account.
-        - Configure the task to download the input files from Azure Storage to
-          the compute node before running the script.
-        - Monitor the job and retrieve the output files from Azure Storage.
-
-    References:
-        - https://docs.azure.cn/en-us/batch/tutorial-parallel-python
-
-    :param job:
-    :param params:
-    :param run_id:
-    :param event:
-
-    :rtype: Result
-    """
-    parent_run_id: StrOrNone = run_id
-    run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace: Trace = get_trace(
-        run_id, parent_run_id=parent_run_id, extras=job.extras
-    )
-    context: DictData = {"status": WAIT}
-    trace.info("[JOB]: Start Azure Batch executor.")
-
-    if event and event.is_set():
-        return Result(
-            run_id=run_id,
-            parent_run_id=parent_run_id,
-            status=CANCEL,
-            context=catch(
-                context,
-                status=CANCEL,
-                updated={
-                    "errors": JobCancelError(
-                        "Execution was canceled from the event before start "
-                        "self-hosted execution."
-                    ).to_dict()
-                },
-            ),
-            extras=job.extras,
-        )
-    print(params)
-    return Result(
-        run_id=run_id,
-        parent_run_id=parent_run_id,
-        status=SUCCESS,
-        context=catch(context, status=SUCCESS),
-        extras=job.extras,
-    )
+# Azure Batch execution is now handled by the Azure Batch provider
+# See src/ddeutil/workflow/plugins/providers/az.py for implementation


 def docker_execution(
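The inline stub above is removed in favor of the provider module. A sketch of calling the relocated function directly; the call shape (positional `job` and `params`, keyword `run_id` and `event`) is taken from the `AZ_BATCH` dispatch branch earlier in this diff, and the provider is assumed to keep that signature:

```python
from threading import Event

from ddeutil.workflow.job import Job
from ddeutil.workflow.plugins.providers.az import azure_batch_execute


def run_job_on_azure(job: Job, params: dict) -> None:
    # The event can be set from another thread to request cancellation,
    # mirroring how job.py threads it through to the providers.
    event = Event()
    result = azure_batch_execute(job, params, run_id=None, event=event)
    print(result.status)
```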
@@ -1309,7 +1378,7 @@ def docker_execution(
     """
     parent_run_id: StrOrNone = run_id
     run_id: str = gen_id((job.id or "EMPTY"), unique=True)
-    trace: Trace = get_trace(
+    trace: TraceManager = get_trace(
         run_id, parent_run_id=parent_run_id, extras=job.extras
     )
     context: DictData = {"status": WAIT}