ddeutil-workflow 0.0.78__py3-none-any.whl → 0.0.79__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,703 @@
+ # ------------------------------------------------------------------------------
+ # Copyright (c) 2022 Korawich Anuttra. All rights reserved.
+ # Licensed under the MIT License. See LICENSE in the project root for
+ # license information.
+ # ------------------------------------------------------------------------------
+ """Container Provider Module.
+
+ This module provides container-based execution for workflow jobs, enabling
+ jobs to run inside Docker containers on any self-hosted server.
+
+ The Container provider runs workflow jobs in isolated container environments,
+ giving consistent execution across different operating systems and
+ infrastructure.
+
+ Key Features:
+     - Multi-OS container support (Linux and Windows container images)
+     - Self-hosted server compatibility
+     - Isolated execution environments
+     - File volume mounting and sharing
+     - Result collection and error handling
+     - Resource cleanup and management
+
+ Classes:
+     ContainerProvider: Main provider for container operations
+     ContainerConfig: Configuration for container execution
+     VolumeConfig: Configuration for volume mounting
+     NetworkConfig: Configuration for container networking
+     ResourceConfig: Configuration for resource limits
+
+ Config Example:
+
+     ```yaml
+     jobs:
+       my-job:
+         runs-on:
+           type: "container"
+           with:
+             image: "ubuntu:20.04"
+             container_name: "workflow-{run_id}"
+             volumes:
+               - source: "/host/data"
+                 target: "/container/data"
+                 mode: "rw"
+             environment:
+               PYTHONPATH: "/app"
+             resources:
+               memory: "2g"
+               cpu: "2"
+         stages:
+           - name: "process"
+             type: "py"
+             run: |
+               # Your processing logic here
+               result.context.update({"output": "processed"})
+     ```
+
+ """
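For orientation, the YAML `with:` block above maps directly onto the provider API defined later in this file. A minimal programmatic sketch follows; the values are the same placeholders as in the YAML, and the import path is an assumption about where this module lives in the package:

```python
# Import path assumed from the package layout; adjust to the actual location
# of this module inside ddeutil-workflow.
from ddeutil.workflow.plugins.providers.container import (
    ContainerProvider,
    ResourceConfig,
    VolumeConfig,
)

provider = ContainerProvider(
    image="ubuntu:20.04",
    volumes=[
        VolumeConfig(source="/host/data", target="/container/data", mode="rw")
    ],
    environment={"PYTHONPATH": "/app"},
    resources=ResourceConfig(memory="2g", cpu="2"),
)
```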
+ from __future__ import annotations
+
+ import io
+ import json
+ import tarfile
+ from typing import Any, Optional, Union
+
+ try:
+     import docker
+
+     DOCKER_AVAILABLE = True
+ except ImportError:
+     DOCKER_AVAILABLE = False
+
+ from pydantic import BaseModel, Field
+
+ from ...__types import DictData
+ from ...job import Job
+ from ...result import FAILED, SUCCESS, Result
+ from ...traces import get_trace
+ from ...utils import gen_id
+
+
+ class VolumeConfig(BaseModel):
+     """Container volume configuration."""
+
+     source: str = Field(description="Host path to mount")
+     target: str = Field(description="Container path to mount to")
+     mode: str = Field(default="rw", description="Mount mode (ro/rw)")
+
+
+ class NetworkConfig(BaseModel):
+     """Container network configuration."""
+
+     network_name: Optional[str] = Field(
+         default=None, description="Network name"
+     )
+     network_mode: str = Field(default="bridge", description="Network mode")
+     ports: Optional[dict[str, str]] = Field(
+         default=None, description="Port mappings"
+     )
+
+
+ class ResourceConfig(BaseModel):
+     """Container resource configuration."""
+
+     memory: Optional[str] = Field(
+         default=None, description="Memory limit (e.g., '2g')"
+     )
+     cpu: Optional[Union[str, float]] = Field(
+         default=None, description="CPU limit"
+     )
+     cpuset_cpus: Optional[str] = Field(default=None, description="CPU set")
+     memswap_limit: Optional[str] = Field(
+         default=None, description="Memory swap limit"
+     )
+
+
+ class ContainerConfig(BaseModel):
+     """Container execution configuration."""
+
+     image: str = Field(description="Docker image to use")
+     container_name: Optional[str] = Field(
+         default=None, description="Container name"
+     )
+     volumes: Optional[list[VolumeConfig]] = Field(
+         default=None, description="Volume mounts"
+     )
+     environment: Optional[dict[str, str]] = Field(
+         default=None, description="Environment variables"
+     )
+     network: Optional[NetworkConfig] = Field(
+         default=None, description="Network configuration"
+     )
+     resources: Optional[ResourceConfig] = Field(
+         default=None, description="Resource limits"
+     )
+     working_dir: Optional[str] = Field(
+         default="/app", description="Working directory"
+     )
+     user: Optional[str] = Field(default=None, description="User to run as")
+     command: Optional[str] = Field(
+         default=None, description="Override default command"
+     )
+     timeout: int = Field(
+         default=3600, description="Execution timeout in seconds"
+     )
+     remove: bool = Field(
+         default=True, description="Remove container after execution"
+     )
+
+
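A small sketch of how these pydantic models compose and serialize. Field names come from the definitions above; the values are placeholders and the import path is assumed as before:

```python
from ddeutil.workflow.plugins.providers.container import (  # path assumed
    ContainerConfig,
    ResourceConfig,
    VolumeConfig,
)

config = ContainerConfig(
    image="python:3.11-slim",
    volumes=[VolumeConfig(source="/srv/data", target="/data", mode="ro")],
    environment={"ENV": "prod"},
    resources=ResourceConfig(memory="2g", cpu=1.5),
)

# Pydantic v2 serialization, as used elsewhere in this module; defaults such
# as timeout=3600 and remove=True show up here as well.
print(config.model_dump(exclude_none=True))
```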
+ class ContainerProvider:
+     """Container provider for workflow job execution.
+
+     This provider handles the complete lifecycle of container operations
+     including container creation, job execution, result collection, and
+     cleanup. It supports multiple operating systems and provides isolated
+     execution environments.
+
+     Attributes:
+         docker_client: Docker client for container operations
+         config: Container configuration
+         base_volumes: Base volume mounts for workflow files
+
+     Example:
+         ```python
+         provider = ContainerProvider(
+             image="ubuntu:20.04",
+             volumes=[
+                 VolumeConfig(source="/host/data", target="/container/data")
+             ],
+             environment={"PYTHONPATH": "/app"}
+         )
+
+         result = provider.execute_job(job, params, run_id="job-123")
+         ```
+     """
+
+     def __init__(
+         self,
+         image: str,
+         container_name: Optional[str] = None,
+         volumes: Optional[list[VolumeConfig]] = None,
+         environment: Optional[dict[str, str]] = None,
+         network: Optional[NetworkConfig] = None,
+         resources: Optional[ResourceConfig] = None,
+         working_dir: str = "/app",
+         user: Optional[str] = None,
+         command: Optional[str] = None,
+         timeout: int = 3600,
+         remove: bool = True,
+         docker_host: Optional[str] = None,
+     ):
+         """Initialize Container provider.
+
+         Args:
+             image: Docker image to use
+             container_name: Container name
+             volumes: Volume mounts
+             environment: Environment variables
+             network: Network configuration
+             resources: Resource limits
+             working_dir: Working directory
+             user: User to run as
+             command: Override default command
+             timeout: Execution timeout in seconds
+             remove: Remove container after execution
+             docker_host: Docker host URL
+         """
+         if not DOCKER_AVAILABLE:
+             raise ImportError(
+                 "Docker dependencies not available. "
+                 "Install with: pip install docker"
+             )
+
+         self.config = ContainerConfig(
+             image=image,
+             container_name=container_name,
+             volumes=volumes or [],
+             environment=environment or {},
+             network=network,
+             resources=resources,
+             working_dir=working_dir,
+             user=user,
+             command=command,
+             timeout=timeout,
+             remove=remove,
+         )
+
+         # Initialize the Docker client. ``docker.from_env()`` does not accept
+         # a ``base_url`` argument, so use an explicit ``DockerClient`` when a
+         # custom Docker host is given.
+         if docker_host:
+             self.docker_client = docker.DockerClient(base_url=docker_host)
+         else:
+             self.docker_client = docker.from_env()
+
+         # Base volumes for workflow files
+         self.base_volumes = []
+
+     def _create_workflow_volume(self, run_id: str) -> str:
+         """Create temporary volume for workflow files.
+
+         Args:
+             run_id: Execution run ID
+
+         Returns:
+             str: Volume name
+         """
+         volume_name = f"workflow-{run_id}"
+         try:
+             self.docker_client.volumes.get(volume_name)
+         except docker.errors.NotFound:
+             self.docker_client.volumes.create(name=volume_name)
+         return volume_name
+
+     def _prepare_container_volumes(
+         self, run_id: str
+     ) -> dict[str, dict[str, str]]:
+         """Prepare container volume mounts.
+
+         The returned mapping uses the format expected by
+         ``docker_client.containers.run(volumes=...)``: a host path or volume
+         name mapped to ``{"bind": <target>, "mode": <ro|rw>}``.
+
+         Args:
+             run_id: Execution run ID
+
+         Returns:
+             dict[str, dict[str, str]]: Volume mount configurations
+         """
+         volumes: dict[str, dict[str, str]] = {}
+
+         # Add workflow volume
+         workflow_volume = self._create_workflow_volume(run_id)
+         volumes[workflow_volume] = {"bind": "/workflow", "mode": "rw"}
+
+         # Add configured bind mounts
+         for volume in self.config.volumes or []:
+             volumes[volume.source] = {
+                 "bind": volume.target,
+                 "mode": volume.mode,
+             }
+
+         return volumes
+
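As a quick illustration (purely hypothetical paths, run ID abbreviated), the mapping handed to `containers.run` for the docstring's example mount would look roughly like this:

```python
{
    "workflow-<run_id>": {"bind": "/workflow", "mode": "rw"},
    "/host/data": {"bind": "/container/data", "mode": "rw"},
}
```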
+     def _prepare_environment(
+         self, run_id: str, job: Job, params: DictData
+     ) -> dict[str, str]:
+         """Prepare container environment variables.
+
+         Args:
+             run_id: Execution run ID
+             job: Job to execute
+             params: Job parameters
+
+         Returns:
+             Dict[str, str]: Environment variables
+         """
+         env = self.config.environment.copy()
+
+         # Add workflow-specific environment
+         env.update(
+             {
+                 "WORKFLOW_RUN_ID": run_id,
+                 "WORKFLOW_JOB_ID": job.id or "unknown",
+                 "PYTHONPATH": "/workflow:/app",
+                 "WORKFLOW_WORKING_DIR": "/workflow",
+             }
+         )
+
+         return env
+
+     def _create_task_script(
+         self, job: Job, params: DictData, run_id: str
+     ) -> str:
+         """Create Python script for task execution.
+
+         The generated script reads ``job_config.json`` and ``params.json``
+         from the mounted ``/workflow`` volume and writes ``result.json``
+         back to the same location after execution.
+
+         Args:
+             job: Job to execute
+             params: Job parameters
+             run_id: Execution run ID
+
+         Returns:
+             str: Script content
+         """
+         script_content = f"""
+ import json
+ import sys
+ import os
+ from pathlib import Path
+
+ # Add workflow directory to Python path
+ sys.path.insert(0, '/workflow')
+
+ from ddeutil.workflow.job import local_execute
+ from ddeutil.workflow import Job
+
+ # Change to workflow directory
+ os.chdir('/workflow')
+
+ # Load job configuration
+ with open('job_config.json', 'r') as f:
+     job_data = json.load(f)
+
+ # Load parameters
+ with open('params.json', 'r') as f:
+     params = json.load(f)
+
+ # Create job instance
+ job = Job(**job_data)
+
+ # Execute job
+ result = local_execute(job, params, run_id='{run_id}')
+
+ # Save result
+ with open('result.json', 'w') as f:
+     json.dump(result.model_dump(), f, indent=2)
+
+ # Exit with appropriate code
+ sys.exit(0 if result.status == 'success' else 1)
+ """
+         return script_content
+
+     def _upload_files_to_volume(
+         self, volume_name: str, job: Job, params: DictData, run_id: str
+     ) -> None:
+         """Upload files to container volume.
+
+         Args:
+             volume_name: Volume name
+             job: Job to execute
+             params: Job parameters
+             run_id: Execution run ID
+         """
+         # Start a small helper container that keeps the volume mounted while
+         # the workflow files are copied in.
+         temp_container = self.docker_client.containers.run(
+             image="alpine:latest",
+             command="sleep 600",
+             volumes={volume_name: {"bind": "/workflow", "mode": "rw"}},
+             detach=True,
+         )
+
+         try:
+             files = {
+                 "task_script.py": self._create_task_script(
+                     job, params, run_id
+                 ),
+                 "job_config.json": json.dumps(job.model_dump(), indent=2),
+                 "params.json": json.dumps(params, indent=2),
+             }
+
+             # Pack the files into an in-memory tar archive and copy it into
+             # the mounted volume with ``put_archive``.
+             archive = io.BytesIO()
+             with tarfile.open(fileobj=archive, mode="w") as tar:
+                 for name, content in files.items():
+                     data = content.encode("utf-8")
+                     info = tarfile.TarInfo(name=name)
+                     info.size = len(data)
+                     tar.addfile(info, io.BytesIO(data))
+
+             temp_container.put_archive("/workflow", archive.getvalue())
+
+         finally:
+             temp_container.stop()
+             temp_container.remove()
+
+     def _get_container_command(self) -> list[str]:
+         """Get container command to execute.
+
+         Returns:
+             List[str]: Command to execute
+         """
+         if self.config.command:
+             return ["sh", "-c", self.config.command]
+
+         # Default command to install ddeutil-workflow and run task
+         return [
+             "sh",
+             "-c",
+             "pip3 install ddeutil-workflow && python3 /workflow/task_script.py",
+         ]
+
+     def _wait_for_container_completion(
+         self, container, timeout: int
+     ) -> dict[str, Any]:
+         """Wait for container completion and return results.
+
+         Args:
+             container: Docker container
+             timeout: Timeout in seconds
+
+         Returns:
+             Dict[str, Any]: Container results
+         """
+         try:
+             # Wait for container to finish
+             result = container.wait(timeout=timeout)
+
+             # Get container logs
+             logs = container.logs().decode("utf-8")
+
+             # Get result file if it exists
+             result_data = {}
+             try:
+                 result_file = container.exec_run("cat /workflow/result.json")
+                 if result_file[0] == 0:
+                     result_data = json.loads(result_file[1].decode("utf-8"))
+             except Exception:
+                 pass
+
+             return {
+                 "status": (
+                     "completed" if result["StatusCode"] == 0 else "failed"
+                 ),
+                 "exit_code": result["StatusCode"],
+                 "logs": logs,
+                 "result_data": result_data,
+             }
+
+         except Exception as e:
+             return {
+                 "status": "error",
+                 "error": str(e),
+                 "logs": container.logs().decode("utf-8") if container else "",
+             }
+
+     def execute_job(
+         self,
+         job: Job,
+         params: DictData,
+         *,
+         run_id: Optional[str] = None,
+         event: Optional[Any] = None,
+     ) -> Result:
+         """Execute job in container.
+
+         Args:
+             job: Job to execute
+             params: Job parameters
+             run_id: Execution run ID
+             event: Event for cancellation
+
+         Returns:
+             Result: Execution result
+         """
+         if event and event.is_set():
+             return Result(
+                 status=FAILED,
+                 context={
+                     "errors": {"message": "Execution was canceled before start"}
+                 },
+                 run_id=run_id or gen_id("container"),
+             )
+
+         # Generate run ID if not provided
+         if not run_id:
+             run_id = gen_id(job.id or "container", unique=True)
+
+         trace = get_trace(run_id, extras=job.extras)
+         trace.info(f"[CONTAINER]: Starting job execution: {job.id}")
+
+         container = None
+         volume_name = None
+
+         try:
+             # Create workflow volume
+             volume_name = self._create_workflow_volume(run_id)
+             trace.info(f"[CONTAINER]: Created workflow volume: {volume_name}")
+
+             # Upload files to volume
+             trace.info("[CONTAINER]: Uploading files to volume")
+             self._upload_files_to_volume(volume_name, job, params, run_id)
+
+             # Prepare container configuration
+             container_name = self.config.container_name or f"workflow-{run_id}"
+             volumes = self._prepare_container_volumes(run_id)
+             environment = self._prepare_environment(run_id, job, params)
+             command = self._get_container_command()
+             resources = self.config.resources
+             network = self.config.network
+
+             # Create and start container. Resource and network limits are
+             # passed directly to ``containers.run``, which builds the host
+             # config internally.
+             trace.info(f"[CONTAINER]: Creating container: {container_name}")
+             container = self.docker_client.containers.run(
+                 image=self.config.image,
+                 name=container_name,
+                 command=command,
+                 environment=environment,
+                 working_dir=self.config.working_dir,
+                 volumes=volumes,
+                 mem_limit=resources.memory if resources else None,
+                 memswap_limit=resources.memswap_limit if resources else None,
+                 cpu_period=(
+                     100000 if resources and resources.cpu else None
+                 ),
+                 cpu_quota=(
+                     int(float(resources.cpu) * 100000)
+                     if resources and resources.cpu
+                     else None
+                 ),
+                 cpuset_cpus=resources.cpuset_cpus if resources else None,
+                 network_mode=network.network_mode if network else None,
+                 ports=network.ports if network else None,
+                 user=self.config.user,
+                 detach=True,
+             )
+
+             # Wait for completion
+             trace.info("[CONTAINER]: Waiting for container completion")
+             result = self._wait_for_container_completion(
+                 container, self.config.timeout
+             )
+
+             # Process results
+             if result["status"] == "completed":
+                 trace.info("[CONTAINER]: Container completed successfully")
+                 return Result(
+                     status=SUCCESS,
+                     context=result.get("result_data", {}),
+                     run_id=run_id,
+                     extras=job.extras,
+                 )
+             else:
+                 error_msg = (
+                     f"Container failed: {result.get('error', 'unknown error')}"
+                 )
+                 trace.error(f"[CONTAINER]: {error_msg}")
+                 return Result(
+                     status=FAILED,
+                     context={
+                         "errors": {"message": error_msg},
+                         "logs": result.get("logs", ""),
+                     },
+                     run_id=run_id,
+                     extras=job.extras,
+                 )
+
+         except Exception as e:
+             trace.error(f"[CONTAINER]: Execution failed: {str(e)}")
+             return Result(
+                 status=FAILED,
+                 context={"errors": {"message": str(e)}},
+                 run_id=run_id,
+                 extras=job.extras,
+             )
+
+         finally:
+             # Cleanup: remove the container after results have been collected
+             # when the configuration asks for removal.
+             if container and self.config.remove:
+                 try:
+                     container.stop()
+                     container.remove(force=True)
+                 except Exception:
+                     pass
+
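A hedged usage sketch of the provider-level API defined above. The `job` and `params` objects are assumed to come from the surrounding ddeutil-workflow package, the image is a placeholder, and a running Docker daemon is assumed:

```python
import threading

# ContainerProvider, Job, and params are the names used in this module;
# `job` and `params` are assumed to be supplied by the workflow engine.
provider = ContainerProvider(
    image="python:3.11-slim",
    environment={"PYTHONPATH": "/app"},
    timeout=1800,
)

cancel_event = threading.Event()  # set() this to cancel before start
result = provider.execute_job(
    job, params, run_id="demo-001", event=cancel_event
)

print(result.status)
provider.cleanup("demo-001")  # remove the workflow volume and container
```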
+     def cleanup(self, run_id: Optional[str] = None) -> None:
+         """Clean up container resources.
+
+         Args:
+             run_id: Run ID to clean up (if None, cleans up all workflow
+                 resources)
+         """
+         try:
+             if run_id:
+                 # Clean up specific run
+                 volume_name = f"workflow-{run_id}"
+                 try:
+                     volume = self.docker_client.volumes.get(volume_name)
+                     volume.remove()
+                 except Exception:
+                     pass
+
+                 # Clean up container if it exists
+                 container_name = f"workflow-{run_id}"
+                 try:
+                     container = self.docker_client.containers.get(
+                         container_name
+                     )
+                     container.stop()
+                     container.remove()
+                 except Exception:
+                     pass
+             else:
+                 # Clean up all workflow resources
+                 volumes = self.docker_client.volumes.list()
+                 for volume in volumes:
+                     if volume.name.startswith("workflow-"):
+                         volume.remove()
+
+                 containers = self.docker_client.containers.list(all=True)
+                 for container in containers:
+                     if container.name.startswith("workflow-"):
+                         container.stop()
+                         container.remove()
+
+         except Exception:
+             pass
+
+
+ def container_execute(
+     job: Job,
+     params: DictData,
+     *,
+     run_id: Optional[str] = None,
+     event: Optional[Any] = None,
+ ) -> Result:
+     """Container job execution function.
+
+     This function creates a Container provider and executes the job
+     inside a Docker container. It handles the complete lifecycle
+     including container creation, job execution, and cleanup.
+
+     Args:
+         job: Job to execute
+         params: Job parameters
+         run_id: Execution run ID
+         event: Event for cancellation
+
+     Returns:
+         Result: Execution result
+     """
+     # Extract container configuration from job
+     container_args = job.runs_on.args
+
+     provider = ContainerProvider(
+         image=container_args.image,
+         container_name=container_args.container_name,
+         volumes=container_args.volumes,
+         environment=container_args.environment,
+         network=container_args.network,
+         resources=container_args.resources,
+         working_dir=container_args.working_dir,
+         user=container_args.user,
+         command=container_args.command,
+         timeout=container_args.timeout,
+         remove=container_args.remove,
+         docker_host=container_args.docker_host,
+     )
+
+     try:
+         return provider.execute_job(job, params, run_id=run_id, event=event)
+     finally:
+         # Clean up resources
+         if run_id:
+             provider.cleanup(run_id)
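Finally, a hedged sketch of how this module-level entry point might be invoked. The `job` object and its runs-on arguments are assumed to be produced by the package's YAML loader (outside this module), parsed from a `type: "container"` block like the one in the module docstring:

```python
# `job` is assumed to be a ddeutil-workflow Job whose runs-on block was parsed
# from a "container" configuration, so job.runs_on.args carries the fields
# mapped onto ContainerProvider above. The params dict is illustrative.
result = container_execute(
    job, params={"source": "/host/data"}, run_id="run-42"
)
if result.status == SUCCESS:
    print(result.context)
```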