kailash 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +31 -0
- kailash/__main__.py +11 -0
- kailash/cli/__init__.py +5 -0
- kailash/cli/commands.py +563 -0
- kailash/manifest.py +778 -0
- kailash/nodes/__init__.py +23 -0
- kailash/nodes/ai/__init__.py +26 -0
- kailash/nodes/ai/agents.py +417 -0
- kailash/nodes/ai/models.py +488 -0
- kailash/nodes/api/__init__.py +52 -0
- kailash/nodes/api/auth.py +567 -0
- kailash/nodes/api/graphql.py +480 -0
- kailash/nodes/api/http.py +598 -0
- kailash/nodes/api/rate_limiting.py +572 -0
- kailash/nodes/api/rest.py +665 -0
- kailash/nodes/base.py +1032 -0
- kailash/nodes/base_async.py +128 -0
- kailash/nodes/code/__init__.py +32 -0
- kailash/nodes/code/python.py +1021 -0
- kailash/nodes/data/__init__.py +125 -0
- kailash/nodes/data/readers.py +496 -0
- kailash/nodes/data/sharepoint_graph.py +623 -0
- kailash/nodes/data/sql.py +380 -0
- kailash/nodes/data/streaming.py +1168 -0
- kailash/nodes/data/vector_db.py +964 -0
- kailash/nodes/data/writers.py +529 -0
- kailash/nodes/logic/__init__.py +6 -0
- kailash/nodes/logic/async_operations.py +702 -0
- kailash/nodes/logic/operations.py +551 -0
- kailash/nodes/transform/__init__.py +5 -0
- kailash/nodes/transform/processors.py +379 -0
- kailash/runtime/__init__.py +6 -0
- kailash/runtime/async_local.py +356 -0
- kailash/runtime/docker.py +697 -0
- kailash/runtime/local.py +434 -0
- kailash/runtime/parallel.py +557 -0
- kailash/runtime/runner.py +110 -0
- kailash/runtime/testing.py +347 -0
- kailash/sdk_exceptions.py +307 -0
- kailash/tracking/__init__.py +7 -0
- kailash/tracking/manager.py +885 -0
- kailash/tracking/metrics_collector.py +342 -0
- kailash/tracking/models.py +535 -0
- kailash/tracking/storage/__init__.py +0 -0
- kailash/tracking/storage/base.py +113 -0
- kailash/tracking/storage/database.py +619 -0
- kailash/tracking/storage/filesystem.py +543 -0
- kailash/utils/__init__.py +0 -0
- kailash/utils/export.py +924 -0
- kailash/utils/templates.py +680 -0
- kailash/visualization/__init__.py +62 -0
- kailash/visualization/api.py +732 -0
- kailash/visualization/dashboard.py +951 -0
- kailash/visualization/performance.py +808 -0
- kailash/visualization/reports.py +1471 -0
- kailash/workflow/__init__.py +15 -0
- kailash/workflow/builder.py +245 -0
- kailash/workflow/graph.py +827 -0
- kailash/workflow/mermaid_visualizer.py +628 -0
- kailash/workflow/mock_registry.py +63 -0
- kailash/workflow/runner.py +302 -0
- kailash/workflow/state.py +238 -0
- kailash/workflow/visualization.py +588 -0
- kailash-0.1.0.dist-info/METADATA +710 -0
- kailash-0.1.0.dist-info/RECORD +69 -0
- kailash-0.1.0.dist-info/WHEEL +5 -0
- kailash-0.1.0.dist-info/entry_points.txt +2 -0
- kailash-0.1.0.dist-info/licenses/LICENSE +21 -0
- kailash-0.1.0.dist-info/top_level.txt +1 -0
kailash/runtime/docker.py
@@ -0,0 +1,697 @@
"""
Docker Runtime for Kailash Python SDK.

This module implements a Docker-based runtime for executing workflows where each
node runs in a separate Docker container. This enables scalable, isolated, and
reproducible workflow execution.

Key features:
- Container isolation for each node
- Resource constraints for execution
- Network communication between nodes
- Volume mounting for data exchange
- Orchestration of workflow execution
- Observability and monitoring
"""

import json
import logging
import os
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Any, Dict, Optional, Tuple

from kailash.nodes.base import Node

# BaseRuntime doesn't exist - we'll implement task tracking methods directly
from kailash.sdk_exceptions import NodeConfigurationError, NodeExecutionError
from kailash.tracking.manager import TaskManager
from kailash.workflow.graph import Workflow

logger = logging.getLogger(__name__)


class DockerNodeWrapper:
    """
    Wrapper for running a Kailash node in a Docker container.

    This class handles:
    - Dockerfile generation
    - Image building
    - Container execution
    - I/O mapping
    - Result extraction
    """

    def __init__(
        self,
        node: Node,
        node_id: str,
        base_image: str = "python:3.11-slim",
        work_dir: Optional[Path] = None,
        sdk_path: Optional[Path] = None,
    ):
        """
        Initialize a Docker node wrapper.

        Args:
            node: The Kailash node to containerize.
            node_id: The ID of the node in the workflow.
            base_image: Base Docker image to use.
            work_dir: Working directory for Docker files. If None, a temp dir is created.
            sdk_path: Path to the Kailash SDK source. If None, tries to determine it.
        """
        self.node = node
        self.node_id = node_id
        self.base_image = base_image

        # Create or use work directory
        if work_dir:
            self.work_dir = Path(work_dir)
            self.work_dir.mkdir(parents=True, exist_ok=True)
            self._created_temp_dir = False
        else:
            self.work_dir = Path(tempfile.mkdtemp(prefix=f"kailash_docker_{node_id}_"))
            self._created_temp_dir = True

        # Find SDK path if not provided
        if sdk_path:
            self.sdk_path = Path(sdk_path)
        else:
            # Try to find SDK path from node's module
            module_path = Path(self.node.__class__.__module__.replace(".", "/"))
            if module_path.parts and module_path.parts[0] == "kailash":
                # Find the SDK path by walking up to find the src/kailash directory
                module_file = sys.modules[self.node.__class__.__module__].__file__
                if module_file:
                    file_path = Path(module_file)
                    for parent in file_path.parents:
                        if (parent / "kailash").exists() and "site-packages" not in str(
                            parent
                        ):
                            self.sdk_path = parent.parent
                            break

            if not hasattr(self, "sdk_path"):
                raise NodeConfigurationError(
                    "Could not determine SDK path. Please provide it explicitly."
                )

        # Container properties
        self.image_name = f"kailash-node-{self.node_id.lower()}"
        self.container_name = f"kailash-{self.node_id.lower()}"
        self.container_id = None

        # I/O directories
        self.input_dir = self.work_dir / "input"
        self.output_dir = self.work_dir / "output"
        self.input_dir.mkdir(exist_ok=True)
        self.output_dir.mkdir(exist_ok=True)

    def prepare_dockerfile(self) -> Path:
        """
        Generate a Dockerfile for the node.

        Returns:
            Path to the generated Dockerfile.
        """
        dockerfile_path = self.work_dir / "Dockerfile"
        entrypoint_path = self.work_dir / "entrypoint.py"
        node_config_path = self.work_dir / "node.json"

        # Save node configuration
        with open(node_config_path, "w") as f:
            # Create a serializable representation of the node
            node_data = {
                "class": self.node.__class__.__name__,
                "module": self.node.__class__.__module__,
                "node_id": self.node_id,
                "name": getattr(self.node, "name", self.node.__class__.__name__),
            }

            # Add parameters if available
            if hasattr(self.node, "get_parameters"):
                node_data["parameters"] = {}
                for name, param in self.node.get_parameters().items():
                    node_data["parameters"][name] = {
                        "name": param.name,
                        "type": str(param.type),
                        "required": param.required,
                        "description": param.description,
                    }

            json.dump(node_data, f, indent=2)

        # Create entrypoint script
        with open(entrypoint_path, "w") as f:
            f.write(
                """#!/usr/bin/env python3
import os
import sys
import json
import logging
import importlib
from pathlib import Path

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger("kailash_node")

def main():
    \"\"\"Run a Kailash node in a Docker container.\"\"\"
    logger.info("Starting node execution")

    # Load node configuration
    node_config_path = Path("/app/node.json")
    with open(node_config_path, 'r') as f:
        node_data = json.load(f)

    logger.info(f"Loaded configuration for {node_data['class']} node")

    # Load runtime inputs if available
    input_path = Path("/data/input/inputs.json")
    runtime_inputs = {}
    if input_path.exists():
        logger.info(f"Loading inputs from {input_path}")
        with open(input_path, 'r') as f:
            runtime_inputs = json.load(f)

    # Dynamically import the node class
    logger.info(f"Importing {node_data['module']}.{node_data['class']}")
    try:
        module = importlib.import_module(node_data['module'])
        node_class = getattr(module, node_data['class'])
    except (ImportError, AttributeError) as e:
        logger.error(f"Failed to import node class: {e}")
        return 1

    # Create node instance
    logger.info(f"Creating node instance: {node_data['name']}")
    try:
        node = node_class(name=node_data['name'])
    except Exception as e:
        logger.error(f"Failed to create node instance: {e}")
        return 1

    # Execute node
    logger.info(f"Executing node with inputs: {list(runtime_inputs.keys())}")
    try:
        result = node.run(**runtime_inputs)
        logger.info("Node execution completed successfully")
    except Exception as e:
        logger.error(f"Node execution failed: {e}")
        # Save error information
        with open("/data/output/error.json", 'w') as f:
            json.dump({
                "error": str(e),
                "type": e.__class__.__name__
            }, f, indent=2)
        return 1

    # Save results
    logger.info("Saving execution results")
    try:
        result_path = Path("/data/output/result.json")
        with open(result_path, 'w') as f:
            # Handle non-serializable objects with basic conversion
            try:
                json.dump(result, f, indent=2)
            except TypeError:
                logger.warning("Result not directly JSON serializable, converting to string")
                json.dump({"result": str(result)}, f, indent=2)

        logger.info(f"Results saved to {result_path}")
    except Exception as e:
        logger.error(f"Failed to save results: {e}")
        return 1

    logger.info(f"Node {node_data['name']} execution completed")
    return 0

if __name__ == "__main__":
    sys.exit(main())
"""
            )

        # Make entrypoint executable
        os.chmod(entrypoint_path, 0o755)

        # Create Dockerfile
        with open(dockerfile_path, "w") as f:
            f.write(
                f"""FROM {self.base_image}

# Set working directory
WORKDIR /app

# Install system dependencies if needed
RUN apt-get update && apt-get install -y --no-install-recommends \\
    make build-essential \\
    && apt-get clean \\
    && rm -rf /var/lib/apt/lists/*

# Copy Kailash SDK
COPY sdk /app/sdk

# Install the SDK and its dependencies
RUN pip install --no-cache-dir -e /app/sdk

# Copy node configuration and entrypoint
COPY node.json /app/node.json
COPY entrypoint.py /app/entrypoint.py

# Create data directories
RUN mkdir -p /data/input /data/output

# Set entrypoint
ENTRYPOINT ["/app/entrypoint.py"]
"""
            )

        # Create SDK directory
        sdk_dir = self.work_dir / "sdk"
        sdk_dir.mkdir(exist_ok=True)

        # Copy SDK files
        self._copy_sdk_files(sdk_dir)

        return dockerfile_path

    def _copy_sdk_files(self, sdk_dir: Path):
        """
        Copy SDK files to the Docker build context.

        Args:
            sdk_dir: Destination directory for SDK files.
        """
        # Copy source directory
        if (self.sdk_path / "src").exists():
            import shutil

            # Copy src directory
            src_dir = self.sdk_path / "src"
            shutil.copytree(src_dir, sdk_dir / "src", dirs_exist_ok=True)

            # Copy setup files
            for setup_file in ["setup.py", "pyproject.toml"]:
                if (self.sdk_path / setup_file).exists():
                    shutil.copy(self.sdk_path / setup_file, sdk_dir / setup_file)
        else:
            raise NodeConfigurationError(
                f"SDK source directory not found at {self.sdk_path}/src"
            )

    def build_image(self) -> str:
        """
        Build the Docker image for the node.

        Returns:
            The name of the built Docker image.
        """
        # Ensure Dockerfile exists
        if not (self.work_dir / "Dockerfile").exists():
            self.prepare_dockerfile()

        logger.info(f"Building Docker image for node {self.node_id}")

        try:
            subprocess.run(
                ["docker", "build", "-t", self.image_name, "."],
                cwd=self.work_dir,
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )

            logger.info(f"Successfully built image: {self.image_name}")
            return self.image_name
        except subprocess.CalledProcessError as e:
            error_msg = f"Failed to build Docker image for node {self.node_id}: {e.stderr.decode()}"
            logger.error(error_msg)
            raise RuntimeError(error_msg)

    def prepare_inputs(self, inputs: Dict[str, Any]):
        """
        Prepare inputs for node execution.

        Args:
            inputs: The inputs to pass to the node.
        """
        input_file = self.input_dir / "inputs.json"
        with open(input_file, "w") as f:
            json.dump(inputs, f, indent=2)

    def run_container(
        self,
        network: str = None,
        env_vars: Dict[str, str] = None,
        resource_limits: Dict[str, str] = None,
    ) -> bool:
        """
        Run the node in a Docker container.

        Args:
            network: Docker network to use.
            env_vars: Environment variables to pass to the container.
            resource_limits: Resource limits (memory, CPU) for the container.

        Returns:
            True if container ran successfully.
        """
        logger.info(f"Running node {self.node_id} in Docker container")

        # Build command
        cmd = ["docker", "run", "--rm"]

        # Add container name
        cmd.extend(["--name", self.container_name])

        # Add network if specified
        if network:
            cmd.extend(["--network", network])

        # Add environment variables
        if env_vars:
            for key, value in env_vars.items():
                cmd.extend(["-e", f"{key}={value}"])

        # Add resource limits
        if resource_limits:
            if "memory" in resource_limits:
                cmd.extend(["--memory", resource_limits["memory"]])
            if "cpu" in resource_limits:
                cmd.extend(["--cpus", resource_limits["cpu"]])

        # Add volume mounts for data
        cmd.extend(
            [
                "-v",
                f"{self.input_dir.absolute()}:/data/input",
                "-v",
                f"{self.output_dir.absolute()}:/data/output",
            ]
        )

        # Use the image
        cmd.append(self.image_name)

        try:
            result = subprocess.run(
                cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
            )

            logger.info(f"Container for node {self.node_id} ran successfully")
            return True
        except subprocess.CalledProcessError as e:
            error_msg = f"Container for node {self.node_id} failed: {e.stderr.decode()}"
            logger.error(error_msg)

            # Check if there's an error file
            error_file = self.output_dir / "error.json"
            if error_file.exists():
                with open(error_file, "r") as f:
                    error_data = json.load(f)
                    error_msg = f"Node execution error: {error_data.get('error', 'Unknown error')}"

            raise NodeExecutionError(error_msg)

    def get_results(self) -> Dict[str, Any]:
        """
        Get the results of node execution.

        Returns:
            The node execution results.
        """
        result_file = self.output_dir / "result.json"
        if result_file.exists():
            with open(result_file, "r") as f:
                return json.load(f)

        error_file = self.output_dir / "error.json"
        if error_file.exists():
            with open(error_file, "r") as f:
                error_data = json.load(f)
                raise NodeExecutionError(
                    f"Node {self.node_id} execution failed: {error_data.get('error', 'Unknown error')}"
                )

        return {"error": "No result or error file found"}

    def cleanup(self):
        """Clean up resources created for this node."""
        if self._created_temp_dir and self.work_dir.exists():
            import shutil

            shutil.rmtree(self.work_dir)


class DockerRuntime:
    """
    Docker-based runtime for executing workflows.

    This runtime executes each node in a separate Docker container,
    handling dependencies, data passing, and workflow orchestration.
    """

    def __init__(
        self,
        base_image: str = "python:3.11-slim",
        network_name: str = "kailash-network",
        work_dir: Optional[str] = None,
        sdk_path: Optional[str] = None,
        resource_limits: Optional[Dict[str, str]] = None,
        task_manager: Optional[TaskManager] = None,
    ):
        """
        Initialize the Docker runtime.

        Args:
            base_image: Base Docker image to use for nodes.
            network_name: Docker network name for container communication.
            work_dir: Working directory for Docker files.
            sdk_path: Path to the Kailash SDK source.
            resource_limits: Default resource limits for containers.
            task_manager: Task manager for tracking workflow execution.
        """
        self.task_manager = task_manager

        self.base_image = base_image
        self.network_name = network_name
        self.resource_limits = resource_limits or {}

        # Working directory
        if work_dir:
            self.work_dir = Path(work_dir)
            self.work_dir.mkdir(parents=True, exist_ok=True)
        else:
            self.work_dir = Path(tempfile.mkdtemp(prefix="kailash_docker_runtime_"))

        # SDK path
        if sdk_path:
            self.sdk_path = Path(sdk_path)
        else:
            # Try to find the SDK path
            import kailash

            kailash_path = Path(kailash.__file__).parent

            # Check if we're in a development environment
            if "site-packages" not in str(kailash_path):
                # Development environment - use parent of src
                self.sdk_path = kailash_path.parent.parent
            else:
                # Installed package - use package directory
                self.sdk_path = kailash_path.parent

        # Create Docker network
        self._create_network()

        # Track node wrappers
        self.node_wrappers = {}

    def _create_task_run(self, workflow: Workflow) -> Optional[str]:
        """Create a task run if task manager is available."""
        if self.task_manager:
            return self.task_manager.create_run(workflow.name)
        return None

    def _update_task_status(
        self, run_id: Optional[str], node_id: str, status: str, output: Any = None
    ):
        """Update task status if task manager is available."""
        if self.task_manager and run_id:
            # For now, just update run status - task tracking needs more setup
            if status == "failed":
                error_msg = (
                    output.get("error", "Unknown error") if output else "Unknown error"
                )
                self.task_manager.update_run_status(run_id, "failed", error_msg)

    def _complete_task_run(
        self, run_id: Optional[str], status: str, result: Any = None
    ):
        """Complete task run if task manager is available."""
        if self.task_manager and run_id:
            if status == "completed":
                self.task_manager.update_run_status(run_id, "completed")
            else:
                error_msg = (
                    result.get("error", "Unknown error") if result else "Unknown error"
                )
                self.task_manager.update_run_status(run_id, "failed", error_msg)

    def _create_network(self):
        """Create a Docker network for container communication."""
        try:
            subprocess.run(
                ["docker", "network", "create", self.network_name],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            logger.info(f"Created Docker network: {self.network_name}")
        except subprocess.CalledProcessError as e:
            # Ignore if network already exists
            if "already exists" not in e.stderr.decode():
                error_msg = f"Failed to create Docker network: {e.stderr.decode()}"
                logger.error(error_msg)
                raise RuntimeError(error_msg)

    def execute(
        self,
        workflow: Workflow,
        inputs: Dict[str, Dict[str, Any]] = None,
        node_resource_limits: Dict[str, Dict[str, str]] = None,
    ) -> Tuple[Dict[str, Dict[str, Any]], str]:
        """
        Execute a workflow using Docker containers.

        Args:
            workflow: The workflow to execute.
            inputs: The inputs for each node.
            node_resource_limits: Resource limits for specific nodes.

        Returns:
            Tuple of (execution_results, run_id).
        """
        # Create task run
        run_id = self._create_task_run(workflow)

        # Default inputs
        inputs = inputs or {}
        node_resource_limits = node_resource_limits or {}

        try:
            # Validate workflow
            workflow.validate()

            # Get execution order
            execution_order = workflow.get_execution_order()

            # Track results
            results = {}

            # Prepare all node wrappers and build images
            logger.info("Preparing Docker containers for workflow execution")
            for node_id, node in workflow.nodes.items():
                self.node_wrappers[node_id] = DockerNodeWrapper(
                    node=node,
                    node_id=node_id,
                    base_image=self.base_image,
                    work_dir=self.work_dir / node_id,
                    sdk_path=self.sdk_path,
                )

                # Build image
                self.node_wrappers[node_id].build_image()

            # Execute nodes in order
            logger.info(f"Executing workflow in order: {execution_order}")
            for node_id in execution_order:
                logger.info(f"Executing node: {node_id}")

                # Get node wrapper
                wrapper = self.node_wrappers[node_id]

                # Update task status
                self._update_task_status(run_id, node_id, "running")

                # Get node inputs
                node_inputs = inputs.get(node_id, {}).copy()

                # Add inputs from upstream nodes
                for upstream_id, mapping in workflow.connections.get(
                    node_id, {}
                ).items():
                    if upstream_id in results:
                        for dest_param, src_param in mapping.items():
                            if src_param in results[upstream_id]:
                                node_inputs[dest_param] = results[upstream_id][
                                    src_param
                                ]

                # Prepare inputs
                wrapper.prepare_inputs(node_inputs)

                # Get resource limits for this node
                resource_limits = None
                if node_id in node_resource_limits:
                    resource_limits = node_resource_limits[node_id]
                elif self.resource_limits:
                    resource_limits = self.resource_limits

                # Run the container
                success = wrapper.run_container(
                    network=self.network_name, resource_limits=resource_limits
                )

                # Get results
                if success:
                    results[node_id] = wrapper.get_results()
                    self._update_task_status(
                        run_id, node_id, "completed", results[node_id]
                    )
                else:
                    self._update_task_status(
                        run_id, node_id, "failed", {"error": "Execution failed"}
                    )
                    raise NodeExecutionError(f"Node {node_id} execution failed")

            # Mark run as completed
            self._complete_task_run(run_id, "completed")

            return results, run_id

        except Exception as e:
            # Handle errors
            logger.error(f"Workflow execution failed: {e}")
            self._complete_task_run(run_id, "failed", {"error": str(e)})
            raise

    def cleanup(self):
        """Clean up Docker resources."""
        # Clean up node wrappers
        for wrapper in self.node_wrappers.values():
            wrapper.cleanup()

        # Remove Docker network
        try:
            subprocess.run(
                ["docker", "network", "rm", self.network_name],
                check=False,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except Exception as e:
            logger.warning(f"Failed to remove Docker network: {e}")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()
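
For orientation, the sketch below shows how the DockerRuntime added in this file appears intended to be driven. It is an illustrative example, not part of the packaged code: it assumes a Workflow instance built elsewhere (the construction API lives in kailash/workflow/graph.py and is not shown in this diff), and the node IDs, input names, and paths are hypothetical.

# Hypothetical usage sketch; assumes `workflow` is an existing kailash Workflow
# containing a node registered under the ID "reader".
from kailash.runtime.docker import DockerRuntime

with DockerRuntime(
    base_image="python:3.11-slim",
    resource_limits={"memory": "512m", "cpu": "1.0"},  # default limits applied to every container
) as runtime:
    results, run_id = runtime.execute(
        workflow,
        inputs={"reader": {"file_path": "/data/report.csv"}},  # hypothetical per-node inputs
        node_resource_limits={"reader": {"memory": "2g"}},     # per-node override
    )

# Each entry in `results` is whatever that node's container wrote to /data/output/result.json;
# run_id is None unless a TaskManager was passed to the runtime.
print(run_id, results)

Exiting the with-block calls cleanup(), which cleans up the per-node wrappers and removes the kailash-network Docker network.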