interloper-docker 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- interloper_docker-0.2.0/PKG-INFO +14 -0
- interloper_docker-0.2.0/README.md +3 -0
- interloper_docker-0.2.0/pyproject.toml +46 -0
- interloper_docker-0.2.0/src/interloper_docker/__init__.py +9 -0
- interloper_docker-0.2.0/src/interloper_docker/backfiller.py +336 -0
- interloper_docker-0.2.0/src/interloper_docker/runner.py +253 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: interloper-docker
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Interloper Docker integration
|
|
5
|
+
Author: Guillaume Onfroy
|
|
6
|
+
Author-email: Guillaume Onfroy <guillaume@digitlcloud.com>
|
|
7
|
+
Requires-Dist: docker>=7.1.0
|
|
8
|
+
Requires-Dist: interloper-core
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# interloper-docker
|
|
13
|
+
|
|
14
|
+
Docker execution support for Interloper.
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# ###############
|
|
2
|
+
# PROJECT / UV
|
|
3
|
+
# ###############
|
|
4
|
+
[project]
|
|
5
|
+
name = "interloper-docker"
|
|
6
|
+
version = "0.2.0"
|
|
7
|
+
description = "Interloper Docker integration"
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
authors = [{ name = "Guillaume Onfroy", email = "guillaume@digitlcloud.com" }]
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"docker>=7.1.0",
|
|
13
|
+
"interloper-core",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.optional-dependencies]
|
|
17
|
+
|
|
18
|
+
[build-system]
|
|
19
|
+
requires = ["uv_build>=0.9.5,<0.10.0"]
|
|
20
|
+
build-backend = "uv_build"
|
|
21
|
+
|
|
22
|
+
[tool.uv.sources]
|
|
23
|
+
interloper-core = { workspace = true }
|
|
24
|
+
|
|
25
|
+
# ###############
|
|
26
|
+
# RUFF
|
|
27
|
+
# ###############
|
|
28
|
+
[tool.ruff]
|
|
29
|
+
line-length = 120
|
|
30
|
+
|
|
31
|
+
[tool.ruff.lint]
|
|
32
|
+
extend-select = ["E", "I", "UP", "ANN001", "ANN201", "ANN202"]
|
|
33
|
+
|
|
34
|
+
[tool.ruff.lint.per-file-ignores]
|
|
35
|
+
"__init__.py" = ["F401", "F403"]
|
|
36
|
+
"**/schemas/**" = ["E501"]
|
|
37
|
+
"tests/**" = ["ANN", "F811"]
|
|
38
|
+
|
|
39
|
+
# ###############
|
|
40
|
+
# PYRIGHT
|
|
41
|
+
# ###############
|
|
42
|
+
[tool.pyright]
|
|
43
|
+
include = ["src"]
|
|
44
|
+
typeCheckingMode = "basic"
|
|
45
|
+
reportMissingParameterType = true
|
|
46
|
+
ignore = ["libs/**", "tests/**", "scripts/**"]
|
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
"""Docker Backfiller implementation for Interloper.
|
|
2
|
+
|
|
3
|
+
This backfiller starts a Docker container and invokes the Interloper CLI inside it
|
|
4
|
+
using an inline JSON config. It runs the entire DAG in the container, delegating
|
|
5
|
+
asset scheduling to the runner configured in the inline config (typically in_process).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import threading
|
|
11
|
+
from collections.abc import Callable
|
|
12
|
+
from time import sleep
|
|
13
|
+
|
|
14
|
+
import docker
|
|
15
|
+
from docker.errors import NotFound
|
|
16
|
+
from docker.models.containers import Container
|
|
17
|
+
from interloper.backfillers.base import Backfiller
|
|
18
|
+
from interloper.cli.config import Config
|
|
19
|
+
from interloper.dag.base import DAG
|
|
20
|
+
from interloper.errors import PartitionError
|
|
21
|
+
from interloper.events.base import Event, EventBus, parse_event_from_log_line
|
|
22
|
+
from interloper.partitioning.base import Partition, PartitionWindow
|
|
23
|
+
from interloper.partitioning.time import TimePartition, TimePartitionWindow
|
|
24
|
+
from interloper.runners.base import Runner
|
|
25
|
+
from interloper.runners.results import ExecutionStatus, RunResult
|
|
26
|
+
from interloper.serialization.backfiller import BackfillerSpec
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DockerBackfiller(Backfiller[Container]):
    """Run an Interloper DAG inside a Docker container via the Interloper CLI.

    The image must contain the `interloper` package (CLI available on PATH).

    Every submitted run is started as its own detached container. While the
    container runs, a background thread follows its logs and re-emits every
    line recognised by `parse_event_from_log_line` on the local `EventBus`.
    """

    def __init__(
        self,
        image: str,
        env_vars: dict[str, str] | None = None,
        max_containers: int = 1,
        runner: Runner | None = None,
        volumes: dict[str, dict[str, str]] | list[str] | None = None,
        dind: bool = False,
        on_event: Callable[[Event], None] | None = None,
    ) -> None:
        """Initialize the DockerBackfiller.

        Args:
            image: Docker image to use
            env_vars: Environment variables to pass to the container
            max_containers: Maximum number of concurrent containers (default 1)
            runner: Runner to use for running assets
            volumes: Volume mounts for the container, either in the Docker SDK
                dict form or as a list of "source:target[:mode]" strings
            dind: If True, mount the Docker socket to enable Docker-in-Docker
            on_event: Optional event handler for lifecycle events
        """
        super().__init__(runner=runner, on_event=on_event)

        # Force the runner to re-raise exceptions to make sure the container's exit code is propagated.
        self.runner._reraise = True

        self._image = image
        self._env_vars = env_vars or {}
        self._max_containers = max_containers
        self._volumes = volumes or {}
        self._dind = dind
        self._docker = docker.from_env()

        # Track log streaming threads (keyed by container id) for cleanup
        self._log_threads: dict[str, threading.Thread] = {}
        self._stop_log_streaming = threading.Event()

    @property
    def _capacity(self) -> int:
        """Maximum number of concurrent containers."""
        return self._max_containers

    def _on_start(self) -> None:
        """Re-arm log streaming by clearing the stop flag."""
        self._stop_log_streaming.clear()

    def _on_end(self) -> None:
        """Signal every log streaming thread to stop and wait briefly for each."""
        # Signal all log streaming threads to stop
        self._stop_log_streaming.set()

        # Wait for threads to finish (bounded, the threads are daemons anyway)
        for thread in self._log_threads.values():
            thread.join(timeout=2.0)
        self._log_threads.clear()

    def _build_command(
        self,
        dag: DAG,
        partition_or_window: Partition | PartitionWindow | None,
        backfill_id: str,
    ) -> list[str]:
        """Build the CLI command for a partition.

        Args:
            dag: The DAG to execute
            partition_or_window: The partition or window, or None for a run
                without any date argument
            backfill_id: The backfill ID

        Returns:
            Command list for the container

        Raises:
            PartitionError: If the partition or window is not time based
        """
        config = Config(dag=dag, runner=self.runner)

        cmd = [
            "interloper",
            "run",
            "--format=inline",
            f"--backfill-id={backfill_id}",
            config.to_json(),
        ]

        if partition_or_window is None:
            return cmd

        if isinstance(partition_or_window, TimePartition):
            cmd.extend(["--date", partition_or_window.value.strftime("%Y-%m-%d")])
        elif isinstance(partition_or_window, TimePartitionWindow):
            cmd.extend(
                [
                    "--start-date",
                    partition_or_window.start.strftime("%Y-%m-%d"),
                    "--end-date",
                    partition_or_window.end.strftime("%Y-%m-%d"),
                ]
            )
        else:
            raise PartitionError("Unsupported partition or window type")
        return cmd

    def _build_env(self) -> dict[str, str]:
        """Build the environment variables for the container."""
        env = dict(self._env_vars)
        # Enable log-based event streaming
        env["INTERLOPER_EVENTS_TO_STDERR"] = "true"
        return env

    def _build_volumes(self) -> dict[str, dict[str, str]]:
        """Build the volume mounts for the container.

        List entries use the "source:target[:mode]" form; the mode defaults
        to "rw" when it is omitted.

        Raises:
            ValueError: If a list entry has no source or no target
        """
        volumes: dict[str, dict[str, str]] = {}
        if isinstance(self._volumes, dict):
            volumes.update(self._volumes)
        elif isinstance(self._volumes, list):
            for volume in self._volumes:
                source, _, remainder = volume.partition(":")
                target, _, mode = remainder.partition(":")
                if not source or not target:
                    raise ValueError(f"Invalid volume {volume!r}, expected 'source:target[:mode]'")
                # Honour an explicit mode (e.g. "ro") instead of always mounting read-write.
                volumes[source] = {"bind": target, "mode": mode or "rw"}
        if self._dind:
            volumes["/var/run/docker.sock"] = {"bind": "/var/run/docker.sock", "mode": "rw"}
        return volumes

    def _build_name(self, partition_or_window: Partition | PartitionWindow | None) -> str:
        """Build the name for the container.

        The name is lower-cased and every character outside `[a-z0-9.-]`
        (including ":" and "_") is replaced by "-", so that the string form of
        the partition cannot produce a name Docker rejects.
        """
        name = f"interloper_backfill_{self.state.backfill_id[:8]}"
        if partition_or_window is not None:
            name += f"-{partition_or_window}"
        allowed = "abcdefghijklmnopqrstuvwxyz0123456789.-"
        return "".join(char if char in allowed else "-" for char in name.lower())

    def _start_log_streaming(self, container: Container) -> None:
        """Start a background thread to stream logs and parse events from a container.

        Args:
            container: The Docker container to stream logs from
        """

        def stream_logs() -> None:
            try:
                # Stream logs from the container (both stdout and stderr)
                for log_line in container.logs(stream=True, follow=True, stdout=True, stderr=True):
                    if self._stop_log_streaming.is_set():
                        break

                    try:
                        line = log_line.decode("utf-8", errors="ignore")
                        event = parse_event_from_log_line(line)
                        if event is not None:
                            EventBus.get_instance().emit(event)
                    except Exception:
                        # Ignore parsing errors, continue streaming
                        pass
            except Exception:
                # Container may have been removed or stopped
                pass

        thread = threading.Thread(target=stream_logs, daemon=True)
        thread.start()
        if container.id is not None:
            self._log_threads[container.id] = thread

    def _stop_container_log_streaming(self, container: Container) -> None:
        """Stop and clean up the log streaming thread for a container.

        Args:
            container: The Docker container to stop streaming for
        """
        if container.id is None:
            return
        thread = self._log_threads.pop(container.id, None)
        if thread is not None:
            # Thread will stop on next iteration due to container exit
            thread.join(timeout=1.0)

    def _submit_run(
        self,
        dag: DAG,
        partition_or_window: Partition | PartitionWindow | None,
    ) -> Container:
        """Submit execution of a run in a Docker container.

        Args:
            dag: The DAG to execute
            partition_or_window: Either a Partition or PartitionWindow object

        Returns:
            The container as the handle
        """
        cmd = self._build_command(dag, partition_or_window, self.state.backfill_id)
        env = self._build_env()
        volumes = self._build_volumes()
        name = self._build_name(partition_or_window)

        self.state.mark_run_running(partition_or_window)

        container = self._docker.containers.run(
            image=self._image,
            name=name,
            command=cmd,
            environment=env,
            volumes=volumes if volumes else None,
            # Keep the container after exit so `_wait_any` can read its exit code and logs.
            remove=False,
            detach=True,
            stdout=True,
            stderr=True,
        )
        # Store partition in container object for _wait_any
        setattr(container, "_interloper_partition", partition_or_window)

        # Start log streaming for event collection
        self._start_log_streaming(container)

        return container

    def _wait_any(self, handles: list[Container]) -> Container:
        """Wait for any container to complete by polling.

        The first container found in the "exited" or "dead" state is recorded
        in the backfill state (completed on exit code 0, failed otherwise),
        has its logs printed on failure, is removed, and is returned.

        Args:
            handles: List of container objects to wait for

        Returns:
            The container that completed
        """
        while True:
            for container in handles:
                container.reload()

                if container.status in ("exited", "dead"):
                    # Stop log streaming for this container
                    self._stop_container_log_streaming(container)

                    exit_info = container.wait()
                    status_code = exit_info.get("StatusCode", 1)

                    # Get partition from container object
                    partition = getattr(container, "_interloper_partition", None)

                    if status_code == 0:
                        # TODO: This is not the true RunResult, we need to get it from the container?
                        # Missing the asset_executions.
                        run_result = RunResult(partition, ExecutionStatus.COMPLETED)
                        self.state.mark_run_completed(partition, run_result)
                    else:
                        self.state.mark_run_failed(partition, f"Container exited with code {status_code}")

                        # Best effort: dump the container output to help diagnose the failure.
                        try:
                            logs = container.logs(stdout=True, stderr=True)
                            if logs:
                                print("=============== START OF RUN CONTAINER LOGS ==================")
                                print(logs.decode("utf-8", errors="ignore"))
                                print("================ END OF RUN CONTAINER LOGS ===================")
                        except Exception:
                            pass

                    # Remove the container after processing
                    try:
                        container.remove()
                    except Exception as e:
                        print(f"Error removing container {container.id}: {e}")

                    return container

            sleep(1.0)

    def _cancel_all(self, handles: list[Container]) -> None:
        """Best-effort cancellation of outstanding containers.

        Each container is stopped, falling back to a kill when stopping fails.
        The run is marked as cancelled when the container was stopped, killed,
        or was already gone; it is left untouched when neither call succeeded.

        Args:
            handles: List of container objects to cancel
        """
        for container in handles:
            partition = getattr(container, "_interloper_partition", None)

            terminated = True
            try:
                container.stop(timeout=5)
            except NotFound:
                # Container already removed, nothing left to stop
                pass
            except Exception:
                try:
                    container.kill()
                except NotFound:
                    # Container already removed
                    pass
                except Exception:
                    terminated = False

            # Join the log thread only after the container was asked to stop: while the
            # container is still running the thread is blocked on the log stream and the
            # join would always run into its timeout.
            self._stop_container_log_streaming(container)

            if terminated:
                # `partition` is None for runs submitted without a partition; the other
                # state transitions are called with None as well.
                self.state.mark_run_cancelled(partition)

    def to_spec(self) -> BackfillerSpec:
        """Convert to serializable spec."""
        return BackfillerSpec(
            path=self.path,
            init=dict(
                image=self._image,
                env_vars=self._env_vars,
                volumes=self._volumes,
                max_containers=self._max_containers,
                dind=self._dind,
            ),
        )
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
"""Docker-based runner that runs each asset in its own container.
|
|
2
|
+
|
|
3
|
+
Each submitted asset is executed inside a fresh container. To allow an asset
|
|
4
|
+
to resolve its upstream dependencies from IO without recomputing them, we pass
|
|
5
|
+
to the container a mini-DAG consisting of the target asset plus all its
|
|
6
|
+
upstream ancestors. The container runs the Interloper CLI with an inline
|
|
7
|
+
config, similar to the `DockerBackfiller`.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
|
|
14
|
+
import docker
|
|
15
|
+
from docker.models.containers import Container
|
|
16
|
+
from interloper.assets.base import Asset
|
|
17
|
+
from interloper.cli.config import Config
|
|
18
|
+
from interloper.dag.base import DAG
|
|
19
|
+
from interloper.errors import PartitionError, RunnerError
|
|
20
|
+
from interloper.events.base import Event
|
|
21
|
+
from interloper.partitioning.base import Partition, PartitionWindow
|
|
22
|
+
from interloper.partitioning.time import TimePartition, TimePartitionWindow
|
|
23
|
+
from interloper.runners.base import Runner
|
|
24
|
+
from interloper.serialization.runner import RunnerSpec
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DockerRunner(Runner[Container]):
    """Execute assets as individual Docker containers.

    For each asset, constructs a mini-DAG comprising the asset and all its
    upstream ancestors. The mini-DAG is sent to the container via inline JSON.
    Inside the container, all non-target assets are marked as
    `materializable=False` prior to execution to avoid recomputation while
    still enabling IO-based dependency resolution.
    """

    def __init__(
        self,
        image: str,
        max_containers: int = 4,
        env_vars: dict[str, str] | None = None,
        volumes: dict[str, dict[str, str]] | list[str] | None = None,
        fail_fast: bool = False,
        reraise: bool = False,
        on_event: Callable[[Event], None] | None = None,
    ) -> None:
        """Initialize the DockerRunner.

        Args:
            image: Docker image to use for container execution.
            max_containers: Maximum number of concurrent containers.
            env_vars: Environment variables to pass to the container.
            volumes: Volume mounts for the container, either in the Docker SDK
                dict form or as a list of "source:target[:mode]" strings.
            fail_fast: Stop execution on first failure.
            reraise: Re-raise exceptions.
            on_event: Optional event handler for lifecycle events.
        """
        super().__init__(fail_fast=fail_fast, reraise=reraise, on_event=on_event)
        self._image = image
        self._max_containers = max_containers
        self._env_vars = env_vars or {}
        self._volumes = volumes or {}
        self._docker = docker.from_env()

    @property
    def _capacity(self) -> int:
        """Maximum number of concurrent containers."""
        return self._max_containers

    def _build_command(
        self,
        dag: DAG,
        partition_or_window: Partition | PartitionWindow | None,
        run_id: str,
    ) -> list[str]:
        """Build the CLI command for asset execution in a container.

        Args:
            dag: The DAG to execute.
            partition_or_window: The partition or window, or None for a run
                without any date argument.
            run_id: The run ID.

        Returns:
            Command list for the container.

        Raises:
            PartitionError: If the partition or window is not time based.
        """
        config = Config(dag=dag)

        cmd = [
            "interloper",
            "run",
            "--format",
            "inline",
            f"--run-id={run_id}",
            config.to_json(),
        ]

        # No partition: run without date arguments, as `DockerBackfiller._build_command` does.
        if partition_or_window is None:
            return cmd

        if isinstance(partition_or_window, TimePartition):
            cmd.extend(["--date", partition_or_window.value.strftime("%Y-%m-%d")])
        elif isinstance(partition_or_window, TimePartitionWindow):
            cmd.extend(
                [
                    "--start-date",
                    partition_or_window.start.strftime("%Y-%m-%d"),
                    "--end-date",
                    partition_or_window.end.strftime("%Y-%m-%d"),
                ]
            )
        else:
            raise PartitionError("Unsupported partition or window type")
        return cmd

    def _build_env(self) -> dict[str, str]:
        """Build the environment variables for the container."""
        return dict(self._env_vars)

    def _build_volumes(self) -> dict[str, dict[str, str]]:
        """Build the volume mounts for the container.

        List entries use the "source:target[:mode]" form; the mode defaults
        to "rw" when it is omitted.

        Raises:
            ValueError: If a list entry has no source or no target.
        """
        volumes: dict[str, dict[str, str]] = {}
        if isinstance(self._volumes, dict):
            volumes.update(self._volumes)
        elif isinstance(self._volumes, list):
            for volume in self._volumes:
                source, _, remainder = volume.partition(":")
                target, _, mode = remainder.partition(":")
                if not source or not target:
                    raise ValueError(f"Invalid volume {volume!r}, expected 'source:target[:mode]'")
                # Honour an explicit mode (e.g. "ro") instead of always mounting read-write.
                volumes[source] = {"bind": target, "mode": mode or "rw"}
        return volumes

    def _build_name(self, asset: Asset) -> str:
        """Build the name for the container.

        The name is lower-cased and every character outside `[a-z0-9.-]`
        (including ":" and "_") is replaced by "-", so that the asset key
        cannot produce a name Docker rejects.
        """
        name = f"interloper_run_{self.state.run_id[:8]}-{asset.instance_key}"
        allowed = "abcdefghijklmnopqrstuvwxyz0123456789.-"
        return "".join(char if char in allowed else "-" for char in name.lower())

    def _submit_asset(
        self,
        asset: Asset,
        partition_or_window: Partition | PartitionWindow | None,
    ) -> Container:
        """Submit execution of an asset and return the container object for completion tracking.

        IMPORTANT: this method is not calling the `_execute_asset` method of the base class.
        Therefore, the state has to be updated manually here and in `_wait_any` below.

        Args:
            asset: The asset to execute
            partition_or_window: Either a Partition or PartitionWindow object

        Returns:
            The container object for the asset execution
        """
        # Build a mini-DAG: target asset + its parents (non-materializable)
        mini_dag = self.state.dag.mini_dag(asset.instance_key)

        cmd = self._build_command(mini_dag, partition_or_window, self.state.run_id)
        name = self._build_name(asset)
        env = self._build_env()
        volumes = self._build_volumes()

        self.state.mark_asset_running(asset)

        container = self._docker.containers.run(
            image=self._image,
            name=name,
            command=cmd,
            environment=env,
            volumes=volumes if volumes else None,
            # The label is how `_wait_any` and `_cancel_all` map a container back to its asset.
            labels={"interloper.asset_key": asset.instance_key},
            # Keep the container after exit so `_wait_any` can read its exit code and logs.
            remove=False,
            detach=True,
            stdout=True,
            stderr=True,
        )

        return container

    def _wait_any(self, handles: list[Container]) -> Container:
        """Wait for any container to finish by polling.

        IMPORTANT: the `_execute_asset` method of the base class is not called by `_submit_asset`.
        Therefore, the state has to be updated manually here and in `_submit_asset` above.

        The first container found in the "exited" or "dead" state is mapped
        back to its asset, recorded in the run state (completed on exit code 0,
        failed otherwise), has its logs printed on failure, and is removed.

        Args:
            handles: List of container objects to wait for

        Returns:
            The container object that finished

        Raises:
            RunnerError: If the container cannot be mapped to an asset, or if it
                failed while `reraise` or `fail_fast` is enabled
        """
        while True:
            for container in handles:
                container.reload()

                if container.status not in ("exited", "dead"):
                    continue

                exit_info = container.wait()
                status_code = exit_info.get("StatusCode", 1)

                # Map back to asset
                asset: Asset | None = None
                asset_key = container.labels.get("interloper.asset_key")
                if asset_key and asset_key in self.state.dag.asset_map:
                    asset = self.state.dag.asset_map[asset_key]
                if asset is None:
                    raise RunnerError("Failed to map container to asset")

                failure: str | None = None
                if status_code == 0:
                    self.state.mark_asset_completed(asset)
                else:
                    failure = f"Container {container.id} exited with code {status_code}"
                    self.state.mark_asset_failed(asset, failure)

                    # Best effort: dump the container output to help diagnose the failure.
                    try:
                        logs = container.logs(stdout=True, stderr=True)
                        if logs:
                            print("=============== START OF ASSET CONTAINER LOGS ================")
                            print(logs.decode("utf-8", errors="ignore"))
                            print("================ END OF ASSET CONTAINER LOGS =================")
                    except Exception:
                        pass

                # Remove the container after processing. This happens before any re-raise
                # so that a failed container is not left behind.
                try:
                    container.remove()
                except Exception as e:
                    print(f"Error removing container {container.id}: {e}")

                if failure is not None and (self._reraise or self._fail_fast):
                    raise RunnerError(failure)

                return container

    def _cancel_all(self, handles: list[Container]) -> None:
        """Best-effort cancellation of outstanding containers.

        Each container is stopped, falling back to a kill when stopping fails,
        and its asset is marked as cancelled in either case. A container that
        cannot be mapped back to an asset is skipped so the remaining
        containers are still cancelled.

        Args:
            handles: List of container objects to cancel
        """
        for container in handles:
            try:
                container.stop(timeout=2)
            except Exception:
                try:
                    container.kill()
                except Exception:
                    pass
            finally:
                asset_key = container.labels.get("interloper.asset_key")
                if asset_key and asset_key in self.state.dag.asset_map:
                    self.state.mark_asset_cancelled(self.state.dag.asset_map[asset_key])

    def to_spec(self) -> RunnerSpec:
        """Convert to serializable spec."""
        return RunnerSpec(
            path=self.path,
            init=dict(
                image=self._image,
                max_containers=self._max_containers,
                env_vars=self._env_vars,
                volumes=self._volumes,
                fail_fast=self._fail_fast,
                reraise=self._reraise,
            ),
        )
|