interloper-docker 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,12 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: interloper-docker
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Interloper Docker integration
5
5
  Author: Guillaume Onfroy
6
6
  Author-email: Guillaume Onfroy <guillaume@digitlcloud.com>
7
7
  Requires-Dist: docker>=7.1.0
8
8
  Requires-Dist: interloper-core
9
+ Requires-Dist: interloper-scheduler
9
10
  Requires-Python: >=3.10
10
11
  Description-Content-Type: text/markdown
11
12
 
12
- # interloper-docker
13
-
14
- Docker execution support for Interloper.
File without changes
@@ -3,24 +3,20 @@
3
3
  # ###############
4
4
  [project]
5
5
  name = "interloper-docker"
6
- version = "0.2.0"
6
+ version = "0.3.0"
7
7
  description = "Interloper Docker integration"
8
8
  readme = "README.md"
9
9
  authors = [{ name = "Guillaume Onfroy", email = "guillaume@digitlcloud.com" }]
10
10
  requires-python = ">=3.10"
11
- dependencies = [
12
- "docker>=7.1.0",
13
- "interloper-core",
14
- ]
15
-
16
- [project.optional-dependencies]
11
+ dependencies = ["docker>=7.1.0", "interloper-core", "interloper-scheduler"]
17
12
 
18
13
  [build-system]
19
- requires = ["uv_build>=0.9.5,<0.10.0"]
14
+ requires = ["uv_build>=0.11.5,<0.12"]
20
15
  build-backend = "uv_build"
21
16
 
22
17
  [tool.uv.sources]
23
18
  interloper-core = { workspace = true }
19
+ interloper-scheduler = { workspace = true }
24
20
 
25
21
  # ###############
26
22
  # RUFF
@@ -33,7 +29,6 @@ extend-select = ["E", "I", "UP", "ANN001", "ANN201", "ANN202"]
33
29
 
34
30
  [tool.ruff.lint.per-file-ignores]
35
31
  "__init__.py" = ["F401", "F403"]
36
- "**/schemas/**" = ["E501"]
37
32
  "tests/**" = ["ANN", "F811"]
38
33
 
39
34
  # ###############
@@ -43,4 +38,4 @@ extend-select = ["E", "I", "UP", "ANN001", "ANN201", "ANN202"]
43
38
  include = ["src"]
44
39
  typeCheckingMode = "basic"
45
40
  reportMissingParameterType = true
46
- ignore = ["libs/**", "tests/**", "scripts/**"]
41
+ ignore = ["tests/**"]
@@ -1,9 +1,9 @@
1
1
  """Interloper Docker integration for container-based asset execution."""
2
2
 
3
- from interloper_docker.backfiller import DockerBackfiller
3
+ from interloper_docker.launcher import DockerLauncher
4
4
  from interloper_docker.runner import DockerRunner
5
5
 
6
6
  __all__ = [
7
- "DockerBackfiller",
7
+ "DockerLauncher",
8
8
  "DockerRunner",
9
9
  ]
@@ -0,0 +1,165 @@
1
+ """Docker launcher: runs each job in its own container."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import os
8
+ from typing import Any
9
+ from uuid import UUID
10
+
11
+ import docker
12
+ from interloper.catalog.base import Catalog
13
+ from interloper_scheduler.launcher import Launcher, RunState, RunStatus
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ # TODO: Implement a grace period to check if the container is running before returning in order to avoid
18
+ # stale run statuses due to container startup errors?
19
+
20
+ # TODO: `launch` command should support --catalog option to pass the catalog as a serialized string
21
+ # Then use this instead of INTERLOPER_CATALOG
22
+
23
+
24
class DockerLauncher(Launcher):
    """Launches each run in its own Docker container.

    The container executes the ``interloper launch <run_id>`` CLI command,
    which hydrates the DAG from the database and runs it to completion.

    Postgres connection parameters are passed as plain values. The caller
    (``_build_launcher``) injects the app-level defaults; any overrides
    from the launcher YAML config take precedence.
    """

    def __init__(
        self,
        catalog: Catalog,
        postgres_host: str,
        postgres_port: int,
        postgres_user: str,
        postgres_password: str,
        postgres_database: str,
        image: str = "interloper:latest-scheduler",
        runner_type: str = "multi_thread",
        runner_config: dict[str, Any] | None = None,
        volumes: dict[str, dict[str, str]] | None = None,
    ) -> None:
        """Initialize the Docker launcher.

        Args:
            catalog: Catalog to inject into the container so it builds an identical catalog.
            postgres_host: Postgres host to inject into the container.
            postgres_port: Postgres port to inject into the container.
            postgres_user: Postgres user to inject into the container.
            postgres_password: Postgres password to inject into the container.
            postgres_database: Postgres database to inject into the container.
            image: Docker image to use.
            runner_type: Runner type name forwarded to the container.
            runner_config: Runner-specific kwargs forwarded to the container.
            volumes: Volume mounts for the container. When
                ``runner_type`` is ``"docker"``, the Docker socket is
                mounted automatically if not already included.
        """
        super().__init__(runner_type=runner_type, runner_config=runner_config)
        self._client = docker.from_env()
        self._catalog = catalog
        self._image = image
        self._postgres_host = postgres_host
        self._postgres_port = postgres_port
        self._postgres_user = postgres_user
        self._postgres_password = postgres_password
        self._postgres_database = postgres_database
        # Copy so that adding the socket mount never mutates the caller's dict.
        self._volumes = dict(volumes or {})
        if runner_type == "docker" and "/var/run/docker.sock" not in self._volumes:
            self._volumes["/var/run/docker.sock"] = {"bind": "/var/run/docker.sock", "mode": "rw"}

    @staticmethod
    def _container_name(run_id: UUID) -> str:
        """Return the container name used for ``run_id``.

        ``launch`` and ``describe_run`` must agree on this name, so it is
        derived in exactly one place.
        """
        return f"interloper_run_{str(run_id)[:8]}"

    def launch(self, run_id: UUID) -> None:
        """Start a container that executes a single run.

        Args:
            run_id: The run UUID to execute.

        Raises:
            Exception: Whatever the Docker client raises when the container
                cannot be created or started; it is logged and re-raised.
        """
        environment = self._build_environment()
        container_name = self._container_name(run_id)

        try:
            container = self._client.containers.run(
                image=self._image,
                name=container_name,
                command=["interloper", "launch", str(run_id)],
                environment=environment,
                volumes=self._volumes or None,
                user="root" if self._runner_type == "docker" else None,
                detach=True,
                # Kept after exit so describe_run can still read the exit state.
                auto_remove=False,
                labels={"interloper.run_id": str(run_id)},
            )
        except Exception:
            logger.exception("Failed to start container for run %s", run_id)
            raise
        else:
            logger.info("Started container %s for run %s", container.short_id, run_id)

    def describe_run(self, run_id: UUID) -> RunState:
        """Return the authoritative state of a run's container.

        Used by the reaper to catch failed runs as soon as the
        container terminates, without waiting for the fallback timeout.

        Args:
            run_id: The run UUID to describe.

        Returns:
            A :class:`RunState` indicating whether the container is
            still running, has succeeded, has failed, or is gone.
        """
        # Local import: only this method needs the exception type.
        from docker.errors import NotFound

        container_name = self._container_name(run_id)
        try:
            container = self._client.containers.get(container_name)
            container.reload()
        except NotFound:
            return RunState(status=RunStatus.NOT_FOUND)
        except Exception:
            # Still reported as NOT_FOUND (unchanged contract), but a daemon or
            # API failure is no longer indistinguishable from a missing container.
            logger.warning(
                "Could not inspect container %s for run %s",
                container_name,
                run_id,
                exc_info=True,
            )
            return RunState(status=RunStatus.NOT_FOUND)

        state = container.attrs.get("State", {}) if container.attrs else {}
        docker_status = (state.get("Status") or container.status or "").lower()
        error = state.get("Error") or ""

        # A container whose start failed stays in "created" with State.Error
        # populated; treating it as alive would leave the run stuck as RUNNING.
        start_failed = docker_status == "created" and bool(error)

        # "running", "created", "restarting", "paused" — still alive
        if docker_status in ("running", "created", "restarting", "paused") and not start_failed:
            return RunState(status=RunStatus.RUNNING)

        # Terminal states: "exited", "dead", "removing"
        exit_code = state.get("ExitCode")
        if exit_code == 0 and not start_failed:
            return RunState(status=RunStatus.SUCCEEDED)

        # Anything else is a failure
        parts = [f"Container {container.short_id} status={docker_status}"]
        if exit_code is not None:
            parts.append(f"exit_code={exit_code}")
        if state.get("OOMKilled"):
            parts.append("OOMKilled")
        if error:
            parts.append(f"error={error}")
        return RunState(status=RunStatus.FAILED, error=" ".join(parts))

    def _build_environment(self) -> dict[str, str]:
        """Build environment variables for the container.

        Returns:
            The Postgres connection settings, the serialized catalog paths and
            the runner type/config, plus ``SECRETS_ENCRYPTION_KEY`` when it is
            set (and non-empty) in the launcher's own environment.
        """
        environment: dict[str, str] = {
            "INTERLOPER_POSTGRES_HOST": self._postgres_host,
            "INTERLOPER_POSTGRES_PORT": str(self._postgres_port),
            "INTERLOPER_POSTGRES_USER": self._postgres_user,
            "INTERLOPER_POSTGRES_PASSWORD": self._postgres_password,
            "INTERLOPER_POSTGRES_DATABASE": self._postgres_database,
            "INTERLOPER_CATALOG": json.dumps(self._catalog.to_paths()),
            "INTERLOPER_RUNNER_TYPE": self._runner_type,
            "INTERLOPER_RUNNER_CONFIG": json.dumps(self._runner_config),
        }
        encryption_key = os.environ.get("SECRETS_ENCRYPTION_KEY")
        if encryption_key:
            environment["SECRETS_ENCRYPTION_KEY"] = encryption_key
        return environment
@@ -0,0 +1,343 @@
1
+ """Docker-based runner that executes each asset in its own container.
2
+
3
+ Each submitted asset runs inside a fresh container. A mini-DAG comprising
4
+ the target asset and all its upstream ancestors is sent to the container
5
+ via inline JSON so the asset can resolve its upstream dependencies from IO
6
+ without recomputing them.
7
+
8
+ **Real-time events** are streamed via **stderr** using the ``@EVENT:``
9
+ prefix (see :class:`~interloper.events.StderrEventHandler`). Events for
10
+ the target asset are forwarded to the host EventBus; events for
11
+ non-materializable parent assets and container-internal ``RUN_*`` events
12
+ are dropped. The host updates internal state with ``emit=False`` to
13
+ avoid duplicate events.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import logging
20
+ import threading
21
+ from concurrent.futures import Future, ThreadPoolExecutor
22
+ from typing import Any
23
+
24
+ import docker
25
+ from docker.client import DockerClient
26
+ from docker.models.containers import Container
27
+ from interloper.asset.base import Asset
28
+ from interloper.errors import RunnerError
29
+ from interloper.events import EventBus, EventType
30
+ from interloper.events.event import parse_event_from_log_line
31
+ from interloper.partitioning.base import Partition, PartitionWindow
32
+ from interloper.partitioning.time import TimePartition, TimePartitionWindow
33
+ from interloper.runner.sync_runner import SyncRunner
34
+ from pydantic import Field, PrivateAttr
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+ # Events emitted by the container's inner run — not forwarded to the host
39
+ # because the host manages its own run lifecycle.
40
+ _RUN_EVENTS = frozenset(
41
+ {
42
+ EventType.RUN_STARTED,
43
+ EventType.RUN_COMPLETED,
44
+ EventType.RUN_FAILED,
45
+ }
46
+ )
47
+
48
+
49
class DockerRunner(SyncRunner):
    """Execute assets as individual Docker containers.

    For each asset, constructs a mini-DAG comprising the asset and all its
    upstream ancestors. The mini-DAG is sent to the container via inline JSON.
    Inside the container, all non-target assets are marked as
    ``materializable=False`` to avoid recomputation while still enabling
    IO-based dependency resolution.

    Events are emitted by the container process and streamed to the host
    via stderr. The host forwards them to its EventBus (for persistence
    by the scheduler) and updates internal state silently (``emit=False``)
    to avoid duplicate events.

    Fully synchronous::

        with DockerRunner(image="my-image", on_event=log_event) as runner:
            result = runner.run(dag)
    """

    image: str = "interloper:latest-worker"
    max_containers: int = 4
    env_vars: dict[str, str] = Field(default_factory=dict)
    # Either docker-py's dict form or a list of "host:container[:mode]" strings.
    volumes: dict[str, dict[str, str]] | list[str] = Field(default_factory=dict)
    fail_fast: bool = False
    reraise: bool = False
    auto_remove: bool = True

    _docker: DockerClient = PrivateAttr()
    _poll_pool: ThreadPoolExecutor | None = PrivateAttr(default=None)
    _log_threads: dict[str, threading.Thread] = PrivateAttr(default_factory=dict)
    _stop_log_streaming: threading.Event = PrivateAttr(default_factory=threading.Event)
    _container_map: dict[Future[Any], Container] = PrivateAttr(default_factory=dict)

    def model_post_init(self, context: Any) -> None:
        """Initialize Docker client after model initialization."""
        super().model_post_init(context)
        self._docker = docker.from_env()

    # ------------------------------------------------------------------
    # Scheduling primitives
    # ------------------------------------------------------------------

    @property
    def _capacity(self) -> int:
        """Maximum number of concurrent containers."""
        return self.max_containers

    def _on_start(self) -> None:
        """Create the polling thread pool."""
        self._stop_log_streaming.clear()
        self._poll_pool = ThreadPoolExecutor(max_workers=self.max_containers)

    def _on_end(self) -> None:
        """Shut down log streaming and the polling pool."""
        self._stop_log_streaming.set()
        for thread in self._log_threads.values():
            thread.join(timeout=2.0)
        self._log_threads.clear()
        if self._poll_pool is not None:
            self._poll_pool.shutdown(wait=True, cancel_futures=False)
            self._poll_pool = None
        self._container_map.clear()

    def _submit_asset(
        self,
        asset: Asset,
        partition_or_window: Partition | PartitionWindow | None,
    ) -> Future[Any]:
        """Launch an asset in a Docker container and return a polling Future.

        Args:
            asset: The asset to execute.
            partition_or_window: Optional partition or window forwarded to the CLI.

        Returns:
            A Future that raises :class:`RunnerError` on container failure.
            If the container could not be started, the asset is marked failed
            and an already-completed Future is returned.

        Raises:
            RunnerError: If the polling pool has not been created yet.
        """
        if self._poll_pool is None:
            raise RunnerError("Poll pool not initialized")

        mini_dag = self.state.dag.mini_dag(asset.id)
        dag_spec = mini_dag.to_spec().model_dump(mode="json")

        cmd = self._build_command(dag_spec, partition_or_window, self.state.run_id)
        name = self._build_name(asset)
        env = self._build_env()
        volumes = self._build_volumes()

        # emit=False: the container emits ASSET_STARTED via stderr
        self.state.mark_asset_running(asset, emit=False)

        try:
            container = self._docker.containers.run(
                image=self.image,
                name=name,
                command=cmd,
                environment=env,
                volumes=volumes or None,
                labels={"interloper.asset_id": asset.id},
                # Removal is handled explicitly once the future is processed.
                remove=False,
                detach=True,
                stdout=True,
                stderr=True,
            )
        except Exception as e:
            # Container never started — emit from the host
            self.state.mark_asset_failed(asset, str(e))
            done: Future[None] = Future()
            done.set_result(None)
            return done

        self._start_log_streaming(container, target_asset_id=asset.id)

        future = self._poll_pool.submit(self._poll_container, container)
        self._container_map[future] = container
        return future

    def _handle_completed(self, future: Future[Any], asset: Asset) -> None:
        """Process a completed container future and clean up.

        Updates internal state silently (``emit=False``) because the
        container already emitted the real events. Assets already in a
        terminal state (e.g. marked failed during ``_submit_asset``) are
        skipped.

        Raises:
            Exception: The container failure, when ``fail_fast`` or
                ``reraise`` is set.
        """
        container = self._release_container(future)
        try:
            self._sync_asset_state(future, asset, propagate=self.fail_fast or self.reraise)
        finally:
            # Runs even when the failure is re-raised, so the container is not leaked.
            self._remove_container(container)

    def _handle_flushed_future(self, future: Future[Any], asset: Asset) -> None:
        """Clean up container after flush.

        Same bookkeeping as ``_handle_completed`` but never re-raises.
        """
        container = self._release_container(future)
        try:
            self._sync_asset_state(future, asset, propagate=False)
        finally:
            self._remove_container(container)

    def _release_container(self, future: Future[Any]) -> Container | None:
        """Detach the container tracked for ``future`` and stop its log thread."""
        container = self._container_map.pop(future, None)
        if container is not None:
            self._stop_container_log_streaming(container)
        return container

    def _sync_asset_state(self, future: Future[Any], asset: Asset, *, propagate: bool) -> None:
        """Mirror the future's outcome into the run state without emitting events."""
        info = self.state.asset_executions.get(asset.id)
        if info and info.is_terminal:
            return
        try:
            future.result()
        except Exception as e:  # noqa: BLE001
            self.state.mark_asset_failed(asset, str(e), emit=False)
            if propagate:
                raise
        else:
            self.state.mark_asset_completed(asset, emit=False)

    def _remove_container(self, container: Container | None) -> None:
        """Best-effort removal of a finished container when ``auto_remove`` is set."""
        if container is None or not self.auto_remove:
            return
        try:
            container.remove()
        except Exception:  # noqa: BLE001
            # Removal stays best-effort, but leave a trace for debugging.
            logger.debug(
                "Could not remove container %s",
                (container.id or "unknown")[:12],
                exc_info=True,
            )

    # ------------------------------------------------------------------
    # Container polling
    # ------------------------------------------------------------------

    def _poll_container(self, container: Container) -> None:
        """Block until the container exits; raise on failure.

        Raises:
            RunnerError: If the container exits with a non-zero code.
        """
        result = container.wait()
        # A missing status code is treated as a failure.
        status_code = result.get("StatusCode", 1)
        if status_code != 0:
            cid = (container.id or "unknown")[:12]
            raise RunnerError(f"Container {cid} exited with code {status_code}")

    # ------------------------------------------------------------------
    # Command and environment builders
    # ------------------------------------------------------------------

    def _build_command(
        self,
        dag_spec: dict[str, Any],
        partition_or_window: Partition | PartitionWindow | None,
        run_id: str,
    ) -> list[str]:
        """Build the CLI command for asset execution in a container.

        Args:
            dag_spec: JSON-serializable DAG spec passed inline to the CLI.
            partition_or_window: Adds ``--date`` for a :class:`TimePartition`
                or ``--start-date``/``--end-date`` for a
                :class:`TimePartitionWindow`; anything else adds nothing.
            run_id: Run identifier forwarded via ``--run-id``.

        Returns:
            The argument list for the container command.
        """
        cmd = [
            "interloper",
            "run",
            "--format",
            "inline",
            f"--run-id={run_id}",
            json.dumps(dag_spec),
        ]

        if isinstance(partition_or_window, TimePartition):
            cmd.extend(["--date", partition_or_window.value.strftime("%Y-%m-%d")])
        elif isinstance(partition_or_window, TimePartitionWindow):
            cmd.extend(
                [
                    "--start-date",
                    partition_or_window.start.strftime("%Y-%m-%d"),
                    "--end-date",
                    partition_or_window.end.strftime("%Y-%m-%d"),
                ]
            )

        return cmd

    def _build_env(self) -> dict[str, str]:
        """Build the environment variables for the container."""
        env = dict(self.env_vars)
        # The host reads events from the container's stderr stream.
        env["INTERLOPER_EVENTS_TO_STDERR"] = "true"
        return env

    def _build_volumes(self) -> dict[str, dict[str, str]]:
        """Build the volume mounts for the container.

        List entries use the ``host:container[:mode]`` form; the mode defaults
        to ``rw`` when omitted.

        Raises:
            RunnerError: If a list entry has no host or container part.
        """
        volumes: dict[str, dict[str, str]] = {}
        if isinstance(self.volumes, dict):
            volumes.update(self.volumes)
        elif isinstance(self.volumes, list):
            for volume in self.volumes:
                host, _, rest = volume.partition(":")
                bind, _, mode = rest.partition(":")
                if not host or not bind:
                    raise RunnerError(
                        f"Invalid volume specification {volume!r}; expected 'host:container[:mode]'"
                    )
                # Honor an explicit mode such as "ro" instead of forcing "rw".
                volumes[host] = {"bind": bind, "mode": mode or "rw"}
        return volumes

    def _build_name(self, asset: Asset) -> str:
        """Build the name for the container."""
        return f"interloper_run_{self.state.run_id[:8]}_{asset.id[:8]}"

    # ------------------------------------------------------------------
    # Real-time event streaming (stderr)
    # ------------------------------------------------------------------

    def _start_log_streaming(self, container: Container, *, target_asset_id: str) -> None:
        """Stream events from the container's stderr to the host EventBus.

        Only events belonging to the **target asset** are forwarded.
        Events for non-materializable parent assets in the mini-DAG and
        container-internal ``RUN_*`` events are dropped.

        Args:
            container: The Docker container to stream from.
            target_asset_id: Only forward events with this ``asset_id``.
        """
        cid = (container.id or "???")[:12]

        def stream_logs() -> None:
            # Buffer raw bytes and decode only complete lines, so a multi-byte
            # UTF-8 character split across two chunks is not corrupted.
            pending = b""
            try:
                for chunk in container.logs(stream=True, follow=True, stdout=False, stderr=True):
                    if self._stop_log_streaming.is_set():
                        break
                    pending += chunk
                    while b"\n" in pending:
                        raw, pending = pending.split(b"\n", 1)
                        line = raw.decode("utf-8", errors="ignore").rstrip()
                        if line:
                            self._forward_log_line(line, cid=cid, target_asset_id=target_asset_id)
            except Exception:  # noqa: BLE001
                # The container may have been removed or stopped mid-stream.
                logger.debug("[container %s] log streaming interrupted", cid, exc_info=True)
            tail = pending.decode("utf-8", errors="ignore").rstrip()
            if tail:
                logger.debug("[container %s] %s", cid, tail)

        thread = threading.Thread(target=stream_logs, daemon=True)
        thread.start()
        if container.id is not None:
            self._log_threads[container.id] = thread

    def _forward_log_line(self, line: str, *, cid: str, target_asset_id: str) -> None:
        """Forward one stderr line to the EventBus, or log it when it is not an event."""
        try:
            event = parse_event_from_log_line(line)
            if event is not None:
                if event.type in _RUN_EVENTS:
                    return
                event_asset_id = event.metadata.get("asset_id")
                if event_asset_id and event_asset_id != target_asset_id:
                    return
                EventBus.emit(event.type, metadata=event.metadata)
                return
        except Exception:  # noqa: BLE001
            # Parsing/emitting must never kill the stream; fall through and
            # log the raw line instead.
            logger.debug("[container %s] could not process event line", cid, exc_info=True)
        logger.debug("[container %s] %s", cid, line)

    def _stop_container_log_streaming(self, container: Container) -> None:
        """Stop and clean up the log streaming thread for a container."""
        if container.id is None:
            return
        thread = self._log_threads.pop(container.id, None)
        if thread is not None:
            thread.join(timeout=1.0)
@@ -1,3 +0,0 @@
1
- # interloper-docker
2
-
3
- Docker execution support for Interloper.
@@ -1,336 +0,0 @@
1
- """Docker Backfiller implementation for Interloper.
2
-
3
- This backfiller starts a Docker container and invokes the Interloper CLI inside it
4
- using an inline JSON config. It runs the entire DAG in the container, delegating
5
- asset scheduling to the configured backfiller in the inline config (typically in_process).
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- import threading
11
- from collections.abc import Callable
12
- from time import sleep
13
-
14
- import docker
15
- from docker.errors import NotFound
16
- from docker.models.containers import Container
17
- from interloper.backfillers.base import Backfiller
18
- from interloper.cli.config import Config
19
- from interloper.dag.base import DAG
20
- from interloper.errors import PartitionError
21
- from interloper.events.base import Event, EventBus, parse_event_from_log_line
22
- from interloper.partitioning.base import Partition, PartitionWindow
23
- from interloper.partitioning.time import TimePartition, TimePartitionWindow
24
- from interloper.runners.base import Runner
25
- from interloper.runners.results import ExecutionStatus, RunResult
26
- from interloper.serialization.backfiller import BackfillerSpec
27
-
28
-
29
- class DockerBackfiller(Backfiller[Container]):
30
- """Run an Interloper DAG inside a Docker container via the Interloper CLI.
31
-
32
- The image must contain the `interloper` package (CLI available on PATH).
33
- """
34
-
35
- def __init__(
36
- self,
37
- image: str,
38
- env_vars: dict[str, str] | None = None,
39
- max_containers: int = 1,
40
- runner: Runner | None = None,
41
- volumes: dict[str, dict[str, str]] | list[str] | None = None,
42
- dind: bool = False,
43
- on_event: Callable[[Event], None] | None = None,
44
- ) -> None:
45
- """Initialize the DockerBackfiller.
46
-
47
- Args:
48
- image: Docker image to use
49
- env_vars: Environment variables to pass to the container
50
- max_containers: Maximum number of concurrent containers (default 1)
51
- runner: Runner to use for running assets
52
- volumes: Volume mounts for the container
53
- dind: If True, mount the Docker socket to enable Docker-in-Docker
54
- on_event: Optional event handler for lifecycle events
55
- """
56
- super().__init__(runner=runner, on_event=on_event)
57
-
58
- # Force the runner to re-raise exceptions to make sure the container's exit code is propagated.
59
- self.runner._reraise = True
60
-
61
- self._image = image
62
- self._env_vars = env_vars or {}
63
- self._max_containers = max_containers
64
- self._volumes = volumes or {}
65
- self._dind = dind
66
- self._docker = docker.from_env()
67
-
68
- # Track log streaming threads for cleanup
69
- self._log_threads: dict[str, threading.Thread] = {}
70
- self._stop_log_streaming = threading.Event()
71
-
72
- @property
73
- def _capacity(self) -> int:
74
- """Maximum number of concurrent containers."""
75
- return self._max_containers
76
-
77
- def _on_start(self) -> None:
78
- self._stop_log_streaming.clear()
79
-
80
- def _on_end(self) -> None:
81
- # Signal all log streaming threads to stop
82
- self._stop_log_streaming.set()
83
-
84
- # Wait for threads to finish
85
- for thread in self._log_threads.values():
86
- thread.join(timeout=2.0)
87
- self._log_threads.clear()
88
-
89
- def _build_command(
90
- self,
91
- dag: DAG,
92
- partition_or_window: Partition | PartitionWindow | None,
93
- backfill_id: str,
94
- ) -> list[str]:
95
- """Build the CLI command for a partition.
96
-
97
- Args:
98
- dag: The DAG to execute
99
- partition_or_window: The partition or window
100
- backfill_id: The backfill ID
101
-
102
- Returns:
103
- Command list for the container
104
- """
105
- config = Config(dag=dag, runner=self.runner)
106
-
107
- cmd = [
108
- "interloper",
109
- "run",
110
- "--format=inline",
111
- f"--backfill-id={backfill_id}",
112
- config.to_json(),
113
- ]
114
-
115
- if partition_or_window is None:
116
- return cmd
117
-
118
- if isinstance(partition_or_window, TimePartition):
119
- cmd.extend(["--date", partition_or_window.value.strftime("%Y-%m-%d")])
120
- elif isinstance(partition_or_window, TimePartitionWindow):
121
- cmd.extend(
122
- [
123
- "--start-date",
124
- partition_or_window.start.strftime("%Y-%m-%d"),
125
- "--end-date",
126
- partition_or_window.end.strftime("%Y-%m-%d"),
127
- ]
128
- )
129
- else:
130
- raise PartitionError("Unsupported partition or window type")
131
- return cmd
132
-
133
- def _build_env(self) -> dict[str, str]:
134
- """Build the environment variables for the container."""
135
- env = dict(self._env_vars)
136
- # Enable log-based event streaming
137
- env["INTERLOPER_EVENTS_TO_STDERR"] = "true"
138
- return env
139
-
140
- def _build_volumes(self) -> dict[str, dict[str, str]]:
141
- """Build the volume mounts for the container."""
142
- volumes = {}
143
- if isinstance(self._volumes, dict):
144
- volumes.update(self._volumes)
145
- elif isinstance(self._volumes, list):
146
- for volume in self._volumes:
147
- volumes[volume.split(":")[0]] = {"bind": volume.split(":")[1], "mode": "rw"}
148
- if self._dind:
149
- volumes["/var/run/docker.sock"] = {"bind": "/var/run/docker.sock", "mode": "rw"}
150
- return volumes
151
-
152
- def _build_name(self, partition_or_window: Partition | PartitionWindow | None) -> str:
153
- """Build the name for the container."""
154
- name = f"interloper_backfill_{self.state.backfill_id[:8]}"
155
- if partition_or_window is not None:
156
- name += f"-{partition_or_window}"
157
- return name.replace(":", "-").replace("_", "-").lower()
158
-
159
- def _start_log_streaming(self, container: Container) -> None:
160
- """Start a background thread to stream logs and parse events from a container.
161
-
162
- Args:
163
- container: The Docker container to stream logs from
164
- """
165
-
166
- def stream_logs() -> None:
167
- try:
168
- # Stream logs from the container (both stdout and stderr)
169
- for log_line in container.logs(stream=True, follow=True, stdout=True, stderr=True):
170
- if self._stop_log_streaming.is_set():
171
- break
172
-
173
- try:
174
- line = log_line.decode("utf-8", errors="ignore")
175
- event = parse_event_from_log_line(line)
176
- if event is not None:
177
- EventBus.get_instance().emit(event)
178
- except Exception:
179
- # Ignore parsing errors, continue streaming
180
- pass
181
- except Exception:
182
- # Container may have been removed or stopped
183
- pass
184
-
185
- thread = threading.Thread(target=stream_logs, daemon=True)
186
- thread.start()
187
- if container.id is not None:
188
- self._log_threads[container.id] = thread
189
-
190
- def _stop_container_log_streaming(self, container: Container) -> None:
191
- """Stop and clean up the log streaming thread for a container.
192
-
193
- Args:
194
- container: The Docker container to stop streaming for
195
- """
196
- if container.id is None:
197
- return
198
- thread = self._log_threads.pop(container.id, None)
199
- if thread is not None:
200
- # Thread will stop on next iteration due to container exit
201
- thread.join(timeout=1.0)
202
-
203
- def _submit_run(
204
- self,
205
- dag: DAG,
206
- partition_or_window: Partition | PartitionWindow | None,
207
- ) -> Container:
208
- """Submit execution of a run in a Docker container.
209
-
210
- Args:
211
- dag: The DAG to execute
212
- partition_or_window: Either a Partition or PartitionWindow object
213
-
214
- Returns:
215
- The container as the handle
216
- """
217
- cmd = self._build_command(dag, partition_or_window, self.state.backfill_id)
218
- env = self._build_env()
219
- volumes = self._build_volumes()
220
- name = self._build_name(partition_or_window)
221
-
222
- self.state.mark_run_running(partition_or_window)
223
-
224
- container = self._docker.containers.run(
225
- image=self._image,
226
- name=name,
227
- command=cmd,
228
- environment=env,
229
- volumes=volumes if volumes else None,
230
- remove=False,
231
- detach=True,
232
- stdout=True,
233
- stderr=True,
234
- )
235
- # Store partition in container object for _wait_any
236
- setattr(container, "_interloper_partition", partition_or_window)
237
-
238
- # Start log streaming for event collection
239
- self._start_log_streaming(container)
240
-
241
- return container
242
-
243
- def _wait_any(self, handles: list[Container]) -> Container:
244
- """Wait for any container to complete by polling.
245
-
246
- Args:
247
- handles: List of container objects to wait for
248
-
249
- Returns:
250
- The container that completed
251
- """
252
- while True:
253
- for container in handles:
254
- container.reload()
255
-
256
- if container.status in ("exited", "dead"):
257
- # Stop log streaming for this container
258
- self._stop_container_log_streaming(container)
259
-
260
- result = container.wait()
261
- status_code = result.get("StatusCode", 1)
262
-
263
- # Get partition from container object
264
- partition = getattr(container, "_interloper_partition", None)
265
-
266
- if status_code == 0:
267
- # TODO: This is not the true RunResult, we need to get it from the container?
268
- # Missing the asset_executions.
269
- result = RunResult(partition, ExecutionStatus.COMPLETED)
270
- self.state.mark_run_completed(partition, result)
271
- else:
272
- self.state.mark_run_failed(partition, f"Container exited with code {status_code}")
273
-
274
- try:
275
- logs = container.logs(stdout=True, stderr=True)
276
- if logs:
277
- print("=============== START OF RUN CONTAINER LOGS ==================")
278
- print(logs.decode("utf-8", errors="ignore"))
279
- print("================ END OF RUN CONTAINER LOGS ===================")
280
- except Exception:
281
- pass
282
-
283
- # Remove the container after processing
284
- try:
285
- container.remove()
286
- except Exception as e:
287
- print(f"Error removing container {container.id}: {e}")
288
- pass
289
-
290
- return container
291
-
292
- sleep(1.0)
293
-
294
- def _cancel_all(self, handles: list[Container]) -> None:
295
- """Best-effort cancellation of outstanding containers.
296
-
297
- Args:
298
- handles: List of container objects to cancel
299
- """
300
- for container in handles:
301
- partition = getattr(container, "_interloper_partition", None)
302
-
303
- # Stop log streaming for this container
304
- self._stop_container_log_streaming(container)
305
-
306
- try:
307
- container.stop(timeout=5)
308
- except NotFound:
309
- # Container already removed, mark as cancelled
310
- if partition is not None:
311
- self.state.mark_run_cancelled(partition)
312
- except Exception:
313
- try:
314
- container.kill()
315
- except NotFound:
316
- # Container already removed
317
- self.state.mark_run_cancelled(partition)
318
- except Exception:
319
- pass
320
- else:
321
- # Only mark as cancelled if we successfully stopped/killed
322
- if partition is not None:
323
- self.state.mark_run_cancelled(partition)
324
-
325
- def to_spec(self) -> BackfillerSpec:
326
- """Convert to serializable spec."""
327
- return BackfillerSpec(
328
- path=self.path,
329
- init=dict(
330
- image=self._image,
331
- env_vars=self._env_vars,
332
- volumes=self._volumes,
333
- max_containers=self._max_containers,
334
- dind=self._dind,
335
- ),
336
- )
@@ -1,253 +0,0 @@
1
- """Docker-based runner that runs each asset in its own container.
2
-
3
- Each submitted asset is executed inside a fresh container. To allow an asset
4
- to resolve its upstream dependencies from IO without recomputing them, we pass
5
- to the container a mini-DAG consisting of the target asset plus all its
6
- upstream ancestors. The container runs the Interloper CLI with an inline
7
- config, similar to the `DockerBackfiller`.
8
- """
9
-
10
- from __future__ import annotations
11
-
12
- from collections.abc import Callable
13
-
14
- import docker
15
- from docker.models.containers import Container
16
- from interloper.assets.base import Asset
17
- from interloper.cli.config import Config
18
- from interloper.dag.base import DAG
19
- from interloper.errors import PartitionError, RunnerError
20
- from interloper.events.base import Event
21
- from interloper.partitioning.base import Partition, PartitionWindow
22
- from interloper.partitioning.time import TimePartition, TimePartitionWindow
23
- from interloper.runners.base import Runner
24
- from interloper.serialization.runner import RunnerSpec
25
-
26
-
27
class DockerRunner(Runner[Container]):
    """Execute assets as individual Docker containers.

    For each asset, constructs a mini-DAG comprising the asset and all its
    upstream ancestors. The mini-DAG is sent to the container via inline JSON.
    Inside the container, all non-target assets are marked as
    `materializable=False` prior to execution to avoid recomputation while
    still enabling IO-based dependency resolution.
    """

    # Seconds to pause between two polling rounds in `_wait_any` when no
    # container has finished yet.
    _POLL_INTERVAL_SECONDS = 1.0

    def __init__(
        self,
        image: str,
        max_containers: int = 4,
        env_vars: dict[str, str] | None = None,
        volumes: dict[str, dict[str, str]] | list[str] | None = None,
        fail_fast: bool = False,
        reraise: bool = False,
        on_event: Callable[[Event], None] | None = None,
    ) -> None:
        """Initialize the DockerRunner.

        Args:
            image: Docker image to use for container execution.
            max_containers: Maximum number of concurrent containers.
            env_vars: Environment variables to pass to the container.
            volumes: Volume mounts for the container, either as a mapping of
                host path to `{"bind": ..., "mode": ...}` or as a list of
                `"host:container[:mode]"` strings.
            fail_fast: Stop execution on first failure.
            reraise: Re-raise exceptions.
            on_event: Optional event handler for lifecycle events.
        """
        super().__init__(fail_fast=fail_fast, reraise=reraise, on_event=on_event)
        self._image = image
        self._max_containers = max_containers
        self._env_vars = env_vars or {}
        self._volumes = volumes or {}
        self._docker = docker.from_env()

    @property
    def _capacity(self) -> int:
        """Number of containers that may run concurrently."""
        return self._max_containers

    def _build_command(
        self,
        dag: DAG,
        partition_or_window: Partition | PartitionWindow | None,
        run_id: str,
    ) -> list[str]:
        """Build the CLI command for asset execution in a container.

        Args:
            dag: The DAG to execute.
            partition_or_window: The partition or window.
            run_id: The run ID.

        Returns:
            Command list for the container.

        Raises:
            PartitionError: If `partition_or_window` is neither a
                `TimePartition` nor a `TimePartitionWindow`.
        """
        config = Config(dag=dag)

        cmd = [
            "interloper",
            "run",
            "--format",
            "inline",
            f"--run-id={run_id}",
            config.to_json(),
        ]

        if isinstance(partition_or_window, TimePartition):
            cmd.extend(["--date", partition_or_window.value.strftime("%Y-%m-%d")])
        elif isinstance(partition_or_window, TimePartitionWindow):
            cmd.extend(
                [
                    "--start-date",
                    partition_or_window.start.strftime("%Y-%m-%d"),
                    "--end-date",
                    partition_or_window.end.strftime("%Y-%m-%d"),
                ]
            )
        else:
            # NOTE(review): `None` is accepted by the signature but ends up
            # here as well - confirm whether unpartitioned runs should be
            # supported instead of rejected.
            raise PartitionError("Unsupported partition or window type")
        return cmd

    def _build_env(self) -> dict[str, str]:
        """Build the environment variables for the container."""
        # Copy so callers cannot mutate the runner's own mapping.
        return dict(self._env_vars)

    def _build_volumes(self) -> dict[str, dict[str, str]]:
        """Build the volume mounts for the container.

        A dict configuration is copied as-is. A list configuration is parsed
        from `"host:container[:mode]"` strings; the mode defaults to `"rw"`
        when it is omitted.

        Returns:
            Mapping of host path to `{"bind": <container path>, "mode": <mode>}`.

        Raises:
            RunnerError: If a list entry has no `host:container` separator.
        """
        volumes: dict[str, dict[str, str]] = {}
        if isinstance(self._volumes, dict):
            volumes.update(self._volumes)
        elif isinstance(self._volumes, list):
            for volume in self._volumes:
                parts = volume.split(":")
                if len(parts) < 2:
                    raise RunnerError(
                        f"Invalid volume specification {volume!r}, "
                        "expected 'host:container[:mode]'"
                    )
                # Honor an explicit mode (e.g. "ro") instead of forcing "rw".
                mode = parts[2] if len(parts) > 2 and parts[2] else "rw"
                volumes[parts[0]] = {"bind": parts[1], "mode": mode}
        return volumes

    def _build_name(self, asset: Asset) -> str:
        """Build the name for the container.

        The name combines the first 8 characters of the run ID with the asset
        instance key; `:` and `_` are replaced by `-` and the result is
        lower-cased.
        """
        name = f"interloper_run_{self.state.run_id[:8]}-{asset.instance_key}"
        return name.replace(":", "-").replace("_", "-").lower()

    def _submit_asset(
        self,
        asset: Asset,
        partition_or_window: Partition | PartitionWindow | None,
    ) -> Container:
        """Submit execution of an asset and return the container object for completion tracking.

        IMPORTANT: this method is not calling the `_execute_asset` method of the base class.
        Therefore, the state has to be updated manually here and in `_wait_any` below.

        Args:
            asset: The asset to execute
            partition_or_window: Either a Partition or PartitionWindow object

        Returns:
            The container object for the asset execution
        """
        # Build a mini-DAG: target asset + its parents (non-materializable)
        mini_dag = self.state.dag.mini_dag(asset.instance_key)

        cmd = self._build_command(mini_dag, partition_or_window, self.state.run_id)
        name = self._build_name(asset)
        env = self._build_env()
        volumes = self._build_volumes()

        self.state.mark_asset_running(asset)

        # `remove=False` keeps the exited container around so `_wait_any` can
        # read its exit code and logs before removing it explicitly. The label
        # is how `_wait_any` and `_cancel_all` map a container back to its asset.
        container = self._docker.containers.run(
            image=self._image,
            name=name,
            command=cmd,
            environment=env,
            volumes=volumes if volumes else None,
            labels={"interloper.asset_key": asset.instance_key},
            remove=False,
            detach=True,
            stdout=True,
            stderr=True,
        )

        return container

    def _print_container_logs(self, container: Container) -> None:
        """Print the captured stdout/stderr of a container (best effort)."""
        try:
            logs = container.logs(stdout=True, stderr=True)
            if logs:
                print("=============== START OF ASSET CONTAINER LOGS ================")
                print(logs.decode("utf-8", errors="ignore"))
                print("================ END OF ASSET CONTAINER LOGS =================")
        except Exception as e:
            # Logs are diagnostic only; never let them break the run.
            print(f"Error reading logs of container {container.id}: {e}")

    def _remove_container(self, container: Container) -> None:
        """Remove a finished container, reporting but not raising on failure."""
        try:
            container.remove()
        except Exception as e:
            print(f"Error removing container {container.id}: {e}")

    def _wait_any(self, handles: list[Container]) -> Container:
        """Wait for any container to finish by polling.

        IMPORTANT: the `_execute_asset` method of the base class is not called by `_submit_asset`.
        Therefore, the state has to be updated manually here and in `_submit_asset` above.

        Args:
            handles: List of container objects to wait for

        Returns:
            The container object that finished

        Raises:
            RunnerError: If `handles` is empty, if a finished container cannot
                be mapped back to an asset, or if a container exited with a
                non-zero code while `reraise` or `fail_fast` is enabled.
        """
        # Function-scope import: the module does not import `time` at the top.
        from time import sleep

        if not handles:
            # Polling an empty list would never terminate.
            raise RunnerError("No containers to wait for")

        while True:
            for container in handles:
                container.reload()

                if container.status not in ("exited", "dead"):
                    continue

                result = container.wait()
                status_code = result.get("StatusCode", 1)

                # Map back to asset
                asset: Asset | None = None
                asset_key = container.labels.get("interloper.asset_key")
                if asset_key and asset_key in self.state.dag.asset_map:
                    asset = self.state.dag.asset_map[asset_key]
                if asset is None:
                    raise RunnerError("Failed to map container to asset")

                failed = status_code != 0
                if failed:
                    self.state.mark_asset_failed(asset, f"Container {container.id} exited with code {status_code}")
                else:
                    self.state.mark_asset_completed(asset)

                # NOTE(review): logs are dumped for every finished container;
                # confirm whether this was meant for failures only.
                self._print_container_logs(container)

                # Remove the container after processing, also on the failure
                # path, so an error raised below does not leak it.
                self._remove_container(container)

                if failed and (self._reraise or self._fail_fast):
                    raise RunnerError(f"Container {container.id} exited with code {status_code}")

                return container

            # Nothing finished in this round: pause instead of busy-polling
            # the Docker daemon.
            sleep(self._POLL_INTERVAL_SECONDS)

    def _cancel_all(self, handles: list[Container]) -> None:
        """Best-effort cancellation of outstanding containers.

        Each container is stopped (2 second timeout) or, if that raises,
        killed. The asset identified by the container's
        `interloper.asset_key` label is then marked as cancelled; containers
        whose label does not resolve to a known asset are skipped so the
        remaining containers are still cancelled.

        Args:
            handles: List of container objects to cancel
        """
        for container in handles:
            try:
                container.stop(timeout=2)
            except Exception:
                try:
                    container.kill()
                except Exception:
                    # Best effort: nothing more can be done for this container.
                    pass
            finally:
                asset_key = container.labels.get("interloper.asset_key")
                if asset_key and asset_key in self.state.dag.asset_map:
                    self.state.mark_asset_cancelled(self.state.dag.asset_map[asset_key])

    def to_spec(self) -> RunnerSpec:
        """Convert to serializable spec."""
        return RunnerSpec(
            path=self.path,
            init=dict(
                image=self._image,
                max_containers=self._max_containers,
                env_vars=self._env_vars,
                volumes=self._volumes,
                fail_fast=self._fail_fast,
                reraise=self._reraise,
            ),
        )