modalflow 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,102 @@
1
+ Metadata-Version: 2.3
2
+ Name: modalflow
3
+ Version: 0.2.2
4
+ Summary: Serverless Airflow Executor on Modal
5
+ Requires-Dist: apache-airflow>=3.0.6
6
+ Requires-Dist: modal>=1.3.0
7
+ Requires-Dist: click>=8.0.0
8
+ Requires-Dist: requests>=2.31.0
9
+ Requires-Dist: pytest>=7.0 ; extra == 'dev'
10
+ Requires-Dist: pytest-mock ; extra == 'dev'
11
+ Requires-Dist: pytest-timeout ; extra == 'dev'
12
+ Requires-Dist: black ; extra == 'dev'
13
+ Requires-Dist: ruff ; extra == 'dev'
14
+ Requires-Python: >=3.10
15
+ Provides-Extra: dev
16
+ Description-Content-Type: text/markdown
17
+
18
+ # Modalflow
19
+
20
+ A serverless Airflow Executor for Modal.
21
+
22
+ ## Usage
23
+
24
+ ### Prerequisites
25
+ - pip
26
+ - A Modal account, workspace, and environment
27
+ - Modal CLI configured to your workspace
28
+
29
+ ### Steps
30
+
31
+ 1. `pip install modalflow` on your local machine, and add it to your Airflow cluster.
32
+ 2. Run `modalflow deploy --env {environment name}` to deploy the resources into your Modal environment.
33
+ 3. Add `modalflow.executor.ModalExecutor` to your Airflow config
34
+
35
+ ## Networking Configuration
36
+
37
+ ### Local Development
38
+
39
+ ModalExecutor automatically detects when you're running Airflow locally and creates a tunnel to your local Airflow instance (localhost:8080). No additional configuration needed.
40
+
41
+ The executor will:
42
+ - Detect if localhost:8080 is accessible
43
+ - Create a Modal tunnel to expose your local Airflow API
44
+ - Pass the tunnel URL to Modal Functions so they can "phone home"
45
+
46
+ ### Production / VPC Deployments
47
+
48
+ If your Airflow deployment is in a VPC or behind a firewall, you need to configure the execution API URL so Modal Functions can reach it.
49
+
50
+ **Option 1: Environment Variable (Recommended)**
51
+
52
+ Set the execution API URL as an environment variable:
53
+
54
+ ```bash
55
+ export AIRFLOW__CORE__EXECUTION_API_SERVER_URL=https://your-airflow-api.example.com/execution/
56
+ ```
57
+
58
+ **Option 2: Airflow Config**
59
+
60
+ Set in `airflow.cfg`:
61
+
62
+ ```ini
63
+ [core]
64
+ execution_api_server_url = https://your-airflow-api.example.com/execution/
65
+ ```
66
+
67
+ **VPC Setup Considerations**
68
+
69
+ If Airflow is deployed in a VPC, ensure:
70
+
71
+ - **Public Endpoint**: Airflow API must be accessible via a public endpoint. Options include:
72
+ - Application Load Balancer (ALB) in public subnets
73
+ - API Gateway in front of Airflow
74
+ - Reverse tunnel (ngrok, Cloudflare Tunnel, AWS IoT Secure Tunneling)
75
+
76
+ - **Security**: Use authentication/authorization to protect the endpoint:
77
+ - API keys or bearer tokens
78
+ - Security groups restricting access
79
+ - VPN or private networking (if Modal supports VPC peering)
80
+
81
+ - **Network Access**: Modal Functions run in Modal's cloud infrastructure and can reach public internet endpoints. Ensure:
82
+ - No firewall rules blocking Modal's IP ranges
83
+ - The endpoint is reachable from the internet (not just internal VPC)
84
+
85
+ **Example: Using Reverse Tunnel**
86
+
87
+ If you can't expose Airflow directly, use a reverse tunnel:
88
+
89
+ 1. Set up ngrok or similar: `ngrok http 8080`
90
+ 2. Configure the executor with the ngrok URL:
91
+ ```bash
92
+ export AIRFLOW__CORE__EXECUTION_API_SERVER_URL=https://abc123.ngrok.io/execution/
93
+ ```
94
+
95
+ ## Development
96
+
97
+ We use `uv` for development. To set up:
98
+
99
+ 1. `cd modalflow`
100
+ 2. `uv sync`
101
+ 3. To run the CLI, use: `uv run -- modalflow [COMMAND]`
102
+ 4. To run unit tests, use: `uv run pytest`
@@ -0,0 +1,85 @@
1
+ # Modalflow
2
+
3
+ A serverless Airflow Executor for Modal.
4
+
5
+ ## Usage
6
+
7
+ ### Prerequisites
8
+ - pip
9
+ - A Modal account, workspace, and environment
10
+ - Modal CLI configured to your workspace
11
+
12
+ ### Steps
13
+
14
+ 1. `pip install modalflow` on your local machine, and add it to your Airflow cluster.
15
+ 2. Run `modalflow deploy --env {environment name}` to deploy the resources into your Modal environment.
16
+ 3. Add `modalflow.executor.ModalExecutor` to your Airflow config
17
+
18
+ ## Networking Configuration
19
+
20
+ ### Local Development
21
+
22
+ ModalExecutor automatically detects when you're running Airflow locally and creates a tunnel to your local Airflow instance (localhost:8080). No additional configuration needed.
23
+
24
+ The executor will:
25
+ - Detect if localhost:8080 is accessible
26
+ - Create a Modal tunnel to expose your local Airflow API
27
+ - Pass the tunnel URL to Modal Functions so they can "phone home"
28
+
29
+ ### Production / VPC Deployments
30
+
31
+ If your Airflow deployment is in a VPC or behind a firewall, you need to configure the execution API URL so Modal Functions can reach it.
32
+
33
+ **Option 1: Environment Variable (Recommended)**
34
+
35
+ Set the execution API URL as an environment variable:
36
+
37
+ ```bash
38
+ export AIRFLOW__CORE__EXECUTION_API_SERVER_URL=https://your-airflow-api.example.com/execution/
39
+ ```
40
+
41
+ **Option 2: Airflow Config**
42
+
43
+ Set in `airflow.cfg`:
44
+
45
+ ```ini
46
+ [core]
47
+ execution_api_server_url = https://your-airflow-api.example.com/execution/
48
+ ```
49
+
50
+ **VPC Setup Considerations**
51
+
52
+ If Airflow is deployed in a VPC, ensure:
53
+
54
+ - **Public Endpoint**: Airflow API must be accessible via a public endpoint. Options include:
55
+ - Application Load Balancer (ALB) in public subnets
56
+ - API Gateway in front of Airflow
57
+ - Reverse tunnel (ngrok, Cloudflare Tunnel, AWS IoT Secure Tunneling)
58
+
59
+ - **Security**: Use authentication/authorization to protect the endpoint:
60
+ - API keys or bearer tokens
61
+ - Security groups restricting access
62
+ - VPN or private networking (if Modal supports VPC peering)
63
+
64
+ - **Network Access**: Modal Functions run in Modal's cloud infrastructure and can reach public internet endpoints. Ensure:
65
+ - No firewall rules blocking Modal's IP ranges
66
+ - The endpoint is reachable from the internet (not just internal VPC)
67
+
68
+ **Example: Using Reverse Tunnel**
69
+
70
+ If you can't expose Airflow directly, use a reverse tunnel:
71
+
72
+ 1. Set up ngrok or similar: `ngrok http 8080`
73
+ 2. Configure the executor with the ngrok URL:
74
+ ```bash
75
+ export AIRFLOW__CORE__EXECUTION_API_SERVER_URL=https://abc123.ngrok.io/execution/
76
+ ```
77
+
78
+ ## Development
79
+
80
+ We use `uv` for development. To set up:
81
+
82
+ 1. `cd modalflow`
83
+ 2. `uv sync`
84
+ 3. To run the CLI, use: `uv run -- modalflow [COMMAND]`
85
+ 4. To run unit tests, use: `uv run pytest`
@@ -0,0 +1,32 @@
1
+ [build-system]
2
+ requires = ["uv_build>=0.9.18,<0.10.0"]
3
+ build-backend = "uv_build"
4
+
5
+ [project]
6
+ name = "modalflow"
7
+ version = "0.2.2"
8
+ description = "Serverless Airflow Executor on Modal"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ dependencies = [
12
+ "apache-airflow>=3.0.6",
13
+ "modal>=1.3.0",
14
+ "click>=8.0.0",
15
+ "requests>=2.31.0",
16
+ ]
17
+
18
+ [project.scripts]
19
+ modalflow = "modalflow.cli:cli"
20
+
21
+ [project.optional-dependencies]
22
+ dev = [
23
+ "pytest>=7.0",
24
+ "pytest-mock",
25
+ "pytest-timeout",
26
+ "black",
27
+ "ruff",
28
+ ]
29
+
30
+ [tool.pytest.ini_options]
31
+ pythonpath = "src"
32
+ testpaths = ["tests/unit"]
File without changes
@@ -0,0 +1,44 @@
1
+ import click
2
+ import os
3
+ import subprocess
4
+ import sys
5
+
6
@click.group()
def cli():
    """Modalflow CLI."""
    # Entry-point group only; subcommands register themselves via @cli.command().
10
+
11
@cli.command()
@click.option("--env", default="main", help="Target environment name (default: main)")
def deploy(env):
    """
    Deploy the Modalflow application to the specified environment.

    This deploys the Modal App, Volume, and Dict defined in modal_app.py.
    """
    click.echo(f"Deploying Modalflow to environment: '{env}'...")

    # modal_app.py reads MODALFLOW_ENV to name its resources, so inject it
    # into the child process environment.
    child_env = os.environ.copy()
    child_env["MODALFLOW_ENV"] = env

    # Invoke `modal deploy` through the current interpreter so the same
    # Python environment (and the same installed modal version) is used.
    deploy_cmd = [sys.executable, "-m", "modal", "deploy", "modalflow.modal_app"]

    try:
        subprocess.run(deploy_cmd, env=child_env, check=True)
        click.echo(f"Successfully deployed to environment '{env}'!")
    except subprocess.CalledProcessError as e:
        # Propagate the child's exit code so CI can distinguish failures.
        click.echo(f"Deployment failed with exit code {e.returncode}.", err=True)
        sys.exit(e.returncode)
    except Exception as e:
        click.echo(f"An error occurred: {e}", err=True)
        sys.exit(1)
42
+
43
# Allow running this file directly as a script (the console entry point
# declared in pyproject.toml calls cli() as well).
if __name__ == "__main__":
    cli()
File without changes
@@ -0,0 +1,349 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union
5
+ from urllib.parse import urljoin
6
+
7
+ import modal
8
+ from airflow.configuration import conf
9
+ from airflow.executors.base_executor import BaseExecutor
10
+ from airflow.executors import workloads as executor_workloads
11
+ from airflow.models.taskinstance import TaskInstanceKey
12
+
13
+ if TYPE_CHECKING:
14
+ from airflow.executors.workloads import All as ExecutorWorkload
15
+
16
# Type alias for the "command" passed to execute_async: either a
# single-element list holding an ExecuteTask workload (Airflow 3 style)
# or a list of CLI argument strings (legacy style).
CommandType = Union[List[executor_workloads.ExecuteTask], List[str]]

# Configuration - should match modal_app.py.
# ENV selects which deployed Modal app/dict this executor connects to.
ENV = os.environ.get("MODALFLOW_ENV", "main")
# Upper bound on concurrent task executions; mirrors the Modal function's
# max_containers setting in modal_app.py.
CONCURRENCY_LIMIT = 100
22
+
23
+
24
+ class ModalExecutor(BaseExecutor):
25
+ """
26
+ An Airflow Executor that runs tasks as Modal Functions.
27
+ """
28
+
29
+ is_local: bool = True
30
+
31
def __init__(self):
    """Create the executor; remote resources are wired up later in start()."""
    # Parallelism mirrors the Modal function's concurrency limit.
    super().__init__(parallelism=CONCURRENCY_LIMIT)
    # Maps serialized task-key string -> TaskInstanceKey for spawned tasks.
    self.active_tasks: Dict[str, TaskInstanceKey] = {}
    # All of the following are populated by start():
    self._modal_function = None    # handle to the deployed Modal function
    self._state_dict = None        # shared Modal Dict for task state
    self._tunnel_context = None    # modal.forward() context manager
    self._tunnel = None            # tunnel object returned by __enter__()
    self._execution_api_url = None # URL workers use to phone home
41
+
42
@property
def slots_available(self) -> int:
    """
    Number of additional tasks the scheduler may hand to this executor.

    Computed as total parallelism minus tasks already running or queued;
    the scheduler consults this before queuing more work.
    """
    occupied = len(self.running) + len(self.queued_tasks)
    return self.parallelism - occupied
49
+
50
def start(self):
    """
    Prepare the executor for scheduling.

    Looks up the deployed Modal function and the shared state Dict, then
    resolves the execution API URL (env var, Airflow config, or tunnel).

    Raises:
        Exception: Propagated if the Modal function or Dict cannot be
            found (e.g. `modalflow deploy` was never run for this ENV).
    """
    self.log.info("Starting ModalExecutor")

    app_name = f"modalflow-{ENV}"
    dict_name = f"airflow-state-{ENV}"

    # Resolve the deployed Modal function that executes individual tasks.
    try:
        self._modal_function = modal.Function.from_name(
            app_name, "execute_modal_task"
        )
    except Exception as e:
        self.log.error(
            f"Failed to look up Modal function {app_name}/execute_modal_task: {e}"
        )
        raise
    self.log.info(f"Connected to Modal function: {app_name}/execute_modal_task")

    # Resolve the shared Dict the workers use to report task state back.
    try:
        self._state_dict = modal.Dict.from_name(dict_name)
    except Exception as e:
        self.log.error(f"Failed to connect to Modal Dict {dict_name}: {e}")
        raise
    self.log.info(f"Connected to Modal Dict: {dict_name}")

    # Priority: env var > Airflow config > modal.forward() tunnel > error.
    self._execution_api_url = self._resolve_execution_api_url()
    self.log.info(f"Execution API URL: {self._execution_api_url}")
84
+
85
def execute_async(
    self,
    key: TaskInstanceKey,
    command: CommandType,
    queue: Optional[str] = None,
    executor_config: Optional[Any] = None,
) -> None:
    """
    Trigger a task execution on Modal.

    Handles two formats:
    - New-style: command is a list containing an ExecuteTask workload object.
    - Old-style: command is a list of strings (CLI command).

    Args:
        key: Identity of the task instance being executed.
        command: See the two formats above.
        queue: Accepted for BaseExecutor interface compatibility; unused.
        executor_config: Per-task executor configuration, forwarded to
            _get_task_env.

    Raises:
        RuntimeError: If the command format is unrecognized, or if start()
            did not configure the execution API URL.
    """
    # Fail fast on misconfiguration before doing any serialization work.
    # (Previously this was only checked after the payload was built and
    # the "Spawning" log line had already been emitted.)
    if self._execution_api_url is None:
        raise RuntimeError(
            "Execution API URL not configured. Ensure start() was called successfully."
        )

    task_key_str = self._get_key_str(key)

    if len(command) == 1 and isinstance(command[0], executor_workloads.ExecuteTask):
        # New-style: serialize the workload to JSON
        payload = {
            "task_key": task_key_str,
            "workload_json": command[0].model_dump_json(),
            "env": self._get_task_env(key, executor_config),
        }
    elif all(isinstance(c, str) for c in command):
        # Old-style: pass the CLI command for direct execution
        payload = {
            "task_key": task_key_str,
            "command": command,
            "env": self._get_task_env(key, executor_config),
        }
    else:
        raise RuntimeError(
            f"ModalExecutor doesn't know how to handle command of type: {type(command)}"
        )

    self.log.info(f"Spawning Modal task for {task_key_str}")

    try:
        # Fire-and-forget: completion is observed later by sync() via the
        # shared state dict.
        self._modal_function.spawn(payload)
        self.active_tasks[task_key_str] = key
    except Exception as e:
        # Spawn failures mark the task failed immediately; the key is never
        # registered in active_tasks, so sync() will not track it.
        self.log.error(f"Failed to spawn Modal task: {e}")
        self.fail(key)
135
+
136
def _process_workloads(self, workloads: Sequence) -> None:
    """
    Dispatch workloads handed down by the base executor.

    Handles both new-style ExecuteTask workloads and old-style
    (command, priority, queue, executor_config) tuples queued via
    queue_command.
    """
    for workload in workloads:
        if isinstance(workload, executor_workloads.ExecuteTask):
            ti = workload.ti
            key = TaskInstanceKey(
                dag_id=ti.dag_id,
                task_id=ti.task_id,
                run_id=ti.run_id,
                try_number=ti.try_number,
                map_index=ti.map_index,
            )
            queue = ti.queue
            executor_config = ti.executor_config or {}
            command = [workload]
        elif isinstance(workload, tuple):
            # Old-style: (command, priority, queue, executor_config)
            command, _priority, queue, executor_config = workload
            # Parse TaskInstanceKey from the CLI command list
            # Format: ['airflow', 'tasks', 'run', dag_id, task_id, run_id, ...]
            # NOTE(review): try_number is not present in the CLI args, so 1 is
            # assumed here; retries through this path reuse the same key.
            key = TaskInstanceKey(
                dag_id=command[3],
                task_id=command[4],
                run_id=command[5],
                try_number=1,
                map_index=-1,
            )
        else:
            self.log.error(
                f"Skipping unrecognized workload type: {type(workload)}"
            )
            continue

        if key in self.queued_tasks:
            del self.queued_tasks[key]

        self.execute_async(
            key=key,
            command=command,
            queue=queue,
            executor_config=executor_config,
        )
        # BUGFIX: execute_async swallows spawn errors (it calls self.fail and
        # does not register the task in active_tasks). Previously the key was
        # added to self.running unconditionally, leaving a failed task stuck
        # in "running" forever because sync() only inspects active_tasks.
        # Only mark the task running if it was actually handed to Modal.
        if self._get_key_str(key) in self.active_tasks:
            self.running.add(key)
185
+
186
def sync(self) -> None:
    """
    Poll the shared Modal Dict for the state of in-flight tasks.

    Called periodically via the executor heartbeat. For each task spawned
    through execute_async, reads its entry from the remote state dict and,
    on a terminal status ("SUCCESS"/"FAILED"), reports the result to
    Airflow and cleans up both local and remote bookkeeping. Non-terminal
    or missing entries are simply re-checked on the next pass.
    """
    if not self.active_tasks:
        return

    # Poll the state dictionary
    # TODO: batch this or use a more efficient lookup

    completed_keys = []

    for task_key_str, key in self.active_tasks.items():
        # Check if this key exists in the remote dict
        # We use .get() to avoid errors if key is missing
        try:
            task_state = self._state_dict.get(task_key_str)
        except Exception as e:
            # Transient read error: keep the task active and retry on the
            # next sync() pass rather than failing the task.
            self.log.warning(f"Error reading state for {task_key_str}: {e}")
            continue

        if not task_state:
            # Task not yet registered by worker, or lost
            # TODO: implement a timeout logic here
            continue

        status = task_state.get("status")

        # Any other status (e.g. "RUNNING") falls through and is polled again.
        if status == "SUCCESS":
            self.success(key)
            completed_keys.append(task_key_str)
            self.log.info(f"Task {task_key_str} succeeded")

        elif status == "FAILED":
            self.fail(key)
            completed_keys.append(task_key_str)
            error_msg = task_state.get("error", "Unknown error")
            self.log.error(f"Task {task_key_str} failed: {error_msg}")
            if task_state.get("stderr"):
                self.log.error(f"Task {task_key_str} stderr: {task_state['stderr']}")
            if task_state.get("stdout"):
                self.log.info(f"Task {task_key_str} stdout: {task_state['stdout']}")

    # Cleanup local state
    # (Deferred until after the loop so active_tasks is not mutated while
    # being iterated.)
    for k in completed_keys:
        del self.active_tasks[k]
        # Cleanup remote state
        try:
            self._state_dict.pop(k)
        except Exception as e:
            # Best-effort: a stale remote entry is harmless here.
            self.log.warning(f"Failed to cleanup remote state for {k}: {e}")
237
+
238
def end(self) -> None:
    """
    Shut the executor down and release networking resources.

    Closes the modal.forward() tunnel if one was opened during URL
    resolution; safe to call when no tunnel exists.
    """
    self.log.info("Shutting down ModalExecutor")
    self.heartbeat_interval = 0

    if self._tunnel_context is None:
        return

    try:
        # The tunnel context was entered manually in
        # _resolve_execution_api_url, so it must be exited manually here.
        self._tunnel_context.__exit__(None, None, None)
        self.log.info("Closed tunnel")
    except Exception as e:
        self.log.warning(f"Error closing tunnel: {e}")
    finally:
        self._tunnel_context = None
        self._tunnel = None
256
+
257
def terminate(self) -> None:
    """
    Force terminate.

    Delegates to end(); already-spawned Modal function calls are not
    cancelled here — TODO confirm whether in-flight spawns should be
    aborted on terminate.
    """
    self.end()
262
+
263
def _get_key_str(self, key: TaskInstanceKey) -> str:
    """
    Serialize a TaskInstanceKey to a stable string.

    Format: dag_id:task_id:run_id:try_number[:map_index]

    The map_index component is appended only for mapped task instances
    (map_index >= 0), so keys for regular tasks keep the historical
    four-part format. Without it, two mapped instances of the same task
    would collide on the same state-dict key.
    """
    # Note: TaskInstanceKey is a named tuple, but the fields vary slightly
    # by Airflow version, so read map_index defensively.
    base = f"{key.dag_id}:{key.task_id}:{key.run_id}:{key.try_number}"
    map_index = getattr(key, "map_index", -1)
    if map_index is not None and map_index >= 0:
        base = f"{base}:{map_index}"
    return base
271
+
272
def _resolve_execution_api_url(self) -> str:
    """
    Determine the URL Modal workers use to reach the Airflow execution API.

    Resolution order:
      1. AIRFLOW__CORE__EXECUTION_API_SERVER_URL environment variable
      2. Airflow config option [core] execution_api_server_url
      3. modal.forward() tunnel (only works inside Modal Functions)

    Returns:
        Execution API URL string

    Raises:
        RuntimeError: If URL cannot be determined
    """
    # 1. Environment variable takes precedence over everything else.
    env_url = os.environ.get("AIRFLOW__CORE__EXECUTION_API_SERVER_URL")
    if env_url:
        self._validate_api_url(env_url)
        return env_url

    # 2. Fall back to the Airflow configuration. Validation stays inside
    # the try block: a bad configured URL is logged and we fall through
    # to the tunnel path instead of raising.
    try:
        config_url = conf.get("core", "execution_api_server_url", fallback=None)
        if config_url:
            self._validate_api_url(config_url)
            return config_url
    except Exception as e:
        self.log.warning(f"Error reading execution_api_server_url from config: {e}")

    # 3. Last resort: open a tunnel to local port 8080 (only possible when
    # running inside a Modal container).
    self.log.info(
        "No execution API URL configured, attempting modal.forward() tunnel"
    )
    try:
        self._tunnel_context = modal.forward(8080)
        self._tunnel = self._tunnel_context.__enter__()
        return urljoin(self._tunnel.url, "/execution/")
    except Exception as e:
        raise RuntimeError(
            f"Execution API URL not configured and tunnel creation failed: {e}. "
            "Set AIRFLOW__CORE__EXECUTION_API_SERVER_URL or run inside a Modal container."
        ) from e
316
+
317
def _validate_api_url(self, url: str) -> None:
    """
    Check that the execution API URL looks usable.

    Args:
        url: URL string to validate

    Raises:
        ValueError: If the URL is empty or is not an http(s) URL.
    """
    if not url:
        raise ValueError("Execution API URL cannot be empty")

    # startswith accepts a tuple of prefixes — one call covers both schemes.
    if not url.startswith(("http://", "https://")):
        raise ValueError(
            f"Execution API URL must start with http:// or https://: {url}"
        )
334
+
335
def _get_task_env(self, key: TaskInstanceKey, executor_config: Any) -> Dict[str, str]:
    """
    Build the environment variables shipped to the Modal worker.

    Currently this is only the execution API URL so the worker can phone
    home; *key* and *executor_config* are accepted for interface parity
    but are not consulted yet.

    Raises:
        RuntimeError: If start() has not resolved the execution API URL.
    """
    if self._execution_api_url is None:
        raise RuntimeError(
            "Execution API URL not set. Ensure start() was called successfully."
        )

    return {"AIRFLOW__CORE__EXECUTION_API_SERVER_URL": self._execution_api_url}
@@ -0,0 +1,194 @@
1
+ import os
2
+ import subprocess
3
+ from pathlib import Path
4
+
5
+ import modal
6
+
7
# Allow overriding the environment name via env var.
# Must match the MODALFLOW_ENV seen by the executor so both sides resolve
# the same app/volume/dict names.
ENV = os.environ.get("MODALFLOW_ENV", "main")

# Maximum number of concurrent Modal function calls
# This should match the executor's parallelism setting
CONCURRENCY_LIMIT = 100

# Optional: path to DAGs directory to include in the image.
# Set MODALFLOW_DAGS_DIR to include DAGs so the task worker can load them.
DAGS_DIR = os.environ.get("MODALFLOW_DAGS_DIR", None)
17
+
18
# Define the base image.
# We use the official Airflow image to ensure compatibility, then upgrade
# from 3.0.6 to 3.1.5 in place (3.0.6 lacks queue_workload support).
# NOTE(review): the upgrade runs via `su` as the airflow user — presumably
# to install into that user's Python environment; confirm against the
# official image layout.
airflow_image = (
    modal.Image.from_registry("apache/airflow:3.0.6-python3.10")
    .run_commands(
        'su -s /bin/bash airflow -c "pip install apache-airflow==3.1.5"'
    )
    .pip_install(
        "modal",  # worker containers read/write the shared modal.Dict
        "rich",
        "click",
        "pyyaml",
        "psycopg2-binary",
    )
)
34
+
35
# Include DAG files in the image if a DAGs directory is specified.
# The task worker (execute_modal_task) needs DAG files to load task definitions.
if DAGS_DIR:
    airflow_image = airflow_image.add_local_dir(
        DAGS_DIR, remote_path="/opt/airflow/dags", copy=True
    )

# Create the Modal App; the name embeds ENV so multiple environments can
# coexist in one workspace.
app = modal.App(f"modalflow-{ENV}", image=airflow_image)

# Define the volume for logs
# We use a dedicated volume for logs so they persist and can be read back
log_volume = modal.Volume.from_name(f"airflow-logs-{ENV}", create_if_missing=True)

# Define the dict for coordination (hot cache).
# Maps task_key -> state payload written by execute_modal_task
# (status / return_code / stdout / stderr / error / ts); polled by the
# executor's sync().
state_dict = modal.Dict.from_name(f"airflow-state-{ENV}", create_if_missing=True)
52
+
53
+
54
@app.function(
    volumes={"/opt/airflow/logs": log_volume},
    timeout=3600,  # Default 1 hour timeout
    max_containers=CONCURRENCY_LIMIT,
)
def execute_modal_task(payload: dict):
    """
    Executes an Airflow task either via the SDK workload API or a CLI command.

    Payload structure (new-style):
        {
            "task_key": "dag_id:task_id:run_id:try_number",
            "workload_json": "<serialized ExecuteTask workload JSON>",
            "env": {"AIRFLOW__CORE__...", ...}
        }

    Payload structure (old-style):
        {
            "task_key": "dag_id:task_id:run_id:try_number",
            "command": ["airflow", "tasks", "run", dag_id, task_id, run_id, ...],
            "env": {"AIRFLOW__CORE__...", ...}
        }

    Side effects:
        - Writes state transitions (RUNNING -> SUCCESS/FAILED) for task_key
          into the shared ``state_dict``.
        - Writes captured stdout/stderr to a log file on the logs volume.
    """
    import time

    task_key = payload.get("task_key")
    workload_json = payload.get("workload_json")
    cli_command = payload.get("command")
    env_vars = payload.get("env", {})

    print(f"Starting execution for {task_key}")

    if workload_json:
        # New-style: use the Airflow SDK execute_workload module
        command = [
            "python",
            "-m",
            "airflow.sdk.execution_time.execute_workload",
            "--json-string",
            workload_json,
        ]
        print(
            "Using SDK workload path: python -m airflow.sdk.execution_time.execute_workload --json-string <workload>"
        )
    elif cli_command:
        # Old-style: run the CLI command directly
        command = cli_command
        print(f"Using CLI path: {' '.join(command)}")
    else:
        raise ValueError(
            f"Payload must contain 'workload_json' or 'command', got keys: {list(payload.keys())}"
        )

    # Merge with the container environment; executor-provided vars take
    # precedence (in particular the execution API URL).
    run_env = os.environ.copy()
    run_env.update(env_vars)

    # Best-effort: determine where on the volume this attempt's log goes.
    log_file_path = _resolve_log_path(workload_json, cli_command)

    # Mark RUNNING right before execution. "ts" is a real wall-clock
    # timestamp (was a hard-coded 0 placeholder) so the executor can
    # implement staleness/timeout detection; nothing reads it yet, so
    # this is backward compatible.
    state_dict[task_key] = {
        "status": "RUNNING",
        "return_code": None,
        "ts": time.time(),
    }

    try:
        # Block until the task process completes.
        result = subprocess.run(
            command,
            env=run_env,
            capture_output=True,
            text=True,
            check=False,
        )

        # Log output to Modal's centralized logging
        print(f"Return code: {result.returncode}")
        print(f"STDOUT (first 500): {result.stdout[:500]}")
        print(f"STDERR (first 500): {result.stderr[:500]}")

        # Write output to the log file on the volume
        if log_file_path:
            try:
                with open(log_file_path, "w") as f:
                    f.write(f"*** STDOUT ***\n{result.stdout}\n")
                    f.write(f"*** STDERR ***\n{result.stderr}\n")
            except Exception as e:
                print(f"Failed to write log file: {e}")

        status = "SUCCESS" if result.returncode == 0 else "FAILED"

        state_dict[task_key] = {
            "status": status,
            "return_code": result.returncode,
            "stdout": result.stdout[-2000:],  # Store last 2KB for quick debug
            "stderr": result.stderr[-2000:],
            "ts": time.time(),
        }

    except Exception as e:
        # Infrastructure failure (not a task failure): report it to the
        # executor via state_dict and to Modal via the re-raise.
        print(f"Execution failed: {e}")
        state_dict[task_key] = {
            "status": "FAILED",
            "return_code": -1,
            "error": str(e),
            "ts": time.time(),
        }
        raise


def _resolve_log_path(workload_json, cli_command):
    """
    Derive the log file path for this task attempt (best effort).

    Uses the Hive-partitioned layout on the logs volume:
    /opt/airflow/logs/dag_id=.../run_id=.../task_id=.../attempt=N.log
    (the previous inline comment describing a dag_id/task_id/run_id order
    did not match what the code writes). Returns None if the directory
    cannot be prepared.
    """
    import json

    try:
        if workload_json:
            ti = json.loads(workload_json).get("ti", {})
            dag_id = ti.get("dag_id", "unknown")
            task_id = ti.get("task_id", "unknown")
            run_id = ti.get("run_id", "unknown")
            try_number = ti.get("try_number", 1)
        elif cli_command and len(cli_command) >= 6:
            # Parse from CLI args: airflow tasks run dag_id task_id run_id ...
            dag_id, task_id, run_id = cli_command[3], cli_command[4], cli_command[5]
            try_number = 1
        else:
            dag_id = task_id = run_id = "unknown"
            try_number = 1

        log_dir = os.path.join(
            "/opt/airflow/logs",
            f"dag_id={dag_id}",
            f"run_id={run_id}",
            f"task_id={task_id}",
        )
        os.makedirs(log_dir, exist_ok=True)
        log_path = os.path.join(log_dir, f"attempt={try_number}.log")
        print(f"Writing logs to {log_path}")
        return log_path
    except Exception as e:
        print(f"Warning: Failed to setup log directory structure: {e}")
        return None