modalflow 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modalflow-0.2.2/PKG-INFO +102 -0
- modalflow-0.2.2/README.md +85 -0
- modalflow-0.2.2/pyproject.toml +32 -0
- modalflow-0.2.2/src/modalflow/__init__.py +0 -0
- modalflow-0.2.2/src/modalflow/cli.py +44 -0
- modalflow-0.2.2/src/modalflow/executor/__init__.py +0 -0
- modalflow-0.2.2/src/modalflow/executor/modal_executor.py +349 -0
- modalflow-0.2.2/src/modalflow/modal_app.py +194 -0
modalflow-0.2.2/PKG-INFO
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: modalflow
|
|
3
|
+
Version: 0.2.2
|
|
4
|
+
Summary: Serverless Airflow Executor on Modal
|
|
5
|
+
Requires-Dist: apache-airflow>=3.0.6
|
|
6
|
+
Requires-Dist: modal>=1.3.0
|
|
7
|
+
Requires-Dist: click>=8.0.0
|
|
8
|
+
Requires-Dist: requests>=2.31.0
|
|
9
|
+
Requires-Dist: pytest>=7.0 ; extra == 'dev'
|
|
10
|
+
Requires-Dist: pytest-mock ; extra == 'dev'
|
|
11
|
+
Requires-Dist: pytest-timeout ; extra == 'dev'
|
|
12
|
+
Requires-Dist: black ; extra == 'dev'
|
|
13
|
+
Requires-Dist: ruff ; extra == 'dev'
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# Modalflow
|
|
19
|
+
|
|
20
|
+
A serverless Airflow Executor for Modal.
|
|
21
|
+
|
|
22
|
+
## Usage
|
|
23
|
+
|
|
24
|
+
### Prerequisites
|
|
25
|
+
- pip
|
|
26
|
+
- A Modal account, workspace, and environment
|
|
27
|
+
- Modal CLI configured to your workspace
|
|
28
|
+
|
|
29
|
+
### Steps
|
|
30
|
+
|
|
31
|
+
1. `pip install modalflow` on your local machine, and add it to your Airflow cluster.
|
|
32
|
+
2. Run `modalflow deploy --env {environment name}` to deploy the resources into your Modal environment.
|
|
33
|
+
3. Add `modalflow.executor.modal_executor.ModalExecutor` to your Airflow config
|
|
34
|
+
|
|
35
|
+
## Networking Configuration
|
|
36
|
+
|
|
37
|
+
### Local Development
|
|
38
|
+
|
|
39
|
+
ModalExecutor automatically detects when you're running Airflow locally and creates a tunnel to your local Airflow instance (localhost:8080). No additional configuration needed.
|
|
40
|
+
|
|
41
|
+
The executor will:
|
|
42
|
+
- Detect if localhost:8080 is accessible
|
|
43
|
+
- Create a Modal tunnel to expose your local Airflow API
|
|
44
|
+
- Pass the tunnel URL to Modal Functions so they can "phone home"
|
|
45
|
+
|
|
46
|
+
### Production / VPC Deployments
|
|
47
|
+
|
|
48
|
+
If your Airflow deployment is in a VPC or behind a firewall, you need to configure the execution API URL so Modal Functions can reach it.
|
|
49
|
+
|
|
50
|
+
**Option 1: Environment Variable (Recommended)**
|
|
51
|
+
|
|
52
|
+
Set the execution API URL as an environment variable:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
export AIRFLOW__CORE__EXECUTION_API_SERVER_URL=https://your-airflow-api.example.com/execution/
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
**Option 2: Airflow Config**
|
|
59
|
+
|
|
60
|
+
Set in `airflow.cfg`:
|
|
61
|
+
|
|
62
|
+
```ini
|
|
63
|
+
[core]
|
|
64
|
+
execution_api_server_url = https://your-airflow-api.example.com/execution/
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
**VPC Setup Considerations**
|
|
68
|
+
|
|
69
|
+
If Airflow is deployed in a VPC, ensure:
|
|
70
|
+
|
|
71
|
+
- **Public Endpoint**: Airflow API must be accessible via a public endpoint. Options include:
|
|
72
|
+
- Application Load Balancer (ALB) in public subnets
|
|
73
|
+
- API Gateway in front of Airflow
|
|
74
|
+
- Reverse tunnel (ngrok, Cloudflare Tunnel, AWS IoT Secure Tunneling)
|
|
75
|
+
|
|
76
|
+
- **Security**: Use authentication/authorization to protect the endpoint:
|
|
77
|
+
- API keys or bearer tokens
|
|
78
|
+
- Security groups restricting access
|
|
79
|
+
- VPN or private networking (if Modal supports VPC peering)
|
|
80
|
+
|
|
81
|
+
- **Network Access**: Modal Functions run in Modal's cloud infrastructure and can reach public internet endpoints. Ensure:
|
|
82
|
+
- No firewall rules blocking Modal's IP ranges
|
|
83
|
+
- The endpoint is reachable from the internet (not just internal VPC)
|
|
84
|
+
|
|
85
|
+
**Example: Using Reverse Tunnel**
|
|
86
|
+
|
|
87
|
+
If you can't expose Airflow directly, use a reverse tunnel:
|
|
88
|
+
|
|
89
|
+
1. Set up ngrok or similar: `ngrok http 8080`
|
|
90
|
+
2. Configure the executor with the ngrok URL:
|
|
91
|
+
```bash
|
|
92
|
+
export AIRFLOW__CORE__EXECUTION_API_SERVER_URL=https://abc123.ngrok.io/execution/
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Development
|
|
96
|
+
|
|
97
|
+
We use `uv` for development. To set up:
|
|
98
|
+
|
|
99
|
+
1. `cd modalflow`
|
|
100
|
+
2. `uv sync`
|
|
101
|
+
3. To run the CLI, use: `uv run -- modalflow [COMMAND]`
|
|
102
|
+
4. To run unit tests, use: `uv run pytest`
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Modalflow
|
|
2
|
+
|
|
3
|
+
A serverless Airflow Executor for Modal.
|
|
4
|
+
|
|
5
|
+
## Usage
|
|
6
|
+
|
|
7
|
+
### Prerequisites
|
|
8
|
+
- pip
|
|
9
|
+
- A Modal account, workspace, and environment
|
|
10
|
+
- Modal CLI configured to your workspace
|
|
11
|
+
|
|
12
|
+
### Steps
|
|
13
|
+
|
|
14
|
+
1. `pip install modalflow` on your local machine, and add it to your Airflow cluster.
|
|
15
|
+
2. Run `modalflow deploy --env {environment name}` to deploy the resources into your Modal environment.
|
|
16
|
+
3. Add `modalflow.executor.modal_executor.ModalExecutor` to your Airflow config
|
|
17
|
+
|
|
18
|
+
## Networking Configuration
|
|
19
|
+
|
|
20
|
+
### Local Development
|
|
21
|
+
|
|
22
|
+
ModalExecutor automatically detects when you're running Airflow locally and creates a tunnel to your local Airflow instance (localhost:8080). No additional configuration needed.
|
|
23
|
+
|
|
24
|
+
The executor will:
|
|
25
|
+
- Detect if localhost:8080 is accessible
|
|
26
|
+
- Create a Modal tunnel to expose your local Airflow API
|
|
27
|
+
- Pass the tunnel URL to Modal Functions so they can "phone home"
|
|
28
|
+
|
|
29
|
+
### Production / VPC Deployments
|
|
30
|
+
|
|
31
|
+
If your Airflow deployment is in a VPC or behind a firewall, you need to configure the execution API URL so Modal Functions can reach it.
|
|
32
|
+
|
|
33
|
+
**Option 1: Environment Variable (Recommended)**
|
|
34
|
+
|
|
35
|
+
Set the execution API URL as an environment variable:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
export AIRFLOW__CORE__EXECUTION_API_SERVER_URL=https://your-airflow-api.example.com/execution/
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
**Option 2: Airflow Config**
|
|
42
|
+
|
|
43
|
+
Set in `airflow.cfg`:
|
|
44
|
+
|
|
45
|
+
```ini
|
|
46
|
+
[core]
|
|
47
|
+
execution_api_server_url = https://your-airflow-api.example.com/execution/
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**VPC Setup Considerations**
|
|
51
|
+
|
|
52
|
+
If Airflow is deployed in a VPC, ensure:
|
|
53
|
+
|
|
54
|
+
- **Public Endpoint**: Airflow API must be accessible via a public endpoint. Options include:
|
|
55
|
+
- Application Load Balancer (ALB) in public subnets
|
|
56
|
+
- API Gateway in front of Airflow
|
|
57
|
+
- Reverse tunnel (ngrok, Cloudflare Tunnel, AWS IoT Secure Tunneling)
|
|
58
|
+
|
|
59
|
+
- **Security**: Use authentication/authorization to protect the endpoint:
|
|
60
|
+
- API keys or bearer tokens
|
|
61
|
+
- Security groups restricting access
|
|
62
|
+
- VPN or private networking (if Modal supports VPC peering)
|
|
63
|
+
|
|
64
|
+
- **Network Access**: Modal Functions run in Modal's cloud infrastructure and can reach public internet endpoints. Ensure:
|
|
65
|
+
- No firewall rules blocking Modal's IP ranges
|
|
66
|
+
- The endpoint is reachable from the internet (not just internal VPC)
|
|
67
|
+
|
|
68
|
+
**Example: Using Reverse Tunnel**
|
|
69
|
+
|
|
70
|
+
If you can't expose Airflow directly, use a reverse tunnel:
|
|
71
|
+
|
|
72
|
+
1. Set up ngrok or similar: `ngrok http 8080`
|
|
73
|
+
2. Configure the executor with the ngrok URL:
|
|
74
|
+
```bash
|
|
75
|
+
export AIRFLOW__CORE__EXECUTION_API_SERVER_URL=https://abc123.ngrok.io/execution/
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Development
|
|
79
|
+
|
|
80
|
+
We use `uv` for development. To set up:
|
|
81
|
+
|
|
82
|
+
1. `cd modalflow`
|
|
83
|
+
2. `uv sync`
|
|
84
|
+
3. To run the CLI, use: `uv run -- modalflow [COMMAND]`
|
|
85
|
+
4. To run unit tests, use: `uv run pytest`
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["uv_build>=0.9.18,<0.10.0"]
|
|
3
|
+
build-backend = "uv_build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "modalflow"
|
|
7
|
+
version = "0.2.2"
|
|
8
|
+
description = "Serverless Airflow Executor on Modal"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"apache-airflow>=3.0.6",
|
|
13
|
+
"modal>=1.3.0",
|
|
14
|
+
"click>=8.0.0",
|
|
15
|
+
"requests>=2.31.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.scripts]
|
|
19
|
+
modalflow = "modalflow.cli:cli"
|
|
20
|
+
|
|
21
|
+
[project.optional-dependencies]
|
|
22
|
+
dev = [
|
|
23
|
+
"pytest>=7.0",
|
|
24
|
+
"pytest-mock",
|
|
25
|
+
"pytest-timeout",
|
|
26
|
+
"black",
|
|
27
|
+
"ruff",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[tool.pytest.ini_options]
|
|
31
|
+
pythonpath = "src"
|
|
32
|
+
testpaths = ["tests/unit"]
|
|
File without changes
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import click
|
|
2
|
+
import os
|
|
3
|
+
import subprocess
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
@click.group()
def cli():
    """Top-level command group for the Modalflow command-line interface."""
|
|
10
|
+
|
|
11
|
+
@cli.command()
@click.option("--env", default="main", help="Target environment name (default: main)")
def deploy(env):
    """
    Deploy the Modalflow application to the specified environment.

    This deploys the Modal App, Volume, and Dict defined in modal_app.py.
    """
    click.echo(f"Deploying Modalflow to environment: '{env}'...")

    # modal_app.py reads MODALFLOW_ENV at import time to name its resources,
    # so propagate the chosen environment to the child process.
    child_env = os.environ.copy()
    child_env["MODALFLOW_ENV"] = env

    # Invoke `modal deploy` through the current interpreter so the deploy
    # runs inside the same Python environment (and sees the same packages).
    deploy_cmd = [sys.executable, "-m", "modal", "deploy", "modalflow.modal_app"]

    try:
        subprocess.run(deploy_cmd, env=child_env, check=True)
        click.echo(f"Successfully deployed to environment '{env}'!")
    except subprocess.CalledProcessError as e:
        # Surface the child's exit code so CI pipelines see the real failure.
        click.echo(f"Deployment failed with exit code {e.returncode}.", err=True)
        sys.exit(e.returncode)
    except Exception as e:
        click.echo(f"An error occurred: {e}", err=True)
        sys.exit(1)
|
|
42
|
+
|
|
43
|
+
# Allow invoking this module directly (e.g. `python -m modalflow.cli`) in
# addition to the `modalflow` console-script entry point from pyproject.toml.
if __name__ == "__main__":
    cli()
|
|
File without changes
|
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union
|
|
5
|
+
from urllib.parse import urljoin
|
|
6
|
+
|
|
7
|
+
import modal
|
|
8
|
+
from airflow.configuration import conf
|
|
9
|
+
from airflow.executors.base_executor import BaseExecutor
|
|
10
|
+
from airflow.executors import workloads as executor_workloads
|
|
11
|
+
from airflow.models.taskinstance import TaskInstanceKey
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from airflow.executors.workloads import All as ExecutorWorkload
|
|
15
|
+
|
|
16
|
+
# Type alias for command - can be a list containing a workload or list of strings
CommandType = Union[List[executor_workloads.ExecuteTask], List[str]]

# Configuration - should match modal_app.py.
# ENV selects which deployed Modal app/dict this executor talks to
# (app "modalflow-{ENV}", dict "airflow-state-{ENV}").
ENV = os.environ.get("MODALFLOW_ENV", "main")
# Upper bound on in-flight tasks; mirrors the Modal function's
# max_containers setting defined in modal_app.py.
CONCURRENCY_LIMIT = 100
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ModalExecutor(BaseExecutor):
    """
    An Airflow Executor that runs tasks as Modal Functions.

    Lifecycle: start() looks up the deployed Modal function and state Dict and
    resolves the execution API URL; execute_async()/spawn() fire tasks;
    sync() polls the shared Modal Dict for terminal states; end() tears down
    any tunnel that was opened.
    """

    # Advertise as a "local" executor to the scheduler.
    # NOTE(review): tasks actually run remotely on Modal — confirm this flag
    # is intentional (it affects how Airflow treats log serving/heartbeats).
    is_local: bool = True

    def __init__(self):
        # Use the same concurrency limit as the Modal function
        super().__init__(parallelism=CONCURRENCY_LIMIT)
        # Maps serialized task key (see _get_key_str) -> TaskInstanceKey for
        # every task we have spawned but not yet observed completing.
        self.active_tasks: Dict[str, TaskInstanceKey] = {}
        # These will be initialized in start()
        self._modal_function = None
        self._state_dict = None
        self._tunnel_context = None  # Store the context manager
        self._tunnel = None  # Store the tunnel object from __enter__()
        self._execution_api_url = None

    @property
    def slots_available(self) -> int:
        """
        Return the number of slots available to run tasks.
        This is checked by the scheduler to determine if more tasks can be queued.
        """
        return self.parallelism - len(self.running) - len(self.queued_tasks)

    def start(self):
        """
        Initialize the executor by looking up the deployed Modal function and state dict.
        Also sets up networking (tunnel for local or production URL).

        Raises whatever modal raises if the app/dict named by ENV has not been
        deployed (run `modalflow deploy --env ...` first).
        """
        self.log.info("Starting ModalExecutor")

        app_name = f"modalflow-{ENV}"
        dict_name = f"airflow-state-{ENV}"

        # Look up the deployed Modal function
        try:
            self._modal_function = modal.Function.from_name(
                app_name, "execute_modal_task"
            )
            self.log.info(f"Connected to Modal function: {app_name}/execute_modal_task")
        except Exception as e:
            self.log.error(
                f"Failed to look up Modal function {app_name}/execute_modal_task: {e}"
            )
            raise

        # Look up the state dictionary
        try:
            self._state_dict = modal.Dict.from_name(dict_name)
            self.log.info(f"Connected to Modal Dict: {dict_name}")
        except Exception as e:
            self.log.error(f"Failed to connect to Modal Dict {dict_name}: {e}")
            raise

        # Set up execution API URL.
        # Priority: env var > Airflow config > modal.forward() tunnel > error
        self._execution_api_url = self._resolve_execution_api_url()
        self.log.info(f"Execution API URL: {self._execution_api_url}")

    def execute_async(
        self,
        key: TaskInstanceKey,
        command: CommandType,
        queue: Optional[str] = None,
        executor_config: Optional[Any] = None,
    ) -> None:
        """
        Trigger a task execution on Modal.

        Handles two formats:
        - New-style: command is a list containing an ExecuteTask workload object.
        - Old-style: command is a list of strings (CLI command).
        """
        task_key_str = self._get_key_str(key)

        if len(command) == 1 and isinstance(command[0], executor_workloads.ExecuteTask):
            # New-style: serialize the workload to JSON
            workload = command[0]
            serialized_workload = workload.model_dump_json()
            payload = {
                "task_key": task_key_str,
                "workload_json": serialized_workload,
                "env": self._get_task_env(key, executor_config),
            }
        elif all(isinstance(c, str) for c in command):
            # Old-style: pass the CLI command for direct execution
            payload = {
                "task_key": task_key_str,
                "command": command,
                "env": self._get_task_env(key, executor_config),
            }
        else:
            raise RuntimeError(
                f"ModalExecutor doesn't know how to handle command of type: {type(command)}"
            )

        self.log.info(f"Spawning Modal task for {task_key_str}")

        # Guard against being called before start() completed; _get_task_env
        # above would already have raised, this is a defensive double-check.
        if self._execution_api_url is None:
            raise RuntimeError(
                "Execution API URL not configured. Ensure start() was called successfully."
            )

        try:
            # spawn() is fire-and-forget; completion is observed via the
            # shared state dict in sync().
            self._modal_function.spawn(payload)
            self.active_tasks[task_key_str] = key
        except Exception as e:
            self.log.error(f"Failed to spawn Modal task: {e}")
            # NOTE(review): fail() removes the key from self.running, but the
            # caller (_process_workloads) adds it back afterwards — verify the
            # key is not stranded in self.running after a failed spawn.
            self.fail(key)

    def _process_workloads(self, workloads: Sequence) -> None:
        """
        Process workloads from the base executor.

        Handles both new-style ExecuteTask workloads and old-style
        (command, priority, queue, executor_config) tuples queued via
        queue_command.
        """
        for workload in workloads:
            if isinstance(workload, executor_workloads.ExecuteTask):
                ti = workload.ti
                key = TaskInstanceKey(
                    dag_id=ti.dag_id,
                    task_id=ti.task_id,
                    run_id=ti.run_id,
                    try_number=ti.try_number,
                    map_index=ti.map_index,
                )
                queue = ti.queue
                executor_config = ti.executor_config or {}
                # execute_async expects the workload wrapped in a one-element list.
                command = [workload]
            elif isinstance(workload, tuple):
                # Old-style: (command, priority, queue, executor_config)
                command, _priority, queue, executor_config = workload
                # Parse TaskInstanceKey from the CLI command list
                # Format: ['airflow', 'tasks', 'run', dag_id, task_id, run_id, ...]
                # NOTE(review): try_number/map_index are not present in the CLI
                # form, so defaults (1, -1) are assumed — confirm for retries.
                key = TaskInstanceKey(
                    dag_id=command[3],
                    task_id=command[4],
                    run_id=command[5],
                    try_number=1,
                    map_index=-1,
                )
            else:
                self.log.error(
                    f"Skipping unrecognized workload type: {type(workload)}"
                )
                continue

            # Move the task from "queued" bookkeeping into "running".
            if key in self.queued_tasks:
                del self.queued_tasks[key]

            self.execute_async(
                key=key,
                command=command,
                queue=queue,
                executor_config=executor_config,
            )
            self.running.add(key)

    def sync(self) -> None:
        """
        Check the status of running tasks.

        Polls the shared Modal Dict (written by execute_modal_task) for
        terminal states and reports success/failure back to Airflow.
        """
        if not self.active_tasks:
            return

        # Poll the state dictionary
        # TODO: batch this or use a more efficient lookup

        completed_keys = []

        for task_key_str, key in self.active_tasks.items():
            # Check if this key exists in the remote dict
            # We use .get() to avoid errors if key is missing
            try:
                task_state = self._state_dict.get(task_key_str)
            except Exception as e:
                # Transient read errors: retry on the next sync cycle.
                self.log.warning(f"Error reading state for {task_key_str}: {e}")
                continue

            if not task_state:
                # Task not yet registered by worker, or lost
                # TODO: implement a timeout logic here
                continue

            status = task_state.get("status")

            if status == "SUCCESS":
                self.success(key)
                completed_keys.append(task_key_str)
                self.log.info(f"Task {task_key_str} succeeded")

            elif status == "FAILED":
                self.fail(key)
                completed_keys.append(task_key_str)
                error_msg = task_state.get("error", "Unknown error")
                self.log.error(f"Task {task_key_str} failed: {error_msg}")
                # The worker stores the tail of stdout/stderr for quick debugging.
                if task_state.get("stderr"):
                    self.log.error(f"Task {task_key_str} stderr: {task_state['stderr']}")
                if task_state.get("stdout"):
                    self.log.info(f"Task {task_key_str} stdout: {task_state['stdout']}")

        # Cleanup local state
        for k in completed_keys:
            del self.active_tasks[k]
            # Cleanup remote state
            try:
                self._state_dict.pop(k)
            except Exception as e:
                self.log.warning(f"Failed to cleanup remote state for {k}: {e}")

    def end(self) -> None:
        """
        Terminate the executor and cleanup resources.
        """
        self.log.info("Shutting down ModalExecutor")
        self.heartbeat_interval = 0

        # Cleanup tunnel if it exists
        if self._tunnel_context is not None:
            try:
                # Exit the context manager
                self._tunnel_context.__exit__(None, None, None)
                self.log.info("Closed tunnel")
            except Exception as e:
                self.log.warning(f"Error closing tunnel: {e}")
            finally:
                self._tunnel_context = None
                self._tunnel = None

    def terminate(self) -> None:
        """
        Force terminate.

        Delegates to end(); there is no harder shutdown path for Modal spawns
        here (already-spawned function calls are not cancelled).
        """
        self.end()

    def _get_key_str(self, key: TaskInstanceKey) -> str:
        """
        Serialize TaskInstanceKey to a string.
        Format: dag_id:task_id:run_id:try_number
        """
        # Note: TaskInstanceKey is a named tuple, but the fields vary slightly by Airflow version
        # We construct a stable string key
        return f"{key.dag_id}:{key.task_id}:{key.run_id}:{key.try_number}"

    def _resolve_execution_api_url(self) -> str:
        """
        Resolve the execution API URL.

        Priority:
        1. Environment variable AIRFLOW__CORE__EXECUTION_API_SERVER_URL
        2. Airflow config: core.execution_api_server_url
        3. modal.forward() tunnel (only works inside Modal Functions)

        Returns:
            Execution API URL string

        Raises:
            RuntimeError: If URL cannot be determined
        """
        # 1. Check environment variable (takes precedence)
        env_url = os.environ.get("AIRFLOW__CORE__EXECUTION_API_SERVER_URL")
        if env_url:
            self._validate_api_url(env_url)
            return env_url

        # 2. Check Airflow config
        try:
            config_url = conf.get("core", "execution_api_server_url", fallback=None)
            if config_url:
                self._validate_api_url(config_url)
                return config_url
        except Exception as e:
            self.log.warning(f"Error reading execution_api_server_url from config: {e}")

        # 3. Try modal.forward() tunnel (works inside Modal Functions only)
        self.log.info(
            "No execution API URL configured, attempting modal.forward() tunnel"
        )
        try:
            # The context manager is kept open for the executor's lifetime and
            # closed in end().
            self._tunnel_context = modal.forward(8080)
            self._tunnel = self._tunnel_context.__enter__()
            tunnel_url = self._tunnel.url
            return urljoin(tunnel_url, "/execution/")
        except Exception as e:
            raise RuntimeError(
                f"Execution API URL not configured and tunnel creation failed: {e}. "
                "Set AIRFLOW__CORE__EXECUTION_API_SERVER_URL or run inside a Modal container."
            ) from e

    def _validate_api_url(self, url: str) -> None:
        """
        Validate that the execution API URL is properly formatted.

        Args:
            url: URL string to validate

        Raises:
            ValueError: If URL is invalid
        """
        if not url:
            raise ValueError("Execution API URL cannot be empty")

        if not (url.startswith("http://") or url.startswith("https://")):
            raise ValueError(
                f"Execution API URL must start with http:// or https://: {url}"
            )

    def _get_task_env(self, key: TaskInstanceKey, executor_config: Any) -> Dict[str, str]:
        """
        Gather environment variables to pass to the worker.

        Includes the execution API URL so Modal Functions can phone home.

        Raises:
            RuntimeError: If start() has not resolved the execution API URL yet.
        """
        if self._execution_api_url is None:
            raise RuntimeError(
                "Execution API URL not set. Ensure start() was called successfully."
            )

        env = {
            "AIRFLOW__CORE__EXECUTION_API_SERVER_URL": self._execution_api_url,
        }
        return env
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import modal
|
|
6
|
+
|
|
7
|
+
# Allow overriding the environment name via env var.
# Must stay in sync with ENV in modal_executor.py, which looks these
# resources up by the same "{name}-{ENV}" convention.
ENV = os.environ.get("MODALFLOW_ENV", "main")

# Maximum number of concurrent Modal function calls
# This should match the executor's parallelism setting
CONCURRENCY_LIMIT = 100

# Optional: path to DAGs directory to include in the image.
# Set MODALFLOW_DAGS_DIR to include DAGs so the task worker can load them.
DAGS_DIR = os.environ.get("MODALFLOW_DAGS_DIR", None)

# Define the base image
# We use the official Airflow image to ensure compatibility.
# Upgrade from 3.0.6 to 3.1.5 (3.0.6 lacks queue_workload support).
# The pip install runs as the `airflow` user because the official image's
# Airflow installation is owned by that user, not root.
airflow_image = (
    modal.Image.from_registry("apache/airflow:3.0.6-python3.10")
    .run_commands(
        'su -s /bin/bash airflow -c "pip install apache-airflow==3.1.5"'
    )
    .pip_install(
        "modal",
        "rich",
        "click",
        "pyyaml",
        "psycopg2-binary",
    )
)

# Include DAG files in the image if a DAGs directory is specified.
# The task worker (execute_workload) needs DAG files to load task definitions.
if DAGS_DIR:
    airflow_image = airflow_image.add_local_dir(
        DAGS_DIR, remote_path="/opt/airflow/dags", copy=True
    )

# Create the Modal App
app = modal.App(f"modalflow-{ENV}", image=airflow_image)

# Define the volume for logs
# We use a dedicated volume for logs so they persist and can be read back
log_volume = modal.Volume.from_name(f"airflow-logs-{ENV}", create_if_missing=True)

# Define the dict for coordination (hot cache)
# Maps task_key -> {status, return_code, last_updated}
# Polled by ModalExecutor.sync() and written by execute_modal_task below.
state_dict = modal.Dict.from_name(f"airflow-state-{ENV}", create_if_missing=True)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@app.function(
    volumes={"/opt/airflow/logs": log_volume},
    timeout=3600,  # Default 1 hour timeout
    max_containers=CONCURRENCY_LIMIT,
)
def execute_modal_task(payload: dict):
    """
    Executes an Airflow task either via the SDK workload API or a CLI command.

    The terminal result (status, return code, output tails) is published to
    `state_dict[task_key]`, which ModalExecutor.sync() polls.

    Payload structure (new-style):
        {
            "task_key": "dag_id:task_id:run_id:try_number",
            "workload_json": "<serialized ExecuteTask workload JSON>",
            "env": {"AIRFLOW__CORE__...", ...}
        }

    Payload structure (old-style):
        {
            "task_key": "dag_id:task_id:run_id:try_number",
            "command": ["airflow", "tasks", "run", dag_id, task_id, run_id, ...],
            "env": {"AIRFLOW__CORE__...", ...}
        }
    """
    import json
    import os

    task_key = payload.get("task_key")
    workload_json = payload.get("workload_json")
    cli_command = payload.get("command")
    env_vars = payload.get("env", {})

    print(f"Starting execution for {task_key}")

    if workload_json:
        # New-style: use the Airflow SDK execute_workload module
        command = [
            "python",
            "-m",
            "airflow.sdk.execution_time.execute_workload",
            "--json-string",
            workload_json,
        ]
        print(
            "Using SDK workload path: python -m airflow.sdk.execution_time.execute_workload --json-string <workload>"
        )
    elif cli_command:
        # Old-style: run the CLI command directly
        command = cli_command
        print(f"Using CLI path: {' '.join(command)}")
    else:
        raise ValueError(
            f"Payload must contain 'workload_json' or 'command', got keys: {list(payload.keys())}"
        )

    # Set environment variables
    # Merge with existing env, but executor-provided vars take precedence
    run_env = os.environ.copy()
    run_env.update(env_vars)

    # Try to extract task info for logging
    log_file_path = None
    try:
        if workload_json:
            workload_data = json.loads(workload_json)
            ti = workload_data.get("ti", {})
            dag_id = ti.get("dag_id", "unknown")
            task_id = ti.get("task_id", "unknown")
            run_id = ti.get("run_id", "unknown")
            try_number = ti.get("try_number", 1)
        elif cli_command and len(cli_command) >= 6:
            # Parse from CLI args: airflow tasks run dag_id task_id run_id ...
            dag_id = cli_command[3]
            task_id = cli_command[4]
            run_id = cli_command[5]
            try_number = 1
        else:
            dag_id = task_id = run_id = "unknown"
            try_number = 1

        # Construct path: /opt/airflow/logs/dag_id=.../run_id=.../task_id=.../attempt=N.log
        # (matches Airflow's default hive-partitioned log layout)
        log_dir = os.path.join(
            "/opt/airflow/logs",
            f"dag_id={dag_id}",
            f"run_id={run_id}",
            f"task_id={task_id}",
        )
        os.makedirs(log_dir, exist_ok=True)
        log_file_path = os.path.join(log_dir, f"attempt={try_number}.log")
        print(f"Writing logs to {log_file_path}")
    except Exception as e:
        # Log capture is best-effort; the task still runs without it.
        print(f"Warning: Failed to setup log directory structure: {e}")

    # Update state to RUNNING right before execution
    state_dict[task_key] = {
        "status": "RUNNING",
        "return_code": None,
        "ts": 0,  # Timestamp placeholder
    }

    try:
        # Run the command
        # We use subprocess.run to block until completion
        result = subprocess.run(
            command,
            env=run_env,
            capture_output=True,
            text=True,
            check=False,
        )

        # Log output to Modal's centralized logging
        print(f"Return code: {result.returncode}")
        print(f"STDOUT (first 500): {result.stdout[:500]}")
        print(f"STDERR (first 500): {result.stderr[:500]}")

        # Write output to the log file on the volume
        if log_file_path:
            try:
                with open(log_file_path, "w") as f:
                    f.write(f"*** STDOUT ***\n{result.stdout}\n")
                    f.write(f"*** STDERR ***\n{result.stderr}\n")
            except Exception as e:
                print(f"Failed to write log file: {e}")

        status = "SUCCESS" if result.returncode == 0 else "FAILED"

        state_dict[task_key] = {
            "status": status,
            "return_code": result.returncode,
            "stdout": result.stdout[-2000:],  # Store last 2KB for quick debug
            "stderr": result.stderr[-2000:],
        }

    except Exception as e:
        # Record the failure so the executor doesn't wait on a dead task,
        # then re-raise so Modal also marks this function call as failed.
        print(f"Execution failed: {e}")
        state_dict[task_key] = {
            "status": "FAILED",
            "return_code": -1,
            "error": str(e),
        }
        raise
|