astro-airflow-mcp 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- astro_airflow_mcp/__init__.py +5 -0
- astro_airflow_mcp/__main__.py +75 -0
- astro_airflow_mcp/logging.py +61 -0
- astro_airflow_mcp/plugin.py +66 -0
- astro_airflow_mcp/server.py +1374 -0
- astro_airflow_mcp-0.1.5.dist-info/METADATA +231 -0
- astro_airflow_mcp-0.1.5.dist-info/RECORD +10 -0
- astro_airflow_mcp-0.1.5.dist-info/WHEEL +4 -0
- astro_airflow_mcp-0.1.5.dist-info/entry_points.txt +5 -0
- astro_airflow_mcp-0.1.5.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,1374 @@
|
|
|
1
|
+
"""FastMCP server for Airflow integration."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
from fastmcp import FastMCP
|
|
7
|
+
|
|
8
|
+
# Fallback settings used when the caller does not supply its own values.
DEFAULT_AIRFLOW_URL = "http://localhost:8080"  # base URL of the Airflow webserver
DEFAULT_LIMIT = 100  # page size sent as the `limit` query parameter
DEFAULT_OFFSET = 0  # starting position sent as the `offset` query parameter
|
|
12
|
+
|
|
13
|
+
# Create MCP server
|
|
14
|
+
mcp = FastMCP(
|
|
15
|
+
"Airflow MCP Server",
|
|
16
|
+
instructions="""
|
|
17
|
+
This server provides access to Apache Airflow's REST API through MCP tools.
|
|
18
|
+
|
|
19
|
+
Use these tools to:
|
|
20
|
+
- List and inspect DAGs (Directed Acyclic Graphs / workflows)
|
|
21
|
+
- View DAG runs and their execution status
|
|
22
|
+
- Check task instances and their states
|
|
23
|
+
- Inspect Airflow connections, variables, and pools
|
|
24
|
+
- Monitor DAG statistics and warnings
|
|
25
|
+
- View system configuration and version information
|
|
26
|
+
|
|
27
|
+
When the user asks about Airflow workflows, pipelines, or data orchestration,
|
|
28
|
+
use these tools to provide detailed, accurate information directly from the
|
|
29
|
+
Airflow instance.
|
|
30
|
+
""",
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Global configuration for Airflow API access
|
|
35
|
+
class AirflowConfig:
    """Mutable holder for the Airflow connection settings used by the tools."""

    def __init__(self):
        # Base URL of the Airflow webserver; starts at the module default.
        self.url: str = DEFAULT_AIRFLOW_URL
        # Bearer token for API calls; None means requests carry no auth header.
        self.auth_token: str | None = None


# Module-wide instance read by every tool and updated through configure().
_config = AirflowConfig()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def configure(
    url: str | None = None,
    auth_token: str | None = None,
) -> None:
    """Update the shared Airflow connection settings.

    Only truthy arguments are applied; passing None (or an empty string)
    leaves the corresponding setting untouched.

    Args:
        url: Base URL of Airflow webserver
        auth_token: Bearer token for authentication
    """
    overrides = (("url", url), ("auth_token", auth_token))
    for attribute, value in overrides:
        if value:
            setattr(_config, attribute, value)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# Helper functions for API calls and response formatting
|
|
63
|
+
def _call_airflow_api(
    endpoint: str,
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    params: dict[str, Any] | None = None,
    auth_token: str | None = None,
) -> dict[str, Any]:
    """Issue a GET request against the Airflow REST API (``/api/v2``).

    Args:
        endpoint: API endpoint path relative to ``/api/v2/``
            (e.g., 'dags', 'dagRuns'). A leading slash is tolerated.
        airflow_url: Base URL of the Airflow webserver. A trailing slash
            is tolerated.
        params: Optional query parameters
        auth_token: Optional Bearer token. When provided it is sent in the
            ``Authorization`` header; otherwise no authentication is used.

    Returns:
        Parsed JSON response from the API

    Raises:
        Exception: If the request fails (connection error, timeout, non-2xx
            status) or the call fails for any other reason. The original
            exception is chained as ``__cause__``.
    """
    try:
        # Normalise both sides of the join so a base URL configured as
        # "http://host:8080/" or an endpoint given as "/dags" does not
        # produce a double slash in the request path.
        api_url = f"{airflow_url.rstrip('/')}/api/v2/{endpoint.lstrip('/')}"
        headers: dict[str, str] = {}

        # Only attach the Authorization header when a token was supplied.
        if auth_token:
            headers["Authorization"] = f"Bearer {auth_token}"

        response = requests.get(api_url, params=params, headers=headers, timeout=30)
        # Turn 4xx/5xx responses into exceptions handled below.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        raise Exception(f"Error connecting to Airflow API: {str(e)}") from e
    except Exception as e:
        raise Exception(f"Error calling API endpoint '{endpoint}': {str(e)}") from e
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _wrap_list_response(items: list[dict[str, Any]], key_name: str, data: dict[str, Any]) -> str:
    """Serialise a list result together with simple pagination counters.

    Args:
        items: List of items from the API
        key_name: Key under which the items are stored (e.g., 'dags',
            'dag_runs'); also used to build the ``total_<key_name>`` key
        data: Original API response; its ``total_entries`` value is used as
            the total, falling back to ``len(items)`` when absent

    Returns:
        JSON string (2-space indent) with the total, the returned count and
        the items themselves
    """
    import json

    envelope: dict[str, Any] = {}
    envelope[f"total_{key_name}"] = data.get("total_entries", len(items))
    envelope["returned_count"] = len(items)
    envelope[key_name] = items
    return json.dumps(envelope, indent=2)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _get_dag_details_impl(
    dag_id: str,
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    auth_token: str | None = None,
) -> str:
    """Fetch ``dags/{dag_id}`` and return the response as formatted JSON.

    Args:
        dag_id: The ID of the DAG to get details for
        airflow_url: The base URL of the Airflow webserver (default: http://localhost:8080)
        auth_token: Optional Bearer token for token-based authentication

    Returns:
        JSON string with the DAG details, or the error message text if the
        call fails
    """
    import json

    try:
        dag_payload = _call_airflow_api(
            f"dags/{dag_id}",
            airflow_url,
            auth_token=auth_token,
        )
        return json.dumps(dag_payload, indent=2)
    except Exception as err:
        # Failures are reported back to the caller as plain text.
        return str(err)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@mcp.tool()
def get_dag_details(dag_id: str) -> str:
    """Get detailed information about a specific Apache Airflow DAG.

    Use this tool when the user asks about:
    - "Show me details for DAG X" or "What are the details of DAG Y?"
    - "Tell me about DAG Z" or "Get information for this specific DAG"
    - "What's the schedule for DAG X?" or "When does this DAG run?"
    - "Is DAG Y paused?" or "Show me the configuration of DAG Z"
    - "Who owns this DAG?" or "What are the tags for this workflow?"

    Returns complete DAG information including:
    - dag_id: Unique identifier for the DAG
    - is_paused: Whether the DAG is currently paused
    - is_active: Whether the DAG is active
    - is_subdag: Whether this is a SubDAG
    - fileloc: File path where the DAG is defined
    - file_token: Unique token for the DAG file
    - owners: List of DAG owners
    - description: Human-readable description of what the DAG does
    - schedule_interval: Cron expression or timedelta for scheduling
    - tags: List of tags/labels for categorization
    - max_active_runs: Maximum number of concurrent runs
    - max_active_tasks: Maximum number of concurrent tasks
    - has_task_concurrency_limits: Whether task concurrency limits are set
    - has_import_errors: Whether the DAG has import errors
    - next_dagrun: When the next DAG run is scheduled
    - next_dagrun_create_after: Earliest time for next DAG run creation

    Args:
        dag_id: The ID of the DAG to get details for

    Returns:
        JSON with complete details about the specified DAG
    """
    # Connection settings come from the shared module-level configuration.
    base_url, token = _config.url, _config.auth_token
    return _get_dag_details_impl(dag_id=dag_id, airflow_url=base_url, auth_token=token)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _list_dags_impl(
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    limit: int = DEFAULT_LIMIT,
    offset: int = DEFAULT_OFFSET,
    auth_token: str | None = None,
) -> str:
    """Query the ``dags`` endpoint and return one page of DAGs.

    Args:
        airflow_url: The base URL of the Airflow webserver (default: http://localhost:8080)
        limit: Maximum number of DAGs to return (default: 100)
        offset: Offset for pagination (default: 0)
        auth_token: Optional Bearer token for token-based authentication

    Returns:
        JSON string with the DAG list and counts; a plain-text notice if the
        response has no ``dags`` key; or the error message text on failure
    """
    try:
        page_query = {"limit": limit, "offset": offset}
        response = _call_airflow_api(
            "dags",
            airflow_url,
            page_query,
            auth_token=auth_token,
        )

        # Guard: a response without the expected key is echoed back verbatim.
        if "dags" not in response:
            return f"No DAGs found. Response: {response}"
        return _wrap_list_response(response["dags"], "dags", response)
    except Exception as err:
        return str(err)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
@mcp.tool()
def list_dags() -> str:
    """Get information about all Apache Airflow DAGs (Directed Acyclic Graphs).

    Use this tool when the user asks about:
    - "What DAGs are available?" or "List all DAGs"
    - "Show me the workflows" or "What pipelines exist?"
    - "Which DAGs are paused/active?"
    - DAG schedules, descriptions, or tags
    - Finding a specific DAG by name

    Returns comprehensive DAG metadata including:
    - dag_id: Unique identifier for the DAG
    - is_paused: Whether the DAG is currently paused
    - is_active: Whether the DAG is active
    - schedule_interval: How often the DAG runs
    - description: Human-readable description
    - tags: Labels/categories for the DAG
    - owners: Who maintains the DAG
    - file_token: Location of the DAG file

    Returns:
        JSON with list of all DAGs and their complete metadata
    """
    # Uses the implementation's default limit/offset.
    base_url, token = _config.url, _config.auth_token
    return _list_dags_impl(airflow_url=base_url, auth_token=token)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _get_dag_source_impl(
    dag_id: str,
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    auth_token: str | None = None,
) -> str:
    """Fetch ``dagSources/{dag_id}`` and return the response as formatted JSON.

    Args:
        dag_id: The ID of the DAG to get source code for
        airflow_url: The base URL of the Airflow webserver (default: http://localhost:8080)
        auth_token: Optional Bearer token for token-based authentication

    Returns:
        JSON string with the endpoint's response, or the error message text
        if the call fails
    """
    import json

    try:
        source_payload = _call_airflow_api(
            f"dagSources/{dag_id}",
            airflow_url,
            auth_token=auth_token,
        )
        return json.dumps(source_payload, indent=2)
    except Exception as err:
        return str(err)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
@mcp.tool()
def get_dag_source(dag_id: str) -> str:
    """Get the source code for a specific Apache Airflow DAG.

    Use this tool when the user asks about:
    - "Show me the code for DAG X" or "What's the source of DAG Y?"
    - "How is DAG Z implemented?" or "What does the DAG file look like?"
    - "Can I see the Python code for this workflow?"
    - "What tasks are defined in the DAG code?"

    Returns the DAG source file contents including:
    - content: The actual Python source code of the DAG file
    - file_token: Unique identifier for the source file

    Args:
        dag_id: The ID of the DAG to get source code for

    Returns:
        JSON with DAG source code and metadata
    """
    base_url, token = _config.url, _config.auth_token
    return _get_dag_source_impl(dag_id=dag_id, airflow_url=base_url, auth_token=token)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def _get_dag_stats_impl(
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    auth_token: str | None = None,
) -> str:
    """Fetch the ``dagStats`` endpoint and return the response as formatted JSON.

    Args:
        airflow_url: The base URL of the Airflow webserver (default: http://localhost:8080)
        auth_token: Optional Bearer token for token-based authentication

    Returns:
        JSON string with the endpoint's response, or the error message text
        if the call fails
    """
    import json

    try:
        stats_payload = _call_airflow_api(
            "dagStats",
            airflow_url,
            auth_token=auth_token,
        )
        return json.dumps(stats_payload, indent=2)
    except Exception as err:
        return str(err)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
@mcp.tool()
def get_dag_stats() -> str:
    """Get statistics about DAG runs across all DAGs (success/failure counts by state).

    Use this tool when the user asks about:
    - "What's the overall health of my DAGs?" or "Show me DAG statistics"
    - "How many DAG runs succeeded/failed?" or "What's the success rate?"
    - "Give me a summary of DAG run states"
    - "How many runs are currently running/queued?"

    Returns statistics showing counts of DAG runs grouped by state:
    - success: Number of successful runs
    - failed: Number of failed runs
    - running: Number of currently running runs
    - queued: Number of queued runs
    - And other possible states

    Returns:
        JSON with DAG run statistics organized by DAG and state
    """
    base_url, token = _config.url, _config.auth_token
    return _get_dag_stats_impl(airflow_url=base_url, auth_token=token)
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _list_dag_warnings_impl(
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    limit: int = DEFAULT_LIMIT,
    offset: int = DEFAULT_OFFSET,
    auth_token: str | None = None,
) -> str:
    """Query the ``dagWarnings`` endpoint and return one page of warnings.

    Args:
        airflow_url: The base URL of the Airflow webserver (default: http://localhost:8080)
        limit: Maximum number of warnings to return (default: 100)
        offset: Offset for pagination (default: 0)
        auth_token: Optional Bearer token for token-based authentication

    Returns:
        JSON string with the warning list and counts; a plain-text notice if
        the response has no ``dag_warnings`` key; or the error message text
        on failure
    """
    try:
        page_query = {"limit": limit, "offset": offset}
        response = _call_airflow_api(
            "dagWarnings",
            airflow_url,
            page_query,
            auth_token=auth_token,
        )

        if "dag_warnings" not in response:
            return f"No DAG warnings found. Response: {response}"
        return _wrap_list_response(response["dag_warnings"], "dag_warnings", response)
    except Exception as err:
        return str(err)
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
@mcp.tool()
def list_dag_warnings() -> str:
    """Get warnings and issues detected in DAG definitions.

    Use this tool when the user asks about:
    - "Are there any DAG warnings?" or "Show me DAG issues"
    - "What problems exist with my DAGs?" or "Any DAG errors?"
    - "Check DAG health" or "Show me DAG validation warnings"
    - "What's wrong with my workflows?"

    Returns warnings about DAG configuration issues including:
    - dag_id: Which DAG has the warning
    - warning_type: Type of warning (e.g., deprecation, configuration issue)
    - message: Description of the warning
    - timestamp: When the warning was detected

    Returns:
        JSON with list of DAG warnings and their details
    """
    base_url, token = _config.url, _config.auth_token
    return _list_dag_warnings_impl(airflow_url=base_url, auth_token=token)
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def _get_task_impl(
    dag_id: str,
    task_id: str,
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    auth_token: str | None = None,
) -> str:
    """Fetch ``dags/{dag_id}/tasks/{task_id}`` and return it as formatted JSON.

    Args:
        dag_id: The ID of the DAG
        task_id: The ID of the task
        airflow_url: The base URL of the Airflow webserver (default: http://localhost:8080)
        auth_token: Optional Bearer token for token-based authentication

    Returns:
        JSON string with the task details, or the error message text if the
        call fails
    """
    import json

    try:
        task_path = f"dags/{dag_id}/tasks/{task_id}"
        task_payload = _call_airflow_api(
            task_path,
            airflow_url,
            auth_token=auth_token,
        )
        return json.dumps(task_payload, indent=2)
    except Exception as err:
        return str(err)
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def _list_tasks_impl(
    dag_id: str,
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    auth_token: str | None = None,
) -> str:
    """Query ``dags/{dag_id}/tasks`` and return the tasks of one DAG.

    Args:
        dag_id: The ID of the DAG to list tasks for
        airflow_url: The base URL of the Airflow webserver (default: http://localhost:8080)
        auth_token: Optional Bearer token for token-based authentication

    Returns:
        JSON string with the task list and counts; a plain-text notice if the
        response has no ``tasks`` key; or the error message text on failure
    """
    try:
        tasks_path = f"dags/{dag_id}/tasks"
        response = _call_airflow_api(
            tasks_path,
            airflow_url,
            auth_token=auth_token,
        )

        if "tasks" not in response:
            return f"No tasks found. Response: {response}"
        return _wrap_list_response(response["tasks"], "tasks", response)
    except Exception as err:
        return str(err)
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def _get_task_instance_impl(
    dag_id: str,
    dag_run_id: str,
    task_id: str,
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    auth_token: str | None = None,
) -> str:
    """Fetch one task instance of a DAG run and return it as formatted JSON.

    The request targets
    ``dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/{task_id}``.

    Args:
        dag_id: The ID of the DAG
        dag_run_id: The ID of the DAG run
        task_id: The ID of the task
        airflow_url: The base URL of the Airflow webserver (default: http://localhost:8080)
        auth_token: Optional Bearer token for token-based authentication

    Returns:
        JSON string with the task instance details, or the error message
        text if the call fails
    """
    import json

    try:
        instance_path = f"dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/{task_id}"
        instance_payload = _call_airflow_api(
            instance_path,
            airflow_url,
            auth_token=auth_token,
        )
        return json.dumps(instance_payload, indent=2)
    except Exception as err:
        return str(err)
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
@mcp.tool()
def get_task(dag_id: str, task_id: str) -> str:
    """Get detailed information about a specific task definition in a DAG.

    Use this tool when the user asks about:
    - "Show me details for task X in DAG Y" or "What does task Z do?"
    - "What operator does task A use?" or "What's the configuration of task B?"
    - "Tell me about task C" or "Get task definition for D"
    - "What are the dependencies of task E?" or "Which tasks does F depend on?"

    Returns task definition information including:
    - task_id: Unique identifier for the task
    - task_display_name: Human-readable display name
    - owner: Who owns this task
    - start_date: When this task becomes active
    - end_date: When this task becomes inactive (if set)
    - trigger_rule: When this task should run (all_success, one_failed, etc.)
    - depends_on_past: Whether task depends on previous run's success
    - wait_for_downstream: Whether to wait for downstream tasks
    - retries: Number of retry attempts
    - retry_delay: Time between retries
    - execution_timeout: Maximum execution time
    - operator_name: Type of operator (PythonOperator, BashOperator, etc.)
    - pool: Resource pool assignment
    - queue: Queue for executor
    - downstream_task_ids: List of tasks that depend on this task
    - upstream_task_ids: List of tasks this task depends on

    Args:
        dag_id: The ID of the DAG containing the task
        task_id: The ID of the task to get details for

    Returns:
        JSON with complete task definition details
    """
    base_url, token = _config.url, _config.auth_token
    return _get_task_impl(
        dag_id=dag_id,
        task_id=task_id,
        airflow_url=base_url,
        auth_token=token,
    )
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
@mcp.tool()
def list_tasks(dag_id: str) -> str:
    """Get all tasks defined in a specific DAG.

    Use this tool when the user asks about:
    - "What tasks are in DAG X?" or "List all tasks for DAG Y"
    - "Show me the tasks in this workflow" or "What's in the DAG?"
    - "What are the steps in DAG Z?" or "Show me the task structure"
    - "What does this DAG do?" or "Explain the workflow steps"

    Returns information about all tasks in the DAG including:
    - task_id: Unique identifier for the task
    - task_display_name: Human-readable display name
    - owner: Who owns this task
    - operator_name: Type of operator (PythonOperator, BashOperator, etc.)
    - start_date: When this task becomes active
    - end_date: When this task becomes inactive (if set)
    - trigger_rule: When this task should run
    - retries: Number of retry attempts
    - pool: Resource pool assignment
    - downstream_task_ids: List of tasks that depend on this task
    - upstream_task_ids: List of tasks this task depends on

    Args:
        dag_id: The ID of the DAG to list tasks for

    Returns:
        JSON with list of all tasks in the DAG and their configurations
    """
    base_url, token = _config.url, _config.auth_token
    return _list_tasks_impl(dag_id=dag_id, airflow_url=base_url, auth_token=token)
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
@mcp.tool()
def get_task_instance(dag_id: str, dag_run_id: str, task_id: str) -> str:
    """Get detailed information about a specific task instance execution.

    Use this tool when the user asks about:
    - "Show me details for task X in DAG run Y" or "What's the status of task Z?"
    - "Why did task A fail?" or "When did task B start/finish?"
    - "What's the duration of task C?" or "Show me task execution details"
    - "Get logs for task D" or "What operator does task E use?"

    Returns detailed task instance information including:
    - task_id: Name of the task
    - state: Current state (success, failed, running, queued, etc.)
    - start_date: When the task started
    - end_date: When the task finished
    - duration: How long the task ran
    - try_number: Which attempt this is
    - max_tries: Maximum retry attempts
    - operator: What operator type (PythonOperator, BashOperator, etc.)
    - executor_config: Executor configuration
    - pool: Resource pool assignment

    Args:
        dag_id: The ID of the DAG
        dag_run_id: The ID of the DAG run (e.g., "manual__2024-01-01T00:00:00+00:00")
        task_id: The ID of the task within the DAG

    Returns:
        JSON with complete task instance details
    """
    base_url, token = _config.url, _config.auth_token
    return _get_task_instance_impl(
        dag_id=dag_id,
        dag_run_id=dag_run_id,
        task_id=task_id,
        airflow_url=base_url,
        auth_token=token,
    )
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
def _list_dag_runs_impl(
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    limit: int = DEFAULT_LIMIT,
    offset: int = DEFAULT_OFFSET,
    auth_token: str | None = None,
) -> str:
    """Query ``dags/~/dagRuns`` and return one page of DAG runs.

    The ``~`` path segment is used in place of a DAG ID to get runs across
    all DAGs.

    Args:
        airflow_url: The base URL of the Airflow webserver (default: http://localhost:8080)
        limit: Maximum number of DAG runs to return (default: 100)
        offset: Offset for pagination (default: 0)
        auth_token: Optional Bearer token for token-based authentication

    Returns:
        JSON string with the run list and counts; a plain-text notice if the
        response has no ``dag_runs`` key; or the error message text on failure
    """
    try:
        page_query = {"limit": limit, "offset": offset}
        response = _call_airflow_api(
            "dags/~/dagRuns",
            airflow_url,
            page_query,
            auth_token=auth_token,
        )

        if "dag_runs" not in response:
            return f"No DAG runs found. Response: {response}"
        return _wrap_list_response(response["dag_runs"], "dag_runs", response)
    except Exception as err:
        return str(err)
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
@mcp.tool()
def list_dag_runs() -> str:
    """Get execution history and status of DAG runs (workflow executions).

    Use this tool when the user asks about:
    - "What DAG runs have executed?" or "Show me recent runs"
    - "Which runs failed/succeeded?"
    - "What's the status of my workflows?"
    - "When did DAG X last run?"
    - Execution times, durations, or states
    - Finding runs by date or status

    Returns execution metadata including:
    - dag_run_id: Unique identifier for this execution
    - dag_id: Which DAG this run belongs to
    - state: Current state (running, success, failed, queued)
    - execution_date: When this run was scheduled to execute
    - start_date: When execution actually started
    - end_date: When execution completed (if finished)
    - run_type: manual, scheduled, or backfill
    - conf: Configuration passed to this run

    Returns:
        JSON with list of DAG runs across all DAGs, sorted by most recent
    """
    base_url, token = _config.url, _config.auth_token
    return _list_dag_runs_impl(airflow_url=base_url, auth_token=token)
|
|
711
|
+
|
|
712
|
+
|
|
713
|
+
def _list_assets_impl(
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    limit: int = DEFAULT_LIMIT,
    offset: int = DEFAULT_OFFSET,
    auth_token: str | None = None,
) -> str:
    """Query the ``assets`` endpoint and return one page of assets.

    Args:
        airflow_url: The base URL of the Airflow webserver (default: http://localhost:8080)
        limit: Maximum number of assets to return (default: 100)
        offset: Offset for pagination (default: 0)
        auth_token: Optional Bearer token for token-based authentication

    Returns:
        JSON string with the asset list and counts; a plain-text notice if
        the response has no ``assets`` key; or the error message text on
        failure
    """
    try:
        page_query = {"limit": limit, "offset": offset}
        response = _call_airflow_api(
            "assets",
            airflow_url,
            page_query,
            auth_token=auth_token,
        )

        if "assets" not in response:
            return f"No assets found. Response: {response}"
        return _wrap_list_response(response["assets"], "assets", response)
    except Exception as err:
        return str(err)
|
|
745
|
+
|
|
746
|
+
|
|
747
|
+
@mcp.tool()
def list_assets() -> str:
    """Get data assets and datasets tracked by Airflow (data lineage).

    Use this tool when the user asks about:
    - "What datasets exist?" or "List all assets"
    - "What data does this DAG produce/consume?"
    - "Show me data dependencies" or "What's the data lineage?"
    - "Which DAGs use dataset X?"
    - Data freshness or update events

    Assets represent datasets or files that DAGs produce or consume.
    This enables data-driven scheduling where DAGs wait for data availability.

    Returns asset information including:
    - uri: Unique identifier for the asset (e.g., s3://bucket/path)
    - id: Internal asset ID
    - created_at: When this asset was first registered
    - updated_at: When this asset was last updated
    - consuming_dags: Which DAGs depend on this asset
    - producing_tasks: Which tasks create/update this asset

    Returns:
        JSON with list of all assets and their producing/consuming relationships
    """
    base_url, token = _config.url, _config.auth_token
    return _list_assets_impl(airflow_url=base_url, auth_token=token)
|
|
776
|
+
|
|
777
|
+
|
|
778
|
+
def _list_connections_impl(
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    limit: int = DEFAULT_LIMIT,
    offset: int = DEFAULT_OFFSET,
    auth_token: str | None = None,
) -> str:
    """List Airflow connections, copying only an allow-list of fields.

    Unlike the other list helpers, the API payload is not passed through
    as-is: each connection is rebuilt from a fixed set of keys so that the
    ``password`` field is never included in the output.

    Args:
        airflow_url: Base URL of the Airflow webserver.
        limit: Maximum number of connections to request.
        offset: Pagination offset.
        auth_token: Optional token forwarded to ``_call_airflow_api``.

    Returns:
        A JSON string with ``total_connections``, ``returned_count`` and the
        filtered ``connections`` list. If the response has no ``connections``
        key, a plain "No connections found" message is returned; if anything
        raises, the exception's message is returned instead.
    """
    import json

    # SECURITY: allow-list of keys copied from each connection. "password"
    # is intentionally absent so it cannot leak into the result.
    exposed_fields = (
        "connection_id",
        "conn_type",
        "description",
        "host",
        "port",
        "schema",
        "login",
        "extra",
    )

    try:
        payload = _call_airflow_api(
            "connections",
            airflow_url,
            {"limit": limit, "offset": offset},
            auth_token=auth_token,
        )

        if "connections" not in payload:
            return f"No connections found. Response: {payload}"

        raw_connections = payload["connections"]
        # Fall back to the page size when the API does not report a total.
        total = payload.get("total_entries", len(raw_connections))

        sanitized = []
        for entry in raw_connections:
            sanitized.append({field: entry.get(field) for field in exposed_fields})

        summary = {
            "total_connections": total,
            "returned_count": len(sanitized),
            "connections": sanitized,
        }
        return json.dumps(summary, indent=2)
    except Exception as err:
        return str(err)
|
|
841
|
+
|
|
842
|
+
|
|
843
|
+
@mcp.tool()
def list_connections() -> str:
    """Get connection configurations for external systems (databases, APIs, services).

    Use this tool when the user asks about:
    - "What connections are configured?" or "List all connections"
    - "How do I connect to database X?"
    - "What's the connection string for Y?"
    - "Which databases/services are available?"
    - Finding connection details by name or type

    Connections store credentials and connection info for external systems
    that DAGs interact with (databases, S3, APIs, etc.).

    Returns connection metadata including:
    - connection_id: Unique name for this connection
    - conn_type: Type (postgres, mysql, s3, http, etc.)
    - description: Human-readable description
    - host: Server hostname or IP
    - port: Port number
    - schema: Database schema or path
    - login: Username (passwords excluded for security)
    - extra: Additional connection parameters as JSON

    IMPORTANT: Passwords are NEVER returned for security reasons.

    Returns:
        JSON with list of all connections (credentials excluded)
    """
    # Docstring kept verbatim (presumably published as the tool description).
    # The filtering of sensitive fields happens inside the implementation.
    call_kwargs = {"airflow_url": _config.url, "auth_token": _config.auth_token}
    return _list_connections_impl(**call_kwargs)
|
|
876
|
+
|
|
877
|
+
|
|
878
|
+
def _get_variable_impl(
    variable_key: str,
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    auth_token: str | None = None,
) -> str:
    """Fetch a single Airflow variable and return it as pretty-printed JSON.

    The key is percent-encoded before it is placed in the request path, so
    keys containing reserved URL characters (``/``, ``?``, ``#``, ``%``, ...)
    cannot introduce extra path segments, a query string, or a fragment.

    Args:
        variable_key: The key of the variable to get.
        airflow_url: Base URL of the Airflow webserver.
        auth_token: Optional token forwarded to ``_call_airflow_api``.

    Returns:
        The API response serialized with ``json.dumps(..., indent=2)``. If
        anything raises while calling the API or serializing the result, the
        exception's message is returned instead.
    """
    import json
    from urllib.parse import quote

    try:
        # safe="" makes quote() encode "/" as well; str() keeps the previous
        # tolerance for non-string keys that the f-string interpolation had.
        encoded_key = quote(str(variable_key), safe="")
        data = _call_airflow_api(
            f"variables/{encoded_key}",
            airflow_url,
            auth_token=auth_token,
        )
        return json.dumps(data, indent=2)
    except Exception as e:
        return str(e)
|
|
905
|
+
|
|
906
|
+
|
|
907
|
+
def _list_variables_impl(
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    limit: int = DEFAULT_LIMIT,
    offset: int = DEFAULT_OFFSET,
    auth_token: str | None = None,
) -> str:
    """Request one page of Airflow variables.

    Args:
        airflow_url: Base URL of the Airflow webserver.
        limit: Maximum number of variables to request.
        offset: Pagination offset.
        auth_token: Optional token forwarded to ``_call_airflow_api``.

    Returns:
        The string built by ``_wrap_list_response`` when the response has a
        ``variables`` key, otherwise a "No variables found" message. If
        anything raises, the exception's message is returned instead.
    """
    try:
        payload = _call_airflow_api(
            "variables",
            airflow_url,
            {"limit": limit, "offset": offset},
            auth_token=auth_token,
        )
        # Guard clause: bail out early when the expected key is missing.
        if "variables" not in payload:
            return f"No variables found. Response: {payload}"
        return _wrap_list_response(payload["variables"], "variables", payload)
    except Exception as err:
        return str(err)
|
|
939
|
+
|
|
940
|
+
|
|
941
|
+
def _get_version_impl(
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    auth_token: str | None = None,
) -> str:
    """Query the ``version`` endpoint and return the response as JSON text.

    Args:
        airflow_url: Base URL of the Airflow webserver.
        auth_token: Optional token forwarded to ``_call_airflow_api``.

    Returns:
        The response serialized with ``json.dumps(..., indent=2)``, or the
        exception's message if anything raises.
    """
    import json

    try:
        version_info = _call_airflow_api("version", airflow_url, auth_token=auth_token)
        serialized = json.dumps(version_info, indent=2)
    except Exception as err:
        return str(err)
    return serialized
|
|
966
|
+
|
|
967
|
+
|
|
968
|
+
def _get_config_impl(
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    auth_token: str | None = None,
) -> str:
    """Query the ``config`` endpoint and summarize its sections.

    Args:
        airflow_url: Base URL of the Airflow webserver.
        auth_token: Optional token forwarded to ``_call_airflow_api``.

    Returns:
        A JSON string holding ``total_sections`` and the untouched
        ``sections`` value when the response has a ``sections`` key,
        otherwise a "No configuration found" message. If anything raises,
        the exception's message is returned instead.
    """
    import json

    try:
        payload = _call_airflow_api("config", airflow_url, auth_token=auth_token)

        if "sections" not in payload:
            return f"No configuration found. Response: {payload}"

        # Sections are passed through unchanged; only a count is added.
        sections = payload["sections"]
        summary = {"total_sections": len(sections), "sections": sections}
        return json.dumps(summary, indent=2)
    except Exception as err:
        return str(err)
|
|
998
|
+
|
|
999
|
+
|
|
1000
|
+
def _get_pool_impl(
    pool_name: str,
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    auth_token: str | None = None,
) -> str:
    """Fetch details for one pool and return them as pretty-printed JSON.

    The pool name is percent-encoded before it is placed in the request
    path, so names containing reserved URL characters (``/``, ``?``, ``#``,
    ``%``, ...) cannot introduce extra path segments, a query string, or a
    fragment.

    Args:
        pool_name: The name of the pool to get details for.
        airflow_url: Base URL of the Airflow webserver.
        auth_token: Optional token forwarded to ``_call_airflow_api``.

    Returns:
        The API response serialized with ``json.dumps(..., indent=2)``. If
        anything raises while calling the API or serializing the result, the
        exception's message is returned instead.
    """
    import json
    from urllib.parse import quote

    try:
        # safe="" makes quote() encode "/" as well; str() keeps the previous
        # tolerance for non-string names that the f-string interpolation had.
        encoded_name = quote(str(pool_name), safe="")
        data = _call_airflow_api(
            f"pools/{encoded_name}",
            airflow_url,
            auth_token=auth_token,
        )
        return json.dumps(data, indent=2)
    except Exception as e:
        return str(e)
|
|
1027
|
+
|
|
1028
|
+
|
|
1029
|
+
def _list_pools_impl(
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    limit: int = DEFAULT_LIMIT,
    offset: int = DEFAULT_OFFSET,
    auth_token: str | None = None,
) -> str:
    """Request one page of Airflow pools.

    Args:
        airflow_url: Base URL of the Airflow webserver.
        limit: Maximum number of pools to request.
        offset: Pagination offset.
        auth_token: Optional token forwarded to ``_call_airflow_api``.

    Returns:
        The string built by ``_wrap_list_response`` when the response has a
        ``pools`` key, otherwise a "No pools found" message. If anything
        raises, the exception's message is returned instead.
    """
    try:
        payload = _call_airflow_api(
            "pools",
            airflow_url,
            {"limit": limit, "offset": offset},
            auth_token=auth_token,
        )
        # Guard clause: bail out early when the expected key is missing.
        if "pools" not in payload:
            return f"No pools found. Response: {payload}"
        return _wrap_list_response(payload["pools"], "pools", payload)
    except Exception as err:
        return str(err)
|
|
1061
|
+
|
|
1062
|
+
|
|
1063
|
+
def _list_plugins_impl(
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    limit: int = DEFAULT_LIMIT,
    offset: int = DEFAULT_OFFSET,
    auth_token: str | None = None,
) -> str:
    """Request one page of installed Airflow plugins.

    Args:
        airflow_url: Base URL of the Airflow webserver.
        limit: Maximum number of plugins to request.
        offset: Pagination offset.
        auth_token: Optional token forwarded to ``_call_airflow_api``.

    Returns:
        The string built by ``_wrap_list_response`` when the response has a
        ``plugins`` key, otherwise a "No plugins found" message. If anything
        raises, the exception's message is returned instead.
    """
    try:
        payload = _call_airflow_api(
            "plugins",
            airflow_url,
            {"limit": limit, "offset": offset},
            auth_token=auth_token,
        )
        # Guard clause: bail out early when the expected key is missing.
        if "plugins" not in payload:
            return f"No plugins found. Response: {payload}"
        return _wrap_list_response(payload["plugins"], "plugins", payload)
    except Exception as err:
        return str(err)
|
|
1095
|
+
|
|
1096
|
+
|
|
1097
|
+
def _list_providers_impl(
    airflow_url: str = DEFAULT_AIRFLOW_URL,
    auth_token: str | None = None,
) -> str:
    """Request the installed Airflow provider packages.

    No pagination parameters are sent with this request.

    Args:
        airflow_url: Base URL of the Airflow webserver.
        auth_token: Optional token forwarded to ``_call_airflow_api``.

    Returns:
        The string built by ``_wrap_list_response`` when the response has a
        ``providers`` key, otherwise a "No providers found" message. If
        anything raises, the exception's message is returned instead.
    """
    try:
        payload = _call_airflow_api("providers", airflow_url, auth_token=auth_token)
        # Guard clause: bail out early when the expected key is missing.
        if "providers" not in payload:
            return f"No providers found. Response: {payload}"
        return _wrap_list_response(payload["providers"], "providers", payload)
    except Exception as err:
        return str(err)
|
|
1123
|
+
|
|
1124
|
+
|
|
1125
|
+
@mcp.tool()
def get_pool(pool_name: str) -> str:
    """Get detailed information about a specific resource pool.

    Use this tool when the user asks about:
    - "Show me details for pool X" or "What's the status of pool Y?"
    - "How many slots are available in pool Z?" or "Is pool X full?"
    - "What's using pool Y?" or "How many tasks are running in pool X?"
    - "Get information about the default_pool" or "Show me pool details"

    Pools are used to limit parallelism for specific sets of tasks. This returns
    detailed real-time information about a specific pool's capacity and utilization.

    Returns detailed pool information including:
    - name: Name of the pool
    - slots: Total number of available slots in the pool
    - occupied_slots: Number of currently occupied slots (running + queued)
    - running_slots: Number of slots with currently running tasks
    - queued_slots: Number of slots with queued tasks waiting to run
    - open_slots: Number of available slots (slots - occupied_slots)
    - description: Human-readable description of the pool's purpose

    Args:
        pool_name: The name of the pool to get details for (e.g., "default_pool")

    Returns:
        JSON with complete details about the specified pool
    """
    # Docstring kept verbatim (presumably published as the tool description).
    # Connection settings are read from the shared `_config` object.
    settings = _config
    return _get_pool_impl(
        pool_name=pool_name,
        airflow_url=settings.url,
        auth_token=settings.auth_token,
    )
|
|
1158
|
+
|
|
1159
|
+
|
|
1160
|
+
@mcp.tool()
def list_pools() -> str:
    """Get resource pools for managing task concurrency and resource allocation.

    Use this tool when the user asks about:
    - "What pools are configured?" or "List all pools"
    - "Show me the resource pools" or "What pools exist?"
    - "How many slots does pool X have?" or "What's the pool capacity?"
    - "Which pools are available?" or "What's the pool configuration?"

    Pools are used to limit parallelism for specific sets of tasks. Each pool
    has a certain number of slots, and tasks assigned to a pool will only run
    if there are available slots. This is useful for limiting concurrent access
    to resources like databases or external APIs.

    Returns pool information including:
    - name: Name of the pool
    - slots: Total number of available slots in the pool
    - occupied_slots: Number of currently occupied slots
    - running_slots: Number of slots with running tasks
    - queued_slots: Number of slots with queued tasks
    - open_slots: Number of available slots (slots - occupied_slots)
    - description: Human-readable description of the pool's purpose

    Returns:
        JSON with list of all pools and their current utilization
    """
    # Docstring kept verbatim (presumably published as the tool description).
    # Paging is left at the implementation's defaults.
    call_kwargs = {"airflow_url": _config.url, "auth_token": _config.auth_token}
    return _list_pools_impl(**call_kwargs)
|
|
1191
|
+
|
|
1192
|
+
|
|
1193
|
+
@mcp.tool()
def list_plugins() -> str:
    """Get information about installed Airflow plugins.

    Use this tool when the user asks about:
    - "What plugins are installed?" or "List all plugins"
    - "Show me the plugins" or "Which plugins are enabled?"
    - "Is plugin X installed?" or "Do we have any custom plugins?"
    - "What's in the plugins directory?"

    Plugins extend Airflow functionality by adding custom operators, hooks,
    views, menu items, or other components. This returns information about
    all plugins discovered by Airflow's plugin system.

    Returns information about installed plugins including:
    - name: Name of the plugin
    - hooks: Custom hooks provided by the plugin
    - executors: Custom executors provided by the plugin
    - macros: Custom macros provided by the plugin
    - flask_blueprints: Flask blueprints for custom UI pages
    - appbuilder_views: Flask-AppBuilder views for admin interface
    - appbuilder_menu_items: Custom menu items in the UI

    Returns:
        JSON with list of all installed plugins and their components
    """
    # Docstring kept verbatim (presumably published as the tool description).
    # Paging is left at the implementation's defaults.
    settings = _config
    return _list_plugins_impl(airflow_url=settings.url, auth_token=settings.auth_token)
|
|
1223
|
+
|
|
1224
|
+
|
|
1225
|
+
@mcp.tool()
def list_providers() -> str:
    """Get information about installed Airflow provider packages.

    Use this tool when the user asks about:
    - "What providers are installed?" or "List all providers"
    - "What integrations are available?" or "Show me installed packages"
    - "Do we have the AWS provider?" or "Is the Snowflake provider installed?"
    - "What version of provider X is installed?"

    Returns information about installed provider packages including:
    - package_name: Name of the provider package (e.g., "apache-airflow-providers-amazon")
    - version: Version of the provider package
    - description: What the provider does
    - provider_info: Details about operators, hooks, and sensors included

    Returns:
        JSON with list of all installed provider packages and their details
    """
    # Docstring kept verbatim (presumably published as the tool description).
    call_kwargs = {"airflow_url": _config.url, "auth_token": _config.auth_token}
    return _list_providers_impl(**call_kwargs)
|
|
1248
|
+
|
|
1249
|
+
|
|
1250
|
+
@mcp.tool()
def get_variable(variable_key: str) -> str:
    """Get a specific Airflow variable by key.

    Use this tool when the user asks about:
    - "What's the value of variable X?" or "Show me variable Y"
    - "Get variable Z" or "What does variable A contain?"
    - "What's stored in variable B?" or "Look up variable C"

    Variables are key-value pairs stored in Airflow's metadata database that
    can be accessed by DAGs at runtime. They're commonly used for configuration
    values, API keys, or other settings that need to be shared across DAGs.

    Returns variable information including:
    - key: The variable's key/name
    - value: The variable's value (may be masked if marked as sensitive)
    - description: Optional description of the variable's purpose

    Args:
        variable_key: The key/name of the variable to retrieve

    Returns:
        JSON with the variable's key, value, and metadata
    """
    # Docstring kept verbatim (presumably published as the tool description).
    # Connection settings are read from the shared `_config` object.
    settings = _config
    return _get_variable_impl(
        variable_key=variable_key,
        airflow_url=settings.url,
        auth_token=settings.auth_token,
    )
|
|
1279
|
+
|
|
1280
|
+
|
|
1281
|
+
@mcp.tool()
def list_variables() -> str:
    """Get all Airflow variables (key-value configuration pairs).

    Use this tool when the user asks about:
    - "What variables are configured?" or "List all variables"
    - "Show me the variables" or "What variables exist?"
    - "What configuration variables are available?"
    - "Show me all variable keys"

    Variables are key-value pairs stored in Airflow's metadata database that
    can be accessed by DAGs at runtime. They're commonly used for configuration
    values, environment-specific settings, or other data that needs to be
    shared across DAGs without hardcoding in the DAG files.

    Returns variable information including:
    - key: The variable's key/name
    - value: The variable's value (may be masked if marked as sensitive)
    - description: Optional description of the variable's purpose

    IMPORTANT: Sensitive variables (like passwords, API keys) may have their
    values masked in the response for security reasons.

    Returns:
        JSON with list of all variables and their values
    """
    # Docstring kept verbatim (presumably published as the tool description).
    # Paging is left at the implementation's defaults.
    call_kwargs = {"airflow_url": _config.url, "auth_token": _config.auth_token}
    return _list_variables_impl(**call_kwargs)
|
|
1311
|
+
|
|
1312
|
+
|
|
1313
|
+
@mcp.tool()
def get_airflow_version() -> str:
    """Get version information for the Airflow instance.

    Use this tool when the user asks about:
    - "What version of Airflow is running?" or "Show me the Airflow version"
    - "What's the Airflow version?" or "Which Airflow release is this?"
    - "What version is installed?" or "Check Airflow version"
    - "Is this Airflow 2 or 3?" or "What's the version number?"

    Returns version information including:
    - version: The Airflow version string (e.g., "2.8.0", "3.0.0")
    - git_version: Git commit hash if available

    This is useful for:
    - Determining API compatibility
    - Checking if features are available in this version
    - Troubleshooting version-specific issues
    - Verifying upgrade success

    Returns:
        JSON with Airflow version information
    """
    # Docstring kept verbatim (presumably published as the tool description).
    settings = _config
    return _get_version_impl(airflow_url=settings.url, auth_token=settings.auth_token)
|
|
1340
|
+
|
|
1341
|
+
|
|
1342
|
+
@mcp.tool()
def get_airflow_config() -> str:
    """Get Airflow instance configuration and settings.

    Use this tool when the user asks about:
    - "What's the Airflow configuration?" or "Show me Airflow settings"
    - "What's the executor type?" or "How is Airflow configured?"
    - "What's the parallelism setting?"
    - Database connection, logging, or scheduler settings
    - Finding specific configuration values

    Returns all Airflow configuration organized by sections:
    - [core]: Basic Airflow settings (executor, dags_folder, parallelism)
    - [database]: Database connection and settings
    - [webserver]: Web UI configuration (port, workers, auth)
    - [scheduler]: Scheduler behavior and intervals
    - [logging]: Log locations and formatting
    - [api]: REST API configuration
    - [operators]: Default operator settings
    - And many more sections...

    Each setting includes:
    - key: Configuration parameter name
    - value: Current value
    - source: Where the value came from (default, env var, config file)

    Returns:
        JSON with complete Airflow configuration organized by sections
    """
    # Docstring kept verbatim (presumably published as the tool description).
    call_kwargs = {"airflow_url": _config.url, "auth_token": _config.auth_token}
    return _get_config_impl(**call_kwargs)
|