apache-airflow-providers-edge3 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
- airflow/providers/edge3/__init__.py +3 -3
- airflow/providers/edge3/cli/api_client.py +23 -26
- airflow/providers/edge3/cli/worker.py +9 -28
- airflow/providers/edge3/example_dags/integration_test.py +1 -1
- airflow/providers/edge3/example_dags/win_test.py +32 -22
- airflow/providers/edge3/executors/edge_executor.py +7 -63
- airflow/providers/edge3/models/edge_worker.py +7 -3
- airflow/providers/edge3/plugins/edge_executor_plugin.py +26 -205
- airflow/providers/edge3/plugins/www/dist/main.umd.cjs +8 -8
- airflow/providers/edge3/plugins/www/openapi-gen/queries/common.ts +6 -1
- airflow/providers/edge3/plugins/www/openapi-gen/queries/ensureQueryData.ts +6 -1
- airflow/providers/edge3/plugins/www/openapi-gen/queries/prefetch.ts +6 -1
- airflow/providers/edge3/plugins/www/openapi-gen/queries/queries.ts +6 -2
- airflow/providers/edge3/plugins/www/openapi-gen/queries/suspense.ts +6 -1
- airflow/providers/edge3/plugins/www/openapi-gen/requests/schemas.gen.ts +5 -0
- airflow/providers/edge3/plugins/www/openapi-gen/requests/services.gen.ts +18 -3
- airflow/providers/edge3/plugins/www/openapi-gen/requests/types.gen.ts +24 -0
- airflow/providers/edge3/plugins/www/package.json +17 -15
- airflow/providers/edge3/plugins/www/pnpm-lock.yaml +1194 -1244
- airflow/providers/edge3/plugins/www/src/components/SearchBar.tsx +103 -0
- airflow/providers/edge3/plugins/www/src/components/ui/InputGroup.tsx +57 -0
- airflow/providers/edge3/plugins/www/src/components/ui/Select/Content.tsx +37 -0
- airflow/providers/edge3/plugins/www/src/components/ui/Select/Item.tsx +34 -0
- airflow/providers/edge3/plugins/www/src/components/ui/Select/Root.tsx +24 -0
- airflow/providers/edge3/plugins/www/src/components/ui/Select/Trigger.tsx +54 -0
- airflow/providers/edge3/plugins/www/src/components/ui/Select/ValueText.tsx +51 -0
- airflow/providers/edge3/plugins/www/src/components/ui/Select/index.ts +34 -0
- airflow/providers/edge3/plugins/www/src/components/ui/index.ts +3 -0
- airflow/providers/edge3/plugins/www/src/constants.ts +43 -0
- airflow/providers/edge3/plugins/www/src/pages/WorkerPage.tsx +184 -95
- airflow/providers/edge3/version_compat.py +0 -2
- airflow/providers/edge3/worker_api/auth.py +11 -35
- airflow/providers/edge3/worker_api/datamodels.py +3 -2
- airflow/providers/edge3/worker_api/routes/health.py +1 -1
- airflow/providers/edge3/worker_api/routes/jobs.py +10 -11
- airflow/providers/edge3/worker_api/routes/logs.py +5 -8
- airflow/providers/edge3/worker_api/routes/ui.py +14 -3
- airflow/providers/edge3/worker_api/routes/worker.py +19 -12
- airflow/providers/edge3/{openapi → worker_api}/v2-edge-generated.yaml +59 -5
- {apache_airflow_providers_edge3-1.5.0.dist-info → apache_airflow_providers_edge3-2.0.0.dist-info}/METADATA +13 -13
- {apache_airflow_providers_edge3-1.5.0.dist-info → apache_airflow_providers_edge3-2.0.0.dist-info}/RECORD +45 -40
- airflow/providers/edge3/openapi/__init__.py +0 -19
- airflow/providers/edge3/openapi/edge_worker_api_v1.yaml +0 -808
- airflow/providers/edge3/worker_api/routes/_v2_compat.py +0 -136
- airflow/providers/edge3/worker_api/routes/_v2_routes.py +0 -237
- {apache_airflow_providers_edge3-1.5.0.dist-info → apache_airflow_providers_edge3-2.0.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_edge3-1.5.0.dist-info → apache_airflow_providers_edge3-2.0.0.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_edge3-1.5.0.dist-info → apache_airflow_providers_edge3-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {apache_airflow_providers_edge3-1.5.0.dist-info → apache_airflow_providers_edge3-2.0.0.dist-info}/licenses/NOTICE +0 -0
airflow/providers/edge3/__init__.py
@@ -29,11 +29,11 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "1.5.0"
+__version__ = "2.0.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
-    "2.10.0"
+    "3.0.0"
 ):
     raise RuntimeError(
-        f"The package `apache-airflow-providers-edge3:{__version__}` needs Apache Airflow 2.10.0+"
+        f"The package `apache-airflow-providers-edge3:{__version__}` needs Apache Airflow 3.0.0+"
     )
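The gate above is the usual provider pattern: `base_version` strips pre-release and dev suffixes before comparing. A minimal sketch of the same check (the installed version string here is a made-up example):

import packaging.version

airflow_version = "3.1.0.dev0"  # hypothetical installed Airflow version
base = packaging.version.parse(packaging.version.parse(airflow_version).base_version)
if base < packaging.version.parse("3.0.0"):
    raise RuntimeError("needs Apache Airflow 3.0.0+")
# "3.1.0.dev0" passes, since its base_version is "3.1.0"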
airflow/providers/edge3/cli/api_client.py
@@ -20,6 +20,7 @@ import json
 import logging
 import os
 from datetime import datetime
+from functools import cache
 from http import HTTPStatus
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
@@ -27,11 +28,14 @@ from urllib.parse import quote, urljoin
 
 import requests
 from retryhttp import retry, wait_retry_after
-from tenacity import
+from tenacity import before_sleep_log, wait_random_exponential
 
+from airflow.api_fastapi.auth.tokens import JWTGenerator
 from airflow.configuration import conf
-from airflow.providers.edge3.models.edge_worker import EdgeWorkerVersionException
-from airflow.providers.edge3.version_compat import AIRFLOW_V_3_0_PLUS
+from airflow.providers.edge3.models.edge_worker import (
+    EdgeWorkerDuplicateException,
+    EdgeWorkerVersionException,
+)
 from airflow.providers.edge3.worker_api.datamodels import (
     EdgeJobFetched,
     PushLogsBody,
@@ -71,6 +75,15 @@ API_RETRY_WAIT_MAX = float(
 _default_wait = wait_random_exponential(min=API_RETRY_WAIT_MIN, max=API_RETRY_WAIT_MAX)
 
 
+@cache
+def jwt_generator() -> JWTGenerator:
+    return JWTGenerator(
+        secret_key=conf.get("api_auth", "jwt_secret"),
+        valid_for=conf.getint("api_auth", "jwt_leeway", fallback=30),
+        audience="api",
+    )
+
+
 @retry(
     reraise=True,
     max_attempt_number=API_RETRIES,
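The new module-level `jwt_generator` applies `functools.cache` to a zero-argument factory, so one generator is built on first use and shared afterwards; the old version defined the cached factory inside `_make_generic_request` (see the next hunk), recreating it, and its cache, on every call. The memoization pattern in isolation, with a stand-in class (names hypothetical):

from functools import cache

class TokenGenerator:
    # Stand-in for the JWTGenerator constructed above.
    def __init__(self, secret_key: str, valid_for: int, audience: str):
        self.secret_key, self.valid_for, self.audience = secret_key, valid_for, audience

@cache
def token_generator() -> TokenGenerator:
    # Body runs only on the first call; later calls return the cached instance.
    return TokenGenerator(secret_key="dummy", valid_for=30, audience="api")

assert token_generator() is token_generator()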
@@ -78,31 +91,10 @@ _default_wait = wait_random_exponential(min=API_RETRY_WAIT_MIN, max=API_RETRY_WA
     wait_network_errors=_default_wait,
     wait_timeouts=_default_wait,
     wait_rate_limited=wait_retry_after(fallback=_default_wait),  # No infinite timeout on HTTP 429
-    before_sleep=
+    before_sleep=before_sleep_log(logger, logging.WARNING),
 )
 def _make_generic_request(method: str, rest_path: str, data: str | None = None) -> Any:
-    if AIRFLOW_V_3_0_PLUS:
-        from functools import cache
-
-        from airflow.api_fastapi.auth.tokens import JWTGenerator
-
-        @cache
-        def jwt_generator() -> JWTGenerator:
-            return JWTGenerator(
-                secret_key=conf.get("api_auth", "jwt_secret"),
-                valid_for=conf.getint("api_auth", "jwt_leeway", fallback=30),
-                audience="api",
-            )
-
-        generator = jwt_generator()
-        authorization = generator.generate({"method": rest_path})
-    else:
-        # Airflow 2.10 compatibility
-        from airflow.providers.edge3.worker_api.auth import jwt_signer
-
-        signer = jwt_signer()
-        authorization = signer.generate_signed_token({"method": rest_path})
-
+    authorization = jwt_generator().generate({"method": rest_path})
     api_url = conf.get("edge", "api_url")
     headers = {
         "Content-Type": "application/json",
@@ -132,6 +124,11 @@ def worker_register(
     except requests.HTTPError as e:
         if e.response.status_code == 400:
             raise EdgeWorkerVersionException(str(e))
+        if e.response.status_code == 409:
+            raise EdgeWorkerDuplicateException(
+                f"A worker with the name '{hostname}' is already active. "
+                "Please ensure worker names are unique, or stop the existing worker before starting a new one."
+            )
         raise e
     return WorkerRegistrationReturn(**result)
 
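`worker_register` now maps HTTP status codes onto typed exceptions: 400 stays `EdgeWorkerVersionException`, and 409 (Conflict) becomes the new `EdgeWorkerDuplicateException`. The translate-and-reraise pattern in isolation (hypothetical endpoint, simplified exception classes):

import requests

class EdgeWorkerVersionException(Exception): ...
class EdgeWorkerDuplicateException(Exception): ...

def register(hostname: str) -> dict:
    resp = requests.post(f"https://example.invalid/edge_worker/v1/worker/{hostname}")
    try:
        resp.raise_for_status()
    except requests.HTTPError as e:
        if e.response.status_code == 400:
            raise EdgeWorkerVersionException(str(e))
        if e.response.status_code == 409:
            raise EdgeWorkerDuplicateException(f"A worker named '{hostname}' is already active.")
        raise
    return resp.json()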
airflow/providers/edge3/cli/worker.py
@@ -25,7 +25,6 @@ from functools import cache
 from http import HTTPStatus
 from multiprocessing import Process
 from pathlib import Path
-from subprocess import Popen
 from time import sleep
 from typing import TYPE_CHECKING
 
@@ -39,7 +38,6 @@ from airflow.providers.edge3 import __version__ as edge_provider_version
 from airflow.providers.edge3.cli.api_client import (
     jobs_fetch,
     jobs_set_state,
-    logs_logfile_path,
     logs_push,
     worker_register,
     worker_set_state,
@@ -51,8 +49,11 @@ from airflow.providers.edge3.cli.signalling import (
     status_file_path,
     write_pid_to_pidfile,
 )
-from airflow.providers.edge3.models.edge_worker import EdgeWorkerState, EdgeWorkerVersionException
-from airflow.providers.edge3.version_compat import AIRFLOW_V_3_0_PLUS
+from airflow.providers.edge3.models.edge_worker import (
+    EdgeWorkerDuplicateException,
+    EdgeWorkerState,
+    EdgeWorkerVersionException,
+)
 from airflow.utils.net import getfqdn
 from airflow.utils.state import TaskInstanceState
 
@@ -214,7 +215,7 @@ class EdgeWorker:
         return 1
 
     @staticmethod
-    def _launch_job_af3(edge_job: EdgeJobFetched) -> tuple[Process, Path]:
+    def _launch_job(edge_job: EdgeJobFetched):
         if TYPE_CHECKING:
             from airflow.executors.workloads import ExecuteTask
 
@@ -228,29 +229,6 @@ class EdgeWorker:
         if TYPE_CHECKING:
             assert workload.log_path  # We need to assume this is defined in here
         logfile = Path(base_log_folder, workload.log_path)
-        return process, logfile
-
-    @staticmethod
-    def _launch_job_af2_10(edge_job: EdgeJobFetched) -> tuple[Popen, Path]:
-        """Compatibility for Airflow 2.10 Launch."""
-        env = os.environ.copy()
-        env["AIRFLOW__CORE__DATABASE_ACCESS_ISOLATION"] = "True"
-        env["AIRFLOW__CORE__INTERNAL_API_URL"] = conf.get("edge", "api_url")
-        env["_AIRFLOW__SKIP_DATABASE_EXECUTOR_COMPATIBILITY_CHECK"] = "1"
-        command: list[str] = edge_job.command  # type: ignore[assignment]
-        process = Popen(command, close_fds=True, env=env, start_new_session=True)
-        logfile = logs_logfile_path(edge_job.key)
-        return process, logfile
-
-    @staticmethod
-    def _launch_job(edge_job: EdgeJobFetched):
-        """Get the received job executed."""
-        process: Popen | Process
-        if AIRFLOW_V_3_0_PLUS:
-            process, logfile = EdgeWorker._launch_job_af3(edge_job)
-        else:
-            # Airflow 2.10
-            process, logfile = EdgeWorker._launch_job_af2_10(edge_job)
         EdgeWorker.jobs.append(Job(edge_job, process, logfile, 0))
 
     def start(self):
@@ -262,6 +240,9 @@ class EdgeWorker:
         except EdgeWorkerVersionException as e:
             logger.info("Version mismatch of Edge worker and Core. Shutting down worker.")
             raise SystemExit(str(e))
+        except EdgeWorkerDuplicateException as e:
+            logger.error(str(e))
+            raise SystemExit(str(e))
         except HTTPError as e:
             if e.response.status_code == HTTPStatus.NOT_FOUND:
                 raise SystemExit("Error: API endpoint is not ready, please set [edge] api_enabled=True.")
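At startup the worker converts the new duplicate-name error into a clean `SystemExit` instead of an unhandled traceback, mirroring the existing version-mismatch handling. A sketch of the pattern (the `register` callable is a placeholder; the exception class is the one sketched earlier):

import logging

logger = logging.getLogger(__name__)

def start(register) -> None:
    try:
        register()
    except EdgeWorkerDuplicateException as e:
        logger.error(str(e))
        # SystemExit with a string prints it to stderr and exits with status 1
        raise SystemExit(str(e))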
airflow/providers/edge3/example_dags/integration_test.py
@@ -26,7 +26,7 @@ from __future__ import annotations
 from datetime import datetime
 from time import sleep
 
-from airflow.exceptions import AirflowNotFoundException
+from airflow.providers.common.compat.sdk import AirflowNotFoundException
 
 try:
     from airflow.sdk import BaseHook
airflow/providers/edge3/example_dags/win_test.py
@@ -32,42 +32,52 @@ from subprocess import STDOUT, Popen
 from time import sleep
 from typing import TYPE_CHECKING, Any
 
-try:
-    from airflow.sdk import task, task_group
-except ImportError:
-    # Airflow 2 path
-    from airflow.decorators import task, task_group  # type: ignore[attr-defined,no-redef]
-from airflow.exceptions import AirflowException, AirflowNotFoundException, AirflowSkipException
 from airflow.models import BaseOperator
 from airflow.models.dag import DAG
 from airflow.models.variable import Variable
+from airflow.providers.common.compat.sdk import (
+    AirflowException,
+    AirflowNotFoundException,
+    AirflowSkipException,
+)
 from airflow.providers.standard.operators.empty import EmptyOperator
+from airflow.sdk.execution_time.context import context_to_airflow_vars
 
+try:
+    from airflow.sdk import task, task_group
+except ImportError:
+    from airflow.decorators import task, task_group  # type: ignore[attr-defined,no-redef]
 try:
     from airflow.sdk import BaseHook
 except ImportError:
     from airflow.hooks.base import BaseHook  # type: ignore[attr-defined,no-redef]
-
-
+try:
+    from airflow.sdk import Param
+except ImportError:
+    from airflow.models import Param  # type: ignore[attr-defined,no-redef]
 try:
     from airflow.sdk import TriggerRule
 except ImportError:
-    # Compatibility for Airflow < 3.1
     from airflow.utils.trigger_rule import TriggerRule  # type: ignore[no-redef,attr-defined]
-from airflow.sdk.execution_time.context import context_to_airflow_vars
-from airflow.utils.types import ArgNotSet
-
-if TYPE_CHECKING:
-    try:
-        from airflow.sdk.types import RuntimeTaskInstanceProtocol as TaskInstance
-    except ImportError:
-        from airflow.models import TaskInstance  # type: ignore[assignment]
-    from airflow.utils.context import Context
-
 try:
-    from airflow.operators.python import PythonOperator
+    from airflow.providers.common.compat.standard.operators import PythonOperator
+except ImportError:
+    from airflow.operators.python import PythonOperator  # type: ignore[no-redef]
+try:
+    from airflow.sdk.definitions._internal.types import NOTSET, ArgNotSet
 except ImportError:
-    from airflow.
+    from airflow.utils.types import NOTSET, ArgNotSet  # type: ignore[attr-defined,no-redef]
+try:
+    from airflow.sdk.definitions._internal.types import is_arg_set
+except ImportError:
+
+    def is_arg_set(value):  # type: ignore[misc,no-redef]
+        return value is not NOTSET
+
+
+if TYPE_CHECKING:
+    from airflow.sdk import Context
+    from airflow.sdk.types import RuntimeTaskInstanceProtocol as TaskInstance
 
 
 class CmdOperator(BaseOperator):
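Every compat shim in this file now follows the same try/except import-fallback shape: prefer the Task SDK location, fall back to the legacy module, and silence the re-definition for type checkers. One instance in isolation (both import paths are taken verbatim from the hunk above):

try:
    from airflow.sdk import TriggerRule  # Task SDK location (Airflow >= 3.1)
except ImportError:
    from airflow.utils.trigger_rule import TriggerRule  # type: ignore[no-redef,attr-defined]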
@@ -163,7 +173,7 @@ class CmdOperator(BaseOperator):
         # When using the @task.command decorator, the command is not known until the underlying Python
         # callable is executed and therefore set to NOTSET initially. This flag is useful during execution to
         # determine whether the command value needs to re-rendered.
-        self._init_command_not_set = isinstance(self.command, ArgNotSet)
+        self._init_command_not_set = not is_arg_set(self.command)
 
     @staticmethod
     def refresh_command(ti: TaskInstance) -> None:
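`is_arg_set` wraps the sentinel comparison the old line spelled out inline; with the fallback shim from the import block it reduces to an identity test against `NOTSET`. The sentinel idiom in a self-contained sketch (the class is a stand-in mirroring Airflow's sentinel, not the real one):

class ArgNotSet:
    """Sentinel type: marks 'argument was never passed'."""

NOTSET = ArgNotSet()

def is_arg_set(value) -> bool:
    return value is not NOTSET

command = NOTSET
init_command_not_set = not is_arg_set(command)  # True: command still unknown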
airflow/providers/edge3/executors/edge_executor.py
@@ -29,15 +29,14 @@ from sqlalchemy.orm import Session
 
 from airflow.cli.cli_config import GroupCommand
 from airflow.configuration import conf
+from airflow.executors import workloads
 from airflow.executors.base_executor import BaseExecutor
 from airflow.models.taskinstance import TaskInstance
-from airflow.providers.common.compat.sdk import timezone
+from airflow.providers.common.compat.sdk import Stats, timezone
 from airflow.providers.edge3.cli.edge_command import EDGE_COMMANDS
 from airflow.providers.edge3.models.edge_job import EdgeJobModel
 from airflow.providers.edge3.models.edge_logs import EdgeLogsModel
 from airflow.providers.edge3.models.edge_worker import EdgeWorkerModel, EdgeWorkerState, reset_metrics
-from airflow.providers.edge3.version_compat import AIRFLOW_V_3_0_PLUS
-from airflow.stats import Stats
 from airflow.utils.db import DBLocks, create_global_lock
 from airflow.utils.session import NEW_SESSION, provide_session
 from airflow.utils.state import TaskInstanceState
@@ -69,8 +68,10 @@ class EdgeExecutor(BaseExecutor):
         """
         Check if already existing table matches the newest table schema.
 
-        workaround
+        workaround as Airflow 2.x had no support for provider DB migrations,
         then it is possible to use alembic also for provider distributions.
+
+        TODO(jscheffl): Change to alembic DB migrations in the future.
         """
         inspector = inspect(engine)
         edge_job_columns = None
@@ -125,66 +126,13 @@ class EdgeExecutor(BaseExecutor):
         self.edge_queued_tasks = deepcopy(self.queued_tasks)
         super()._process_tasks(task_tuples)  # type: ignore[misc]
 
-    @provide_session
-    def execute_async(
-        self,
-        key: TaskInstanceKey,
-        command: CommandType,
-        queue: str | None = None,
-        executor_config: Any | None = None,
-        session: Session = NEW_SESSION,
-    ) -> None:
-        """Execute asynchronously. Airflow 2.10 entry point to execute a task."""
-        # Use of a temporary trick to get task instance, will be changed with Airflow 3.0.0
-        # code works together with _process_tasks overwrite to get task instance.
-        # TaskInstance in fourth element
-        task_instance = self.edge_queued_tasks[key][3]  # type: ignore[index]
-        del self.edge_queued_tasks[key]
-
-        self.validate_airflow_tasks_run_command(command)  # type: ignore[attr-defined]
-
-        # Check if job already exists with same dag_id, task_id, run_id, map_index, try_number
-        existing_job = (
-            session.query(EdgeJobModel)
-            .filter_by(
-                dag_id=key.dag_id,
-                task_id=key.task_id,
-                run_id=key.run_id,
-                map_index=key.map_index,
-                try_number=key.try_number,
-            )
-            .first()
-        )
-
-        if existing_job:
-            existing_job.state = TaskInstanceState.QUEUED
-            existing_job.queue = queue or DEFAULT_QUEUE
-            existing_job.concurrency_slots = task_instance.pool_slots
-            existing_job.command = str(command)
-        else:
-            session.add(
-                EdgeJobModel(
-                    dag_id=key.dag_id,
-                    task_id=key.task_id,
-                    run_id=key.run_id,
-                    map_index=key.map_index,
-                    try_number=key.try_number,
-                    state=TaskInstanceState.QUEUED,
-                    queue=queue or DEFAULT_QUEUE,
-                    concurrency_slots=task_instance.pool_slots,
-                    command=str(command),
-                )
-            )
-
     @provide_session
     def queue_workload(
         self,
-        workload:
+        workload: workloads.All,
         session: Session = NEW_SESSION,
     ) -> None:
         """Put new workload to queue. Airflow 3 entry point to execute a task."""
-        from airflow.executors import workloads
-
         if not isinstance(workload, workloads.ExecuteTask):
             raise TypeError(f"Don't know how to queue workload of type {type(workload).__name__}")
 
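`queue_workload` keeps a broad `workloads.All` annotation at the API boundary but narrows at runtime, rejecting anything other than `ExecuteTask`. The guard in a self-contained sketch (stand-in class):

class ExecuteTask:
    # Stand-in for airflow.executors.workloads.ExecuteTask
    pass

def queue_workload(workload) -> None:
    if not isinstance(workload, ExecuteTask):
        raise TypeError(f"Don't know how to queue workload of type {type(workload).__name__}")

queue_workload(ExecuteTask())  # accepted
# queue_workload("job")        # would raise TypeError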
@@ -263,11 +211,7 @@ class EdgeExecutor(BaseExecutor):
 
     def _update_orphaned_jobs(self, session: Session) -> bool:
         """Update status ob jobs when workers die and don't update anymore."""
-        if AIRFLOW_V_3_0_PLUS:
-            heartbeat_interval_config_name = "task_instance_heartbeat_timeout"
-        else:
-            heartbeat_interval_config_name = "scheduler_zombie_task_threshold"
-        heartbeat_interval: int = conf.getint("scheduler", heartbeat_interval_config_name)
+        heartbeat_interval: int = conf.getint("scheduler", "task_instance_heartbeat_timeout")
         lifeless_jobs: list[EdgeJobModel] = (
             session.query(EdgeJobModel)
             .with_for_update(skip_locked=True)
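With the Airflow 2 config key gone, orphan detection reads a single setting and derives a cutoff timestamp; jobs whose last heartbeat predates it are treated as lifeless. A sketch of the cutoff arithmetic (the interval value is illustrative; the real code reads it via `conf.getint`, and the SQLAlchemy query is omitted):

from datetime import datetime, timedelta, timezone

heartbeat_interval = 300  # seconds, e.g. [scheduler] task_instance_heartbeat_timeout
cutoff = datetime.now(timezone.utc) - timedelta(seconds=heartbeat_interval)
# A job whose last update is older than `cutoff` would be marked as orphaned.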
airflow/providers/edge3/models/edge_worker.py
@@ -26,11 +26,9 @@ from typing import TYPE_CHECKING
 from sqlalchemy import Integer, String, delete, select
 from sqlalchemy.orm import Mapped
 
-from airflow.exceptions import AirflowException
 from airflow.models.base import Base
-from airflow.providers.common.compat.sdk import timezone
+from airflow.providers.common.compat.sdk import AirflowException, Stats, timezone
 from airflow.providers.common.compat.sqlalchemy.orm import mapped_column
-from airflow.stats import Stats
 from airflow.utils.log.logging_mixin import LoggingMixin
 from airflow.utils.providers_configuration_loader import providers_configuration_loaded
 from airflow.utils.session import NEW_SESSION, provide_session
@@ -50,6 +48,12 @@ class EdgeWorkerVersionException(AirflowException):
     pass
 
 
+class EdgeWorkerDuplicateException(AirflowException):
+    """Signal that a worker with the same name is already active."""
+
+    pass
+
+
 class EdgeWorkerState(str, Enum):
     """Status of a Edge Worker instance."""
 
airflow/providers/edge3/plugins/edge_executor_plugin.py
@@ -23,188 +23,35 @@ from typing import TYPE_CHECKING, Any
 from airflow.configuration import conf
 from airflow.exceptions import AirflowConfigException
 from airflow.plugins_manager import AirflowPlugin
-from airflow.providers.edge3.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_1_PLUS
+from airflow.providers.edge3.version_compat import AIRFLOW_V_3_1_PLUS
 from airflow.utils.session import NEW_SESSION, provide_session
 
 if TYPE_CHECKING:
     from sqlalchemy.orm import Session
 
-if AIRFLOW_V_3_0_PLUS:
-    from airflow.utils.db import DBLocks, create_global_lock
+from airflow.utils.db import DBLocks, create_global_lock
 
-    @provide_session
-    def _get_api_endpoint(session: Session = NEW_SESSION) -> dict[str, Any]:
-        # Ensure all required DB modeals are created before starting the API
-        with create_global_lock(session=session, lock=DBLocks.MIGRATIONS):
-            engine = session.get_bind().engine
-            from airflow.providers.edge3.models.edge_job import EdgeJobModel
-            from airflow.providers.edge3.models.edge_logs import EdgeLogsModel
-            from airflow.providers.edge3.models.edge_worker import EdgeWorkerModel
 
-            EdgeJobModel.metadata.create_all(engine)
-            EdgeLogsModel.metadata.create_all(engine)
-            EdgeWorkerModel.metadata.create_all(engine)
+@provide_session
+def _get_api_endpoint(session: Session = NEW_SESSION) -> dict[str, Any]:
+    # Ensure all required DB modeals are created before starting the API
+    with create_global_lock(session=session, lock=DBLocks.MIGRATIONS):
+        engine = session.get_bind().engine
+        from airflow.providers.edge3.models.edge_job import EdgeJobModel
+        from airflow.providers.edge3.models.edge_logs import EdgeLogsModel
+        from airflow.providers.edge3.models.edge_worker import EdgeWorkerModel
 
-        from airflow.providers.edge3.worker_api.app import create_edge_worker_api_app
+        EdgeJobModel.metadata.create_all(engine)
+        EdgeLogsModel.metadata.create_all(engine)
+        EdgeWorkerModel.metadata.create_all(engine)
 
-        return {
-            "app": create_edge_worker_api_app(),
-            "url_prefix": "/edge_worker",
-            "name": "Airflow Edge Worker",
-        }
+    from airflow.providers.edge3.worker_api.app import create_edge_worker_api_app
 
-else:
-
-
-
-
-    from pathlib import Path
-
-    from flask import Blueprint, redirect, request, url_for
-    from flask_appbuilder import BaseView, expose
-    from markupsafe import Markup
-    from sqlalchemy import select
-
-    from airflow.auth.managers.models.resource_details import AccessView
-    from airflow.utils.state import State, TaskInstanceState
-    from airflow.utils.yaml import safe_load
-    from airflow.www.auth import has_access_view
-
-    def _get_airflow_2_api_endpoint() -> Blueprint:
-        from airflow.www.app import csrf
-        from airflow.www.constants import SWAGGER_BUNDLE, SWAGGER_ENABLED
-        from airflow.www.extensions.init_views import _CustomErrorRequestBodyValidator, _LazyResolver
-
-        folder = Path(__file__).parents[1].resolve()  # this is airflow/providers/edge3/
-        with folder.joinpath("openapi", "edge_worker_api_v1.yaml").open() as f:
-            specification = safe_load(f)
-        from connexion import FlaskApi
-
-        bp = FlaskApi(
-            specification=specification,
-            resolver=_LazyResolver(),
-            base_path="/edge_worker/v1",
-            strict_validation=True,
-            options={"swagger_ui": SWAGGER_ENABLED, "swagger_path": SWAGGER_BUNDLE.__fspath__()},
-            validate_responses=True,
-            validator_map={"body": _CustomErrorRequestBodyValidator},
-        ).blueprint
-        # Need to exempt CSRF to make API usable
-        csrf.exempt(bp)
-        return bp
-
-    def _state_token(state):
-        """Return a formatted string with HTML for a given State."""
-        color = State.color(state)
-        fg_color = State.color_fg(state)
-        return Markup(
-            """
-            <span class="label" style="color:{fg_color}; background-color:{color};"
-                title="Current State: {state}">{state}</span>
-            """
-        ).format(color=color, state=state, fg_color=fg_color)
-
-    def modify_maintenance_comment_on_update(maintenance_comment: str | None, username: str) -> str:
-        if maintenance_comment:
-            if re.search(
-                r"^\[[-\d:\s]+\] - .+ put node into maintenance mode\r?\nComment:.*", maintenance_comment
-            ):
-                return re.sub(
-                    r"^\[[-\d:\s]+\] - .+ put node into maintenance mode\r?\nComment:",
-                    f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}] - {username} updated maintenance mode\nComment:",
-                    maintenance_comment,
-                )
-            if re.search(r"^\[[-\d:\s]+\] - .+ updated maintenance mode\r?\nComment:.*", maintenance_comment):
-                return re.sub(
-                    r"^\[[-\d:\s]+\] - .+ updated maintenance mode\r?\nComment:",
-                    f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}] - {username} updated maintenance mode\nComment:",
-                    maintenance_comment,
-                )
-            return f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}] - {username} updated maintenance mode\nComment: {maintenance_comment}"
-        return (
-            f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}] - {username} updated maintenance mode\nComment:"
-        )
-
-    # registers airflow/providers/edge3/plugins/templates as a Jinja template folder
-    template_bp = Blueprint(
-        "template_blueprint",
-        __name__,
-        template_folder="templates",
-    )
-
-    class EdgeWorkerJobs(BaseView):
-        """Simple view to show Edge Worker jobs."""
-
-        default_view = "jobs"
-
-        @expose("/jobs")
-        @has_access_view(AccessView.JOBS)
-        @provide_session
-        def jobs(self, session: Session = NEW_SESSION):
-            from airflow.providers.edge3.models.edge_job import EdgeJobModel
-
-            jobs = session.scalars(select(EdgeJobModel).order_by(EdgeJobModel.queued_dttm)).all()
-            html_states = {
-                str(state): _state_token(str(state)) for state in TaskInstanceState.__members__.values()
-            }
-            return self.render_template("edge_worker_jobs.html", jobs=jobs, html_states=html_states)
-
-    class EdgeWorkerHosts(BaseView):
-        """Simple view to show Edge Worker status."""
-
-        default_view = "status"
-
-        @expose("/status")
-        @has_access_view(AccessView.JOBS)
-        @provide_session
-        def status(self, session: Session = NEW_SESSION):
-            from airflow.providers.edge3.models.edge_worker import EdgeWorkerModel
-
-            hosts = session.scalars(select(EdgeWorkerModel).order_by(EdgeWorkerModel.worker_name)).all()
-            five_min_ago = datetime.now() - timedelta(minutes=5)
-            return self.render_template("edge_worker_hosts.html", hosts=hosts, five_min_ago=five_min_ago)
-
-        @expose("/status/maintenance/<string:worker_name>/on", methods=["POST"])
-        @has_access_view(AccessView.JOBS)
-        def worker_to_maintenance(self, worker_name: str):
-            from flask_login import current_user
-
-            from airflow.providers.edge3.models.edge_worker import request_maintenance
-
-            maintenance_comment = request.form.get("maintenance_comment")
-            maintenance_comment = f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}] - {current_user.username} put node into maintenance mode\nComment: {maintenance_comment}"
-            request_maintenance(worker_name, maintenance_comment)
-            return redirect(url_for("EdgeWorkerHosts.status"))
-
-        @expose("/status/maintenance/<string:worker_name>/off", methods=["POST"])
-        @has_access_view(AccessView.JOBS)
-        def remove_worker_from_maintenance(self, worker_name: str):
-            from airflow.providers.edge3.models.edge_worker import exit_maintenance
-
-            exit_maintenance(worker_name)
-            return redirect(url_for("EdgeWorkerHosts.status"))
-
-        @expose("/status/maintenance/<string:worker_name>/remove", methods=["POST"])
-        @has_access_view(AccessView.JOBS)
-        def remove_worker(self, worker_name: str):
-            from airflow.providers.edge3.models.edge_worker import remove_worker
-
-            remove_worker(worker_name)
-            return redirect(url_for("EdgeWorkerHosts.status"))
-
-        @expose("/status/maintenance/<string:worker_name>/change_comment", methods=["POST"])
-        @has_access_view(AccessView.JOBS)
-        def change_maintenance_comment(self, worker_name: str):
-            from flask_login import current_user
-
-            from airflow.providers.edge3.models.edge_worker import change_maintenance_comment
-
-            maintenance_comment = request.form.get("maintenance_comment")
-            maintenance_comment = modify_maintenance_comment_on_update(
-                maintenance_comment, current_user.username
-            )
-            change_maintenance_comment(worker_name, maintenance_comment)
-            return redirect(url_for("EdgeWorkerHosts.status"))
+    return {
+        "app": create_edge_worker_api_app(),
+        "url_prefix": "/edge_worker",
+        "name": "Airflow Edge Worker",
+    }
 
 
 # Check if EdgeExecutor is actually loaded
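`_get_api_endpoint` now always returns the FastAPI sub-app descriptor; the `app`/`url_prefix`/`name` keys match what the plugin's `fastapi_apps` attribute (used further below) is fed with. A minimal hedged sketch of such a descriptor:

from fastapi import FastAPI

def make_endpoint() -> dict:
    app = FastAPI(title="demo sub-app")  # stand-in for create_edge_worker_api_app()
    return {"app": app, "url_prefix": "/edge_worker", "name": "Airflow Edge Worker"}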
@@ -213,17 +60,14 @@ try:
 except AirflowConfigException:
     EDGE_EXECUTOR_ACTIVE = False
 
-# Load the API endpoint only on api-server
-#
+# Load the API endpoint only on api-server
+# TODO(jscheffl): Remove this check when the discussion in
 # https://lists.apache.org/thread/w170czq6r7bslkqp1tk6bjjjo0789wgl
 # resulted in a proper API to selective initialize. Maybe backcompat-shim
 # is also needed to support Airflow-versions prior the rework.
-if AIRFLOW_V_3_0_PLUS:
-    RUNNING_ON_APISERVER = (len(sys.argv) > 1 and sys.argv[1] in ["api-server"]) or (
-        len(sys.argv) > 2 and sys.argv[2] == "airflow-core/src/airflow/api_fastapi/main.py"
-    )
-else:
-    RUNNING_ON_APISERVER = "gunicorn" in sys.argv[0] and "airflow-webserver" in sys.argv
+RUNNING_ON_APISERVER = (len(sys.argv) > 1 and sys.argv[1] in ["api-server"]) or (
+    len(sys.argv) > 2 and sys.argv[2] == "airflow-core/src/airflow/api_fastapi/main.py"
+)
 
 
 def _get_base_url_path(path: str) -> str:
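The api-server detection is now a direct `sys.argv` inspection (an `airflow api-server` launch puts "api-server" in `argv[1]`; the second branch appears to match an internal dev-mode entry point path) instead of the old gunicorn heuristic. The check in isolation, verbatim from the diff:

import sys

RUNNING_ON_APISERVER = (len(sys.argv) > 1 and sys.argv[1] in ["api-server"]) or (
    len(sys.argv) > 2 and sys.argv[2] == "airflow-core/src/airflow/api_fastapi/main.py"
)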
@@ -247,8 +91,9 @@ class EdgeExecutorPlugin(AirflowPlugin):
 
     name = "edge_executor"
     if EDGE_EXECUTOR_ACTIVE and RUNNING_ON_APISERVER:
+        fastapi_apps = [_get_api_endpoint()]
         if AIRFLOW_V_3_1_PLUS:
-
+            # Airflow 3.0 does not know about react_apps, so we only provide the API endpoint
             react_apps = [
                 {
                     "name": "Edge Executor",
@@ -271,27 +116,3 @@ class EdgeExecutorPlugin(AirflowPlugin):
                     "url_route": "edge_worker_api_docs",
                 }
             ]
-        if AIRFLOW_V_3_0_PLUS:
-            # Airflow 3.0 does not know about react_apps, so we only provide the API endpoint
-            fastapi_apps = [_get_api_endpoint()]
-        else:
-            appbuilder_menu_items = [
-                {
-                    "name": "Edge Worker API docs",
-                    "href": _get_base_url_path("/edge_worker/v1/ui"),
-                    "category": "Docs",
-                }
-            ]
-            appbuilder_views = [
-                {
-                    "name": "Edge Worker Jobs",
-                    "category": "Admin",
-                    "view": EdgeWorkerJobs(),
-                },
-                {
-                    "name": "Edge Worker Hosts",
-                    "category": "Admin",
-                    "view": EdgeWorkerHosts(),
-                },
-            ]
-            flask_blueprints = [_get_airflow_2_api_endpoint(), template_bp]