apache-airflow-providers-edge3 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. airflow/providers/edge3/LICENSE +201 -0
  2. airflow/providers/edge3/__init__.py +39 -0
  3. airflow/providers/edge3/cli/__init__.py +16 -0
  4. airflow/providers/edge3/cli/api_client.py +206 -0
  5. airflow/providers/edge3/cli/dataclasses.py +95 -0
  6. airflow/providers/edge3/cli/edge_command.py +689 -0
  7. airflow/providers/edge3/example_dags/__init__.py +16 -0
  8. airflow/providers/edge3/example_dags/integration_test.py +164 -0
  9. airflow/providers/edge3/example_dags/win_notepad.py +83 -0
  10. airflow/providers/edge3/example_dags/win_test.py +342 -0
  11. airflow/providers/edge3/executors/__init__.py +22 -0
  12. airflow/providers/edge3/executors/edge_executor.py +367 -0
  13. airflow/providers/edge3/get_provider_info.py +99 -0
  14. airflow/providers/edge3/models/__init__.py +16 -0
  15. airflow/providers/edge3/models/edge_job.py +94 -0
  16. airflow/providers/edge3/models/edge_logs.py +73 -0
  17. airflow/providers/edge3/models/edge_worker.py +230 -0
  18. airflow/providers/edge3/openapi/__init__.py +19 -0
  19. airflow/providers/edge3/openapi/edge_worker_api_v1.yaml +808 -0
  20. airflow/providers/edge3/plugins/__init__.py +16 -0
  21. airflow/providers/edge3/plugins/edge_executor_plugin.py +229 -0
  22. airflow/providers/edge3/plugins/templates/edge_worker_hosts.html +175 -0
  23. airflow/providers/edge3/plugins/templates/edge_worker_jobs.html +69 -0
  24. airflow/providers/edge3/version_compat.py +36 -0
  25. airflow/providers/edge3/worker_api/__init__.py +17 -0
  26. airflow/providers/edge3/worker_api/app.py +43 -0
  27. airflow/providers/edge3/worker_api/auth.py +135 -0
  28. airflow/providers/edge3/worker_api/datamodels.py +190 -0
  29. airflow/providers/edge3/worker_api/routes/__init__.py +16 -0
  30. airflow/providers/edge3/worker_api/routes/_v2_compat.py +135 -0
  31. airflow/providers/edge3/worker_api/routes/_v2_routes.py +237 -0
  32. airflow/providers/edge3/worker_api/routes/health.py +28 -0
  33. airflow/providers/edge3/worker_api/routes/jobs.py +162 -0
  34. airflow/providers/edge3/worker_api/routes/logs.py +133 -0
  35. airflow/providers/edge3/worker_api/routes/worker.py +224 -0
  36. apache_airflow_providers_edge3-1.0.0rc1.dist-info/METADATA +117 -0
  37. apache_airflow_providers_edge3-1.0.0rc1.dist-info/RECORD +39 -0
  38. apache_airflow_providers_edge3-1.0.0rc1.dist-info/WHEEL +4 -0
  39. apache_airflow_providers_edge3-1.0.0rc1.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,135 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ from functools import cache
21
+ from uuid import uuid4
22
+
23
+ from itsdangerous import BadSignature
24
+ from jwt import (
25
+ ExpiredSignatureError,
26
+ ImmatureSignatureError,
27
+ InvalidAudienceError,
28
+ InvalidIssuedAtError,
29
+ InvalidSignatureError,
30
+ )
31
+
32
+ from airflow.configuration import conf
33
+ from airflow.providers.edge3.version_compat import AIRFLOW_V_3_0_PLUS
34
+ from airflow.providers.edge3.worker_api.datamodels import JsonRpcRequestBase # noqa: TCH001
35
+ from airflow.providers.edge3.worker_api.routes._v2_compat import (
36
+ Header,
37
+ HTTPException,
38
+ Request,
39
+ status,
40
+ )
41
+
42
+ log = logging.getLogger(__name__)
43
+
44
+
45
if AIRFLOW_V_3_0_PLUS:
    # Airflow 3: tokens are checked with the validator shipped with the FastAPI app.
    from airflow.api_fastapi.auth.tokens import JWTValidator

    @cache
    def jwt_validator() -> JWTValidator:
        """Create the token validator; ``functools.cache`` makes this a one-time construction."""
        grace_seconds = conf.getint("core", "internal_api_clock_grace", fallback=30)
        secret = conf.get("core", "internal_api_secret_key")
        return JWTValidator(secret_key=secret, leeway=grace_seconds, audience="api")

    def jwt_validate(authorization: str) -> dict:
        """Pass ``authorization`` to the cached validator and return the validated claims."""
        validator = jwt_validator()
        return validator.validated_claims(authorization)

else:
    # Airflow 2.10 compatibility
    from airflow.utils.jwt_signer import JWTSigner  # type: ignore

    @cache
    def jwt_signer() -> JWTSigner:
        """Create the token signer; ``functools.cache`` makes this a one-time construction."""
        grace_seconds = conf.getint("core", "internal_api_clock_grace", fallback=30)
        secret = conf.get("core", "internal_api_secret_key")
        # The clock grace is used both as expiration time and as leeway.
        return JWTSigner(
            secret_key=secret,
            expiration_time_in_seconds=grace_seconds,
            leeway_in_seconds=grace_seconds,
            audience="api",
        )

    def jwt_validate(authorization: str) -> dict:
        """Pass ``authorization`` to the cached signer and return the verified token payload."""
        signer = jwt_signer()
        return signer.verify_token(authorization)
76
+
77
+
78
def _forbidden_response(message: str):
    """
    Log the failure and abort with an anonymized HTTP 403.

    The message is written to the server log together with a random error id.
    Only that id is included in the detail of the raised exception, so the
    reason for the rejection is not disclosed in the response.

    :param message: Description of the failure; it is logged, not returned.
    :raises HTTPException: Always, with status ``HTTP_403_FORBIDDEN``.
    """
    correlation_id = uuid4()
    log.exception("%s error_id=%s", message, correlation_id)
    detail = f"Forbidden. The server side traceback may be identified with error_id={correlation_id}"
    raise HTTPException(status.HTTP_403_FORBIDDEN, detail)
86
+
87
+
88
def jwt_token_authorization(method: str, authorization: str):
    """
    Check if the JWT token is correct.

    The token is validated with ``jwt_validate`` and the ``method`` claim of its
    payload must equal the method that is actually being called.

    :param method: Name of the method / path the caller is invoking.
    :param authorization: The JWT token as received from the caller.
    :raises HTTPException: With status 403 (via ``_forbidden_response``) if the
        token cannot be validated or was signed for a different method.
    """
    try:
        payload = jwt_validate(authorization)
        signed_method = payload.get("method")
    except BadSignature:
        _forbidden_response("Bad Signature. Please use only the tokens provided by the API.")
    except InvalidAudienceError:
        _forbidden_response("Invalid audience for the request")
    except InvalidSignatureError:
        _forbidden_response("The signature of the request was wrong")
    except ImmatureSignatureError:
        _forbidden_response("The signature of the request was sent from the future")
    except ExpiredSignatureError:
        _forbidden_response(
            "The signature of the request has expired. Make sure that all components "
            "in your system have synchronized clocks.",
        )
    except InvalidIssuedAtError:
        _forbidden_response(
            "The request was issues in the future. Make sure that all components "
            "in your system have synchronized clocks.",
        )
    except Exception:
        _forbidden_response("Unable to authenticate API via token.")
    else:
        # The method check lives in the ``else`` clause on purpose: when it was part
        # of the ``try`` body, the exception raised by ``_forbidden_response`` was
        # caught again by ``except Exception`` above, so the rejection was logged a
        # second time as a generic failure and the caller received the error_id of
        # that generic entry instead of the one describing the method mismatch.
        if not signed_method or signed_method != method:
            _forbidden_response(
                "Invalid method in token authorization. "
                f"signed method='{signed_method}' "
                f"called method='{method}'",
            )
119
+
120
+
121
def jwt_token_authorization_rpc(
    body: JsonRpcRequestBase, authorization: str = Header(description="JWT Authorization Token")
):
    """
    Check if the JWT token is correct for JSON RPC requests.

    The method the token must have been signed for is taken from ``body.method``.
    """
    jwt_token_authorization(method=body.method, authorization=authorization)
126
+
127
+
128
def jwt_token_authorization_rest(
    request: Request, authorization: str = Header(description="JWT Authorization Token")
):
    """
    Check if the JWT token is correct for REST API requests.

    The method the token must have been signed for is the part of the request
    path that follows the first ``/edge_worker/v1/``; if the path does not
    contain that prefix, the complete path is used.
    """
    url_path = request.url.path
    _, matched_prefix, remainder = url_path.partition("/edge_worker/v1/")
    if matched_prefix:
        signed_name = remainder
    else:
        signed_name = url_path
    jwt_token_authorization(signed_name, authorization)
@@ -0,0 +1,190 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+ from __future__ import annotations
18
+
19
+ from datetime import datetime
20
+ from typing import (
21
+ Annotated,
22
+ Any,
23
+ )
24
+
25
+ from pydantic import BaseModel, Field
26
+
27
+ from airflow.models.taskinstancekey import TaskInstanceKey
28
+ from airflow.providers.edge3.models.edge_worker import EdgeWorkerState # noqa: TCH001
29
+ from airflow.providers.edge3.worker_api.routes._v2_compat import ExecuteTask, Path
30
+
31
+
32
class WorkerApiDocs:
    """Collection of ``Path`` parameter declarations that document the worker API."""

    dag_id = Path(
        title="Dag ID",
        description="Identifier of the DAG to which the task belongs.",
    )
    task_id = Path(
        title="Task ID",
        description="Task name in the DAG.",
    )
    run_id = Path(
        title="Run ID",
        description="Run ID of the DAG execution.",
    )
    try_number = Path(
        title="Try Number",
        description="The number of attempt to execute this task.",
    )
    map_index = Path(
        title="Map Index",
        description="For dynamically mapped tasks the mapping number, -1 if the task is not mapped.",
    )
    state = Path(
        title="Task State",
        description="State of the assigned task under execution.",
    )
44
+
45
+
46
class JsonRpcRequestBase(BaseModel):
    """Base JSON RPC request model to define just the method."""

    # Declared on the base class so that derived request models inherit it.
    method: Annotated[
        str,
        Field(
            description="Fully qualified python module method name that is called via JSON RPC.",
        ),
    ]
53
+
54
+
55
class JsonRpcRequest(JsonRpcRequestBase):
    """JSON RPC request model."""

    # Protocol version string sent by the caller.
    jsonrpc: Annotated[
        str,
        Field(description="JSON RPC Version", examples=["2.0"]),
    ]
    # No default is declared, so the field must be sent; ``None`` is an accepted value.
    params: Annotated[
        dict[str, Any] | None,
        Field(
            description="Dictionary of parameters passed to the method.",
        ),
    ]
63
+
64
+
65
class EdgeJobBase(BaseModel):
    """Basic attributes of a job on the edge worker."""

    # The five fields below are exactly the values used to build ``key``.
    dag_id: Annotated[
        str,
        Field(
            title="Dag ID",
            description="Identifier of the DAG to which the task belongs.",
        ),
    ]
    task_id: Annotated[
        str,
        Field(title="Task ID", description="Task name in the DAG."),
    ]
    run_id: Annotated[
        str,
        Field(title="Run ID", description="Run ID of the DAG execution."),
    ]
    map_index: Annotated[
        int,
        Field(
            title="Map Index",
            description="For dynamically mapped tasks the mapping number, -1 if the task is not mapped.",
        ),
    ]
    try_number: Annotated[
        int,
        Field(
            title="Try Number",
            description="The number of attempt to execute this task.",
        ),
    ]

    @property
    def key(self) -> TaskInstanceKey:
        # Build the task instance key from the identifying fields of this job.
        return TaskInstanceKey(
            dag_id=self.dag_id,
            task_id=self.task_id,
            run_id=self.run_id,
            try_number=self.try_number,
            map_index=self.map_index,
        )
87
+
88
+
89
class EdgeJobFetched(EdgeJobBase):
    """Job that is to be executed on the edge worker."""

    # ``ExecuteTask`` comes from the compatibility layer and differs per Airflow major version.
    command: Annotated[
        ExecuteTask,
        Field(
            title="Command",
            description="Command line to use to execute the job in Airflow 2. Task definition in Airflow 3",
        ),
    ]
    concurrency_slots: Annotated[
        int,
        Field(description="Number of concurrency slots the job requires."),
    ]
100
+
101
+
102
class WorkerQueuesBase(BaseModel):
    """Queues that a worker supports to run jobs on."""

    # Optional: the default is ``None``.
    queues: Annotated[
        list[str] | None,
        Field(
            default=None,
            description="List of queues the worker is pulling jobs from. If not provided, worker pulls from all queues.",
        ),
    ]
112
+
113
+
114
class WorkerQueuesBody(WorkerQueuesBase):
    """Queues that a worker supports to run jobs on."""

    # Extends the inherited ``queues`` field with the free capacity reported by the worker.
    free_concurrency: Annotated[
        int,
        Field(description="Number of free concurrency slots on the worker."),
    ]
118
+
119
+
120
class WorkerStateBody(WorkerQueuesBase):
    """Details of the worker state sent to the scheduler."""

    state: Annotated[
        EdgeWorkerState,
        Field(description="State of the worker from the view of the worker."),
    ]
    jobs_active: Annotated[
        int,
        Field(description="Number of active jobs the worker is running."),
    ] = 0
    # Re-declares the field inherited from ``WorkerQueuesBase``; the default stays ``None``.
    queues: Annotated[
        list[str] | None,
        Field(
            description="List of queues the worker is pulling jobs from. If not provided, worker pulls from all queues.",
        ),
    ] = None
    # Required free-form mapping; the example shows the kind of entries a worker reports.
    sysinfo: Annotated[
        dict[str, str | int],
        Field(
            description="System information of the worker.",
            examples=[
                {
                    "concurrency": 4,
                    "free_concurrency": 3,
                    "airflow_version": "2.0.0",
                    "edge_provider_version": "1.0.0",
                }
            ],
        ),
    ]
    maintenance_comments: Annotated[
        str | None,
        Field(description="Comments about the maintenance state of the worker."),
    ] = None
149
+
150
+
151
class WorkerQueueUpdateBody(BaseModel):
    """Changed queues for the worker."""

    # Neither field declares a default: both must be sent, and ``None`` is accepted for each.
    new_queues: Annotated[
        list[str] | None,
        Field(
            description="Additional queues to be added to worker.",
        ),
    ]
    remove_queues: Annotated[
        list[str] | None,
        Field(
            description="Queues to remove from worker.",
        ),
    ]
162
+
163
+
164
class PushLogsBody(BaseModel):
    """Incremental new log content from worker."""

    # Timestamp and text of one log increment; both are required.
    log_chunk_time: Annotated[
        datetime,
        Field(description="Time of the log chunk at point of sending."),
    ]
    log_chunk_data: Annotated[
        str,
        Field(description="Log chunk data as incremental log text."),
    ]
169
+
170
+
171
class WorkerRegistrationReturn(BaseModel):
    """The return class for the worker registration."""

    # Single required field of the registration result.
    last_update: Annotated[
        datetime,
        Field(description="Time of the last update of the worker."),
    ]
175
+
176
+
177
class WorkerSetStateReturn(BaseModel):
    """The return class for the worker set state."""

    state: Annotated[
        EdgeWorkerState,
        Field(description="State of the worker from the view of the server."),
    ]
    # No default is declared here, so the field must be present; ``None`` is an accepted value.
    queues: Annotated[
        list[str] | None,
        Field(
            description="List of queues the worker is pulling jobs from. If not provided, worker pulls from all queues.",
        ),
    ]
    maintenance_comments: Annotated[
        str | None,
        Field(description="Comments about the maintenance state of the worker."),
    ] = None
@@ -0,0 +1,16 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
@@ -0,0 +1,135 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+ """Compatibility layer for API to provide both FastAPI as well as Connexion based endpoints."""
18
+
19
+ from __future__ import annotations
20
+
21
+ from airflow.providers.edge3.version_compat import AIRFLOW_V_3_0_PLUS
22
+
23
if AIRFLOW_V_3_0_PLUS:
    # Just re-import the types from FastAPI and Airflow Core
    from fastapi import Body, Depends, Header, HTTPException, Path, Request, status

    from airflow.api_fastapi.common.db.common import SessionDep
    from airflow.api_fastapi.common.router import AirflowRouter
    from airflow.api_fastapi.core_api.openapi.exceptions import create_openapi_http_exception_doc

    # In Airflow 3 with AIP-72 we get workload addressed by ExecuteTask
    from airflow.executors.workloads import ExecuteTask

    def parse_command(command: str) -> ExecuteTask:
        """Deserialize the JSON text ``command`` into an ``ExecuteTask`` workload."""
        return ExecuteTask.model_validate_json(command)
else:
    # Mock the external dependencies
    from typing import Callable

    from connexion import ProblemException

    # The following classes are stand-ins for the FastAPI names imported in the
    # branch above: they accept any constructor arguments and ignore them.

    class Body:  # type: ignore[no-redef]
        def __init__(self, *_, **__):
            pass

    class Depends:  # type: ignore[no-redef]
        def __init__(self, *_, **__):
            pass

    class Header:  # type: ignore[no-redef]
        def __init__(self, *_, **__):
            pass

    class Path:  # type: ignore[no-redef]
        def __init__(self, *_, **__):
            pass

    class Request:  # type: ignore[no-redef]
        pass

    class SessionDep:  # type: ignore[no-redef]
        pass

    def create_openapi_http_exception_doc(responses_status_code: list[int]) -> dict:
        """Return an empty mapping; the argument is ignored in this branch."""
        return {}

    class status:  # type: ignore[no-redef]
        # Only these four status codes are provided by the stand-in.
        HTTP_204_NO_CONTENT = 204
        HTTP_400_BAD_REQUEST = 400
        HTTP_403_FORBIDDEN = 403
        HTTP_500_INTERNAL_SERVER_ERROR = 500

    class HTTPException(ProblemException):  # type: ignore[no-redef]
        """Raise when the user does not have the required permissions."""

        def __init__(
            self,
            status: int,
            detail: str,
        ) -> None:
            from airflow.utils.docs import get_docs_url

            doc_link = get_docs_url("stable-rest-api-ref.html")
            # Per supported status: (anchor in the REST API reference, problem title).
            # Any other status raises ``KeyError`` on the lookup below.
            known_problems = {
                400: ("BadRequest", "BadRequest"),
                403: ("PermissionDenied", "PermissionDenied"),
                500: ("Unknown", "InternalServerError"),
            }
            anchor, problem_title = known_problems[status]
            super().__init__(
                status=status,
                type=f"{doc_link}#section/Errors/{anchor}",
                title=problem_title,
                detail=detail,
            )

        def to_response(self):
            """Build a Flask response carrying the detail text and the status code."""
            from flask import Response

            return Response(response=self.detail, status=self.status)

    class AirflowRouter:  # type: ignore[no-redef]
        """Stand-in router whose route decorators return the function unchanged."""

        def __init__(self, *_, **__):
            pass

        @staticmethod
        def _identity(func: Callable) -> Callable:
            return func

        def get(self, *_, **__):
            return self._identity

        def post(self, *_, **__):
            return self._identity

        def patch(self, *_, **__):
            return self._identity

    # In Airflow 3 with AIP-72 we get workload addressed by ExecuteTask
    # But in Airflow 2.10 it is a command line array
    ExecuteTask = list[str]  # type: ignore[no-redef,assignment,misc]

    def parse_command(command: str) -> ExecuteTask:
        """Evaluate the Python literal in ``command`` and return the result."""
        import ast

        return ast.literal_eval(command)