apache-airflow-providers-edge3 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. airflow/providers/edge3/LICENSE +201 -0
  2. airflow/providers/edge3/__init__.py +39 -0
  3. airflow/providers/edge3/cli/__init__.py +16 -0
  4. airflow/providers/edge3/cli/api_client.py +206 -0
  5. airflow/providers/edge3/cli/dataclasses.py +95 -0
  6. airflow/providers/edge3/cli/edge_command.py +689 -0
  7. airflow/providers/edge3/example_dags/__init__.py +16 -0
  8. airflow/providers/edge3/example_dags/integration_test.py +164 -0
  9. airflow/providers/edge3/example_dags/win_notepad.py +83 -0
  10. airflow/providers/edge3/example_dags/win_test.py +342 -0
  11. airflow/providers/edge3/executors/__init__.py +22 -0
  12. airflow/providers/edge3/executors/edge_executor.py +367 -0
  13. airflow/providers/edge3/get_provider_info.py +99 -0
  14. airflow/providers/edge3/models/__init__.py +16 -0
  15. airflow/providers/edge3/models/edge_job.py +94 -0
  16. airflow/providers/edge3/models/edge_logs.py +73 -0
  17. airflow/providers/edge3/models/edge_worker.py +230 -0
  18. airflow/providers/edge3/openapi/__init__.py +19 -0
  19. airflow/providers/edge3/openapi/edge_worker_api_v1.yaml +808 -0
  20. airflow/providers/edge3/plugins/__init__.py +16 -0
  21. airflow/providers/edge3/plugins/edge_executor_plugin.py +229 -0
  22. airflow/providers/edge3/plugins/templates/edge_worker_hosts.html +175 -0
  23. airflow/providers/edge3/plugins/templates/edge_worker_jobs.html +69 -0
  24. airflow/providers/edge3/version_compat.py +36 -0
  25. airflow/providers/edge3/worker_api/__init__.py +17 -0
  26. airflow/providers/edge3/worker_api/app.py +43 -0
  27. airflow/providers/edge3/worker_api/auth.py +135 -0
  28. airflow/providers/edge3/worker_api/datamodels.py +190 -0
  29. airflow/providers/edge3/worker_api/routes/__init__.py +16 -0
  30. airflow/providers/edge3/worker_api/routes/_v2_compat.py +135 -0
  31. airflow/providers/edge3/worker_api/routes/_v2_routes.py +237 -0
  32. airflow/providers/edge3/worker_api/routes/health.py +28 -0
  33. airflow/providers/edge3/worker_api/routes/jobs.py +162 -0
  34. airflow/providers/edge3/worker_api/routes/logs.py +133 -0
  35. airflow/providers/edge3/worker_api/routes/worker.py +224 -0
  36. apache_airflow_providers_edge3-1.0.0.dist-info/METADATA +117 -0
  37. apache_airflow_providers_edge3-1.0.0.dist-info/RECORD +39 -0
  38. apache_airflow_providers_edge3-1.0.0.dist-info/WHEEL +4 -0
  39. apache_airflow_providers_edge3-1.0.0.dist-info/entry_points.txt +6 -0
airflow/providers/edge3/worker_api/routes/_v2_routes.py
@@ -0,0 +1,237 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Compatibility layer for Connexion API to Airflow v2.10 API routes."""
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import TYPE_CHECKING, Any
+from uuid import uuid4
+
+from flask import Response, request
+
+from airflow.exceptions import AirflowException
+from airflow.providers.edge3.worker_api.auth import (
+    jwt_token_authorization,
+    jwt_token_authorization_rpc,
+)
+from airflow.providers.edge3.worker_api.datamodels import (
+    EdgeJobFetched,
+    JsonRpcRequest,
+    PushLogsBody,
+    WorkerQueuesBody,
+    WorkerStateBody,
+)
+from airflow.providers.edge3.worker_api.routes._v2_compat import HTTPException, status
+from airflow.providers.edge3.worker_api.routes.jobs import fetch, state as state_api
+from airflow.providers.edge3.worker_api.routes.logs import logfile_path, push_logs
+from airflow.providers.edge3.worker_api.routes.worker import register, set_state
+from airflow.serialization.serialized_objects import BaseSerialization
+from airflow.utils.session import NEW_SESSION, create_session, provide_session
+
+if TYPE_CHECKING:
+    from airflow.api_connexion.types import APIResponse
+    from airflow.utils.state import TaskInstanceState
+
+
+log = logging.getLogger(__name__)
+
+
+def error_response(message: str, status: int):
+    """Log the error and return the response as JSON object."""
+    error_id = uuid4()
+    server_message = f"{message} error_id={error_id}"
+    log.exception(server_message)
+    client_message = f"{message} The server side traceback may be identified with error_id={error_id}"
+    return HTTPException(status, client_message)
+
+
+def rpcapi_v2(body: dict[str, Any]) -> APIResponse:
+    """Handle Edge Worker API `/edge_worker/v1/rpcapi` endpoint for Airflow 2.10."""
+    # Note: Except for the method map, this _was_ a 100% copy of the internal API module
+    # airflow.api_internal.endpoints.rpc_api_endpoint.internal_airflow_api().
+    # As of the rework for FastAPI in Airflow 3.0, this is updated and will be removed in the future.
+    from airflow.api_internal.endpoints.rpc_api_endpoint import (  # type: ignore[attr-defined]
+        # Note: This exists only for compatibility with Airflow 2.10; it was removed in Airflow 3 / main
+        initialize_method_map,
+    )
+
+    try:
+        if request.headers.get("Content-Type", "") != "application/json":
+            raise HTTPException(status.HTTP_403_FORBIDDEN, "Expected Content-Type: application/json")
+        if request.headers.get("Accept", "") != "application/json":
+            raise HTTPException(status.HTTP_403_FORBIDDEN, "Expected Accept: application/json")
+        auth = request.headers.get("Authorization", "")
+        request_obj = JsonRpcRequest(method=body["method"], jsonrpc=body["jsonrpc"], params=body["params"])
+        jwt_token_authorization_rpc(request_obj, auth)
+        if request_obj.jsonrpc != "2.0":
+            raise error_response("Expected jsonrpc 2.0 request.", status.HTTP_400_BAD_REQUEST)
+
+        log.debug("Got request for %s", request_obj.method)
+        methods_map = initialize_method_map()
+        if request_obj.method not in methods_map:
+            raise error_response(f"Unrecognized method: {request_obj.method}.", status.HTTP_400_BAD_REQUEST)
+
+        handler = methods_map[request_obj.method]
+        params = {}
+        try:
+            if request_obj.params:
+                # Note: this is Airflow 2.10 specific, as it uses Pydantic models for serialization
+                params = BaseSerialization.deserialize(request_obj.params, use_pydantic_models=True)  # type: ignore[call-arg]
+        except Exception:
+            raise error_response("Error deserializing parameters.", status.HTTP_400_BAD_REQUEST)
+
+        log.debug("Calling method %s\nparams: %s", request_obj.method, params)
+        try:
+            # Session must be created here as it may be needed by the serializer for lazy-loaded fields.
+            with create_session() as session:
+                output = handler(**params, session=session)
+                # Note: this is Airflow 2.10 specific, as it uses Pydantic models for serialization
+                output_json = BaseSerialization.serialize(output, use_pydantic_models=True)  # type: ignore[call-arg]
+                log.debug(
+                    "Sending response: %s", json.dumps(output_json) if output_json is not None else None
+                )
+        # In case of AirflowException or other selected known types, transport the exception class back to the caller
+        except (KeyError, AttributeError, AirflowException) as e:
+            # Note: this is Airflow 2.10 specific, as it uses Pydantic models for serialization
+            output_json = BaseSerialization.serialize(e, use_pydantic_models=True)  # type: ignore[call-arg]
+            log.debug(
+                "Sending exception response: %s", json.dumps(output_json) if output_json is not None else None
+            )
+        except Exception:
+            raise error_response(
+                f"Error executing method '{request_obj.method}'.", status.HTTP_500_INTERNAL_SERVER_ERROR
+            )
+        response = json.dumps(output_json) if output_json is not None else None
+        return Response(response=response, headers={"Content-Type": "application/json"})
+    except HTTPException as e:
+        return e.to_response()  # type: ignore[attr-defined]
+
+
+def jwt_token_authorization_v2(method: str, authorization: str):
+    """Proxy for v2 method path handling."""
+    PREFIX = "/edge_worker/v1/"
+    method_path = method[method.find(PREFIX) + len(PREFIX) :] if PREFIX in method else method
+    jwt_token_authorization(method_path, authorization)
+
+
+@provide_session
+def register_v2(worker_name: str, body: dict[str, Any], session=NEW_SESSION) -> Any:
+    """Handle Edge Worker API `/edge_worker/v1/worker/{worker_name}` endpoint for Airflow 2.10."""
+    try:
+        auth = request.headers.get("Authorization", "")
+        jwt_token_authorization_v2(request.path, auth)
+        request_obj = WorkerStateBody(
+            state=body["state"], jobs_active=0, queues=body["queues"], sysinfo=body["sysinfo"]
+        )
+        return register(worker_name, request_obj, session).model_dump()
+    except HTTPException as e:
+        return e.to_response()  # type: ignore[attr-defined]
+
+
+@provide_session
+def set_state_v2(worker_name: str, body: dict[str, Any], session=NEW_SESSION) -> Any:
+    """Handle Edge Worker API `/edge_worker/v1/worker/{worker_name}` endpoint for Airflow 2.10."""
+    try:
+        auth = request.headers.get("Authorization", "")
+        jwt_token_authorization_v2(request.path, auth)
+        request_obj = WorkerStateBody(
+            state=body["state"],
+            jobs_active=body["jobs_active"],
+            queues=body["queues"],
+            sysinfo=body["sysinfo"],
+            maintenance_comments=body.get("maintenance_comments"),
+        )
+        return set_state(worker_name, request_obj, session).model_dump()
+    except HTTPException as e:
+        return e.to_response()  # type: ignore[attr-defined]
+
+
+@provide_session
+def job_fetch_v2(worker_name: str, body: dict[str, Any], session=NEW_SESSION) -> Any:
+    """Handle Edge Worker API `/edge_worker/v1/jobs/fetch/{worker_name}` endpoint for Airflow 2.10."""
+    from flask import request
+
+    try:
+        auth = request.headers.get("Authorization", "")
+        jwt_token_authorization_v2(request.path, auth)
+        queues = body.get("queues")
+        free_concurrency = body.get("free_concurrency", 1)
+        request_obj = WorkerQueuesBody(queues=queues, free_concurrency=free_concurrency)
+        job: EdgeJobFetched | None = fetch(worker_name, request_obj, session)
+        return job.model_dump() if job is not None else None
+    except HTTPException as e:
+        return e.to_response()  # type: ignore[attr-defined]
+
+
+@provide_session
+def job_state_v2(
+    dag_id: str,
+    task_id: str,
+    run_id: str,
+    try_number: int,
+    map_index: str,  # Note: Connexion cannot have negative numbers in path parameters, therefore use a string
+    state: TaskInstanceState,
+    session=NEW_SESSION,
+) -> Any:
+    """Handle Edge Worker API `/jobs/state/{dag_id}/{task_id}/{run_id}/{try_number}/{map_index}/{state}` endpoint for Airflow 2.10."""
+    from flask import request
+
+    try:
+        auth = request.headers.get("Authorization", "")
+        jwt_token_authorization_v2(request.path, auth)
+        state_api(dag_id, task_id, run_id, try_number, int(map_index), state, session)
+    except HTTPException as e:
+        return e.to_response()  # type: ignore[attr-defined]
+
+
+def logfile_path_v2(
+    dag_id: str,
+    task_id: str,
+    run_id: str,
+    try_number: int,
+    map_index: str,  # Note: Connexion cannot have negative numbers in path parameters, therefore use a string
+) -> str:
+    """Handle Edge Worker API `/edge_worker/v1/logs/logfile_path/{dag_id}/{task_id}/{run_id}/{try_number}/{map_index}` endpoint for Airflow 2.10."""
+    try:
+        auth = request.headers.get("Authorization", "")
+        jwt_token_authorization_v2(request.path, auth)
+        return logfile_path(dag_id, task_id, run_id, try_number, int(map_index))
+    except HTTPException as e:
+        return e.to_response()  # type: ignore[attr-defined]
+
+
+def push_logs_v2(
+    dag_id: str,
+    task_id: str,
+    run_id: str,
+    try_number: int,
+    map_index: str,  # Note: Connexion cannot have negative numbers in path parameters, therefore use a string
+    body: dict[str, Any],
+) -> None:
+    """Handle Edge Worker API `/edge_worker/v1/logs/push/{dag_id}/{task_id}/{run_id}/{try_number}/{map_index}` endpoint for Airflow 2.10."""
+    try:
+        auth = request.headers.get("Authorization", "")
+        jwt_token_authorization_v2(request.path, auth)
+        request_obj = PushLogsBody(
+            log_chunk_data=body["log_chunk_data"], log_chunk_time=body["log_chunk_time"]
+        )
+        with create_session() as session:
+            push_logs(dag_id, task_id, run_id, try_number, int(map_index), request_obj, session)
+    except HTTPException as e:
+        return e.to_response()  # type: ignore[attr-defined]
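
For orientation, a minimal client-side sketch of the JSON-RPC 2.0 envelope that `rpcapi_v2` validates above. The base URL, token, and method name are placeholders, not values from this package; the real method names come from the Airflow 2.10 internal API method map.

    # Hypothetical sketch, assuming a webserver at BASE_URL with the Edge plugin mounted.
    import requests

    BASE_URL = "http://localhost:8080"  # placeholder
    TOKEN = "<jwt-token>"  # placeholder; validated by jwt_token_authorization_rpc

    payload = {
        "jsonrpc": "2.0",            # anything else is rejected with HTTP 400
        "method": "example.method",  # illustrative; must exist in the 2.10 method map
        "params": None,              # optional; deserialized via BaseSerialization when set
    }
    resp = requests.post(
        f"{BASE_URL}/edge_worker/v1/rpcapi",
        json=payload,
        headers={
            "Content-Type": "application/json",  # both headers are checked, else HTTP 403
            "Accept": "application/json",
            "Authorization": TOKEN,
        },
    )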
airflow/providers/edge3/worker_api/routes/health.py
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from airflow.providers.edge3.worker_api.routes._v2_compat import AirflowRouter
+
+health_router = AirflowRouter(tags=["Health"])
+
+
+@health_router.get("/health")
+def health() -> dict[str, str]:
+    """Report API Health."""
+    return {"status": "healthy"}
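
The health route declares no auth dependency, so a liveness probe needs no token. A sketch, assuming the worker API is mounted under `/edge_worker/v1` as the other routes in this provider suggest:

    # Hypothetical probe; host and mount point are assumptions about the deployment.
    import requests

    resp = requests.get("http://localhost:8080/edge_worker/v1/health")
    assert resp.json() == {"status": "healthy"}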
airflow/providers/edge3/worker_api/routes/jobs.py
@@ -0,0 +1,162 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from typing import Annotated
+
+from sqlalchemy import select, update
+
+from airflow.providers.edge3.models.edge_job import EdgeJobModel
+from airflow.providers.edge3.worker_api.auth import jwt_token_authorization_rest
+from airflow.providers.edge3.worker_api.datamodels import (
+    EdgeJobFetched,
+    WorkerApiDocs,
+    WorkerQueuesBody,
+)
+from airflow.providers.edge3.worker_api.routes._v2_compat import (
+    AirflowRouter,
+    Body,
+    Depends,
+    SessionDep,
+    create_openapi_http_exception_doc,
+    parse_command,
+    status,
+)
+from airflow.stats import Stats
+from airflow.utils import timezone
+from airflow.utils.sqlalchemy import with_row_locks
+from airflow.utils.state import TaskInstanceState
+
+jobs_router = AirflowRouter(tags=["Jobs"], prefix="/jobs")
+
+
+@jobs_router.post(
+    "/fetch/{worker_name}",
+    dependencies=[Depends(jwt_token_authorization_rest)],
+    responses=create_openapi_http_exception_doc(
+        [
+            status.HTTP_400_BAD_REQUEST,
+            status.HTTP_403_FORBIDDEN,
+        ]
+    ),
+)
+def fetch(
+    worker_name: str,
+    body: Annotated[
+        WorkerQueuesBody,
+        Body(
+            title="Queues and capacity",
+            description="The queues and capacity from which the worker can fetch jobs.",
+        ),
+    ],
+    session: SessionDep,
+) -> EdgeJobFetched | None:
+    """Fetch a job to execute on the edge worker."""
+    query = (
+        select(EdgeJobModel)
+        .where(
+            EdgeJobModel.state == TaskInstanceState.QUEUED,
+            EdgeJobModel.concurrency_slots <= body.free_concurrency,
+        )
+        .order_by(EdgeJobModel.queued_dttm)
+    )
+    if body.queues:
+        query = query.where(EdgeJobModel.queue.in_(body.queues))
+    query = query.limit(1)
+    query = with_row_locks(query, of=EdgeJobModel, session=session, skip_locked=True)
+    job: EdgeJobModel = session.scalar(query)
+    if not job:
+        return None
+    job.state = TaskInstanceState.RUNNING
+    job.edge_worker = worker_name
+    job.last_update = timezone.utcnow()
+    session.commit()
+    # The edge worker does not backport emitted Airflow metrics, so export some metrics here
+    tags = {"dag_id": job.dag_id, "task_id": job.task_id, "queue": job.queue}
+    Stats.incr(f"edge_worker.ti.start.{job.queue}.{job.dag_id}.{job.task_id}", tags=tags)
+    Stats.incr("edge_worker.ti.start", tags=tags)
+    return EdgeJobFetched(
+        dag_id=job.dag_id,
+        task_id=job.task_id,
+        run_id=job.run_id,
+        map_index=job.map_index,
+        try_number=job.try_number,
+        command=parse_command(job.command),
+        concurrency_slots=job.concurrency_slots,
+    )
+
+
+@jobs_router.patch(
+    "/state/{dag_id}/{task_id}/{run_id}/{try_number}/{map_index}/{state}",
+    dependencies=[Depends(jwt_token_authorization_rest)],
+    responses=create_openapi_http_exception_doc(
+        [
+            status.HTTP_400_BAD_REQUEST,
+            status.HTTP_403_FORBIDDEN,
+        ]
+    ),
+)
+def state(
+    dag_id: Annotated[str, WorkerApiDocs.dag_id],
+    task_id: Annotated[str, WorkerApiDocs.task_id],
+    run_id: Annotated[str, WorkerApiDocs.run_id],
+    try_number: Annotated[int, WorkerApiDocs.try_number],
+    map_index: Annotated[int, WorkerApiDocs.map_index],
+    state: Annotated[TaskInstanceState, WorkerApiDocs.state],
+    session: SessionDep,
+) -> None:
+    """Update the state of a job running on the edge worker."""
+    # Query the job first to capture the queue and check whether the state toggles to success or failed;
+    # otherwise it is possible that the executor resets orphaned jobs and stats are exported twice.
+    if state in [TaskInstanceState.SUCCESS, TaskInstanceState.FAILED]:
+        query = select(EdgeJobModel).where(
+            EdgeJobModel.dag_id == dag_id,
+            EdgeJobModel.task_id == task_id,
+            EdgeJobModel.run_id == run_id,
+            EdgeJobModel.map_index == map_index,
+            EdgeJobModel.try_number == try_number,
+            EdgeJobModel.state == TaskInstanceState.RUNNING,
+        )
+        job = session.scalar(query)
+
+        if job:
+            # The edge worker does not backport emitted Airflow metrics, so export some metrics here
+            tags = {
+                "dag_id": job.dag_id,
+                "task_id": job.task_id,
+                "queue": job.queue,
+                "state": str(state),
+            }
+            Stats.incr(
+                f"edge_worker.ti.finish.{job.queue}.{state}.{job.dag_id}.{job.task_id}",
+                tags=tags,
+            )
+            Stats.incr("edge_worker.ti.finish", tags=tags)
+
+    query = (
+        update(EdgeJobModel)
+        .where(
+            EdgeJobModel.dag_id == dag_id,
+            EdgeJobModel.task_id == task_id,
+            EdgeJobModel.run_id == run_id,
+            EdgeJobModel.map_index == map_index,
+            EdgeJobModel.try_number == try_number,
+        )
+        .values(state=state, last_update=timezone.utcnow())
+    )
+    session.execute(query)
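
As a rough illustration of the fetch contract above: a worker with two free slots listening on two queues would POST a body with the `WorkerQueuesBody` fields, and only queued jobs whose `concurrency_slots` fit into `free_concurrency` are offered. URL, worker name, and queue names are placeholders.

    # Hypothetical sketch, assuming the API is mounted under /edge_worker/v1.
    import requests

    resp = requests.post(
        "http://localhost:8080/edge_worker/v1/jobs/fetch/worker-1",
        json={
            "queues": ["default", "gpu"],  # optional filter; omit to fetch from any queue
            "free_concurrency": 2,         # only jobs with concurrency_slots <= 2 are offered
        },
        headers={"Authorization": "<jwt-token>"},  # checked by jwt_token_authorization_rest
    )
    job = resp.json()  # an EdgeJobFetched payload, or null when no queued job matches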
airflow/providers/edge3/worker_api/routes/logs.py
@@ -0,0 +1,133 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from functools import cache
+from pathlib import Path
+from typing import TYPE_CHECKING, Annotated
+
+from airflow.configuration import conf
+from airflow.models.taskinstance import TaskInstance
+from airflow.models.taskinstancekey import TaskInstanceKey
+from airflow.providers.edge3.models.edge_logs import EdgeLogsModel
+from airflow.providers.edge3.worker_api.auth import jwt_token_authorization_rest
+from airflow.providers.edge3.worker_api.datamodels import PushLogsBody, WorkerApiDocs
+from airflow.providers.edge3.worker_api.routes._v2_compat import (
+    AirflowRouter,
+    Body,
+    Depends,
+    SessionDep,
+    create_openapi_http_exception_doc,
+    status,
+)
+from airflow.utils.log.file_task_handler import FileTaskHandler
+from airflow.utils.session import NEW_SESSION, provide_session
+
+logs_router = AirflowRouter(tags=["Logs"], prefix="/logs")
+
+
+@cache
+@provide_session
+def _logfile_path(task: TaskInstanceKey, session=NEW_SESSION) -> str:
+    """Elaborate the (relative) path and filename to expect from task execution."""
+    ti = TaskInstance.get_task_instance(
+        dag_id=task.dag_id,
+        run_id=task.run_id,
+        task_id=task.task_id,
+        map_index=task.map_index,
+        session=session,
+    )
+    if TYPE_CHECKING:
+        assert ti
+        assert isinstance(ti, TaskInstance)
+    return FileTaskHandler(".")._render_filename(ti, task.try_number)
+
+
+@logs_router.get(
+    "/logfile_path/{dag_id}/{task_id}/{run_id}/{try_number}/{map_index}",
+    dependencies=[Depends(jwt_token_authorization_rest)],
+    responses=create_openapi_http_exception_doc(
+        [
+            status.HTTP_400_BAD_REQUEST,
+            status.HTTP_403_FORBIDDEN,
+        ]
+    ),
+)
+def logfile_path(
+    dag_id: Annotated[str, WorkerApiDocs.dag_id],
+    task_id: Annotated[str, WorkerApiDocs.task_id],
+    run_id: Annotated[str, WorkerApiDocs.run_id],
+    try_number: Annotated[int, WorkerApiDocs.try_number],
+    map_index: Annotated[int, WorkerApiDocs.map_index],
+) -> str:
+    """Elaborate the path and filename to expect from task execution."""
+    task = TaskInstanceKey(
+        dag_id=dag_id, task_id=task_id, run_id=run_id, try_number=try_number, map_index=map_index
+    )
+    return _logfile_path(task)
+
+
+@logs_router.post(
+    "/push/{dag_id}/{task_id}/{run_id}/{try_number}/{map_index}",
+    dependencies=[Depends(jwt_token_authorization_rest)],
+    responses=create_openapi_http_exception_doc(
+        [
+            status.HTTP_400_BAD_REQUEST,
+            status.HTTP_403_FORBIDDEN,
+        ]
+    ),
+)
+def push_logs(
+    dag_id: Annotated[str, WorkerApiDocs.dag_id],
+    task_id: Annotated[str, WorkerApiDocs.task_id],
+    run_id: Annotated[str, WorkerApiDocs.run_id],
+    try_number: Annotated[int, WorkerApiDocs.try_number],
+    map_index: Annotated[int, WorkerApiDocs.map_index],
+    body: Annotated[
+        PushLogsBody,
+        Body(
+            title="Log data chunks",
+            description="The remote worker has no access to the log sink; with this endpoint it can send log chunks to the central site.",
+        ),
+    ],
+    session: SessionDep,
+) -> None:
+    """Push an incremental log chunk from the edge worker to the central site."""
+    log_chunk = EdgeLogsModel(
+        dag_id=dag_id,
+        task_id=task_id,
+        run_id=run_id,
+        map_index=map_index,
+        try_number=try_number,
+        log_chunk_time=body.log_chunk_time,
+        log_chunk_data=body.log_chunk_data,
+    )
+    session.add(log_chunk)
+    # Write logs to a local file as well to make them accessible
+    task = TaskInstanceKey(
+        dag_id=dag_id, task_id=task_id, run_id=run_id, try_number=try_number, map_index=map_index
+    )
+    base_log_folder = conf.get("logging", "base_log_folder", fallback="NOT AVAILABLE")
+    logfile_path = Path(base_log_folder, _logfile_path(task))
+    if not logfile_path.exists():
+        new_folder_permissions = int(
+            conf.get("logging", "file_task_handler_new_folder_permissions", fallback="0o775"), 8
+        )
+        logfile_path.parent.mkdir(parents=True, exist_ok=True, mode=new_folder_permissions)
+    with logfile_path.open("a") as logfile:
+        logfile.write(body.log_chunk_data)
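
To round off the log flow above, a sketch of pushing one incremental chunk with the `PushLogsBody` fields. The dag/run identifiers, host, and mount point are placeholders, and the ISO 8601 serialization of `log_chunk_time` is an assumption about the model's JSON encoding.

    # Hypothetical sketch; map_index -1 denotes an unmapped task instance.
    from datetime import datetime, timezone

    import requests

    resp = requests.post(
        "http://localhost:8080/edge_worker/v1/logs/push/my_dag/my_task/manual__2025-01-01/1/-1",
        json={
            "log_chunk_time": datetime.now(timezone.utc).isoformat(),  # assumed wire format
            "log_chunk_data": "[2025-01-01 00:00:00] INFO - task started\n",
        },
        headers={"Authorization": "<jwt-token>"},  # checked by jwt_token_authorization_rest
    )
    resp.raise_for_status()  # the chunk is stored in EdgeLogsModel and appended to the local logfile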