apache-airflow-providers-edge3 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. airflow/providers/edge3/LICENSE +201 -0
  2. airflow/providers/edge3/__init__.py +39 -0
  3. airflow/providers/edge3/cli/__init__.py +16 -0
  4. airflow/providers/edge3/cli/api_client.py +206 -0
  5. airflow/providers/edge3/cli/dataclasses.py +95 -0
  6. airflow/providers/edge3/cli/edge_command.py +689 -0
  7. airflow/providers/edge3/example_dags/__init__.py +16 -0
  8. airflow/providers/edge3/example_dags/integration_test.py +164 -0
  9. airflow/providers/edge3/example_dags/win_notepad.py +83 -0
  10. airflow/providers/edge3/example_dags/win_test.py +342 -0
  11. airflow/providers/edge3/executors/__init__.py +22 -0
  12. airflow/providers/edge3/executors/edge_executor.py +367 -0
  13. airflow/providers/edge3/get_provider_info.py +99 -0
  14. airflow/providers/edge3/models/__init__.py +16 -0
  15. airflow/providers/edge3/models/edge_job.py +94 -0
  16. airflow/providers/edge3/models/edge_logs.py +73 -0
  17. airflow/providers/edge3/models/edge_worker.py +230 -0
  18. airflow/providers/edge3/openapi/__init__.py +19 -0
  19. airflow/providers/edge3/openapi/edge_worker_api_v1.yaml +808 -0
  20. airflow/providers/edge3/plugins/__init__.py +16 -0
  21. airflow/providers/edge3/plugins/edge_executor_plugin.py +229 -0
  22. airflow/providers/edge3/plugins/templates/edge_worker_hosts.html +175 -0
  23. airflow/providers/edge3/plugins/templates/edge_worker_jobs.html +69 -0
  24. airflow/providers/edge3/version_compat.py +36 -0
  25. airflow/providers/edge3/worker_api/__init__.py +17 -0
  26. airflow/providers/edge3/worker_api/app.py +43 -0
  27. airflow/providers/edge3/worker_api/auth.py +135 -0
  28. airflow/providers/edge3/worker_api/datamodels.py +190 -0
  29. airflow/providers/edge3/worker_api/routes/__init__.py +16 -0
  30. airflow/providers/edge3/worker_api/routes/_v2_compat.py +135 -0
  31. airflow/providers/edge3/worker_api/routes/_v2_routes.py +237 -0
  32. airflow/providers/edge3/worker_api/routes/health.py +28 -0
  33. airflow/providers/edge3/worker_api/routes/jobs.py +162 -0
  34. airflow/providers/edge3/worker_api/routes/logs.py +133 -0
  35. airflow/providers/edge3/worker_api/routes/worker.py +224 -0
  36. apache_airflow_providers_edge3-1.0.0rc1.dist-info/METADATA +117 -0
  37. apache_airflow_providers_edge3-1.0.0rc1.dist-info/RECORD +39 -0
  38. apache_airflow_providers_edge3-1.0.0rc1.dist-info/WHEEL +4 -0
  39. apache_airflow_providers_edge3-1.0.0rc1.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,367 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
+
+ from __future__ import annotations
+
+ import contextlib
+ from collections.abc import Sequence
+ from copy import deepcopy
+ from datetime import datetime, timedelta
+ from typing import TYPE_CHECKING, Any, Optional
+
+ from sqlalchemy import delete, inspect, text
+ from sqlalchemy.exc import NoSuchTableError
+ from sqlalchemy.orm import Session
+
+ from airflow.cli.cli_config import GroupCommand
+ from airflow.configuration import conf
+ from airflow.executors.base_executor import BaseExecutor
+ from airflow.models.abstractoperator import DEFAULT_QUEUE
+ from airflow.models.taskinstance import TaskInstance, TaskInstanceState
+ from airflow.providers.edge3.cli.edge_command import EDGE_COMMANDS
+ from airflow.providers.edge3.models.edge_job import EdgeJobModel
+ from airflow.providers.edge3.models.edge_logs import EdgeLogsModel
+ from airflow.providers.edge3.models.edge_worker import EdgeWorkerModel, EdgeWorkerState, reset_metrics
+ from airflow.providers.edge3.version_compat import AIRFLOW_V_3_0_PLUS
+ from airflow.stats import Stats
+ from airflow.utils import timezone
+ from airflow.utils.db import DBLocks, create_global_lock
+ from airflow.utils.session import NEW_SESSION, provide_session
+
+ if TYPE_CHECKING:
+     import argparse
+
+     from sqlalchemy.engine.base import Engine
+
+     from airflow.executors.base_executor import CommandType
+     from airflow.models.taskinstancekey import TaskInstanceKey
+
+     # Task tuple to send to be executed
+     TaskTuple = tuple[TaskInstanceKey, CommandType, Optional[str], Optional[Any]]
+
+ PARALLELISM: int = conf.getint("core", "PARALLELISM")
+
+
+ class EdgeExecutor(BaseExecutor):
+     """Implementation of the EdgeExecutor to distribute work to Edge Workers via HTTP."""
+
+     def __init__(self, parallelism: int = PARALLELISM):
+         super().__init__(parallelism=parallelism)
+         self.last_reported_state: dict[TaskInstanceKey, TaskInstanceState] = {}
+
+     def _check_db_schema(self, engine: Engine) -> None:
+         """
+         Check if an already existing table matches the newest table schema.
+
+         Workaround until Airflow 3.0.0; afterwards alembic can also be used for provider distributions.
+         """
+         inspector = inspect(engine)
+         edge_job_columns = None
+         with contextlib.suppress(NoSuchTableError):
+             edge_job_columns = [column["name"] for column in inspector.get_columns("edge_job")]
+
+         # version 0.6.0rc1 added new column concurrency_slots
+         if edge_job_columns and "concurrency_slots" not in edge_job_columns:
+             EdgeJobModel.metadata.drop_all(engine, tables=[EdgeJobModel.__table__])
+
+         edge_worker_columns = None
+         with contextlib.suppress(NoSuchTableError):
+             edge_worker_columns = [column["name"] for column in inspector.get_columns("edge_worker")]
+
+         # version 0.14.0pre0 added new column maintenance_comment
+         if edge_worker_columns and "maintenance_comment" not in edge_worker_columns:
+             with Session(engine) as session:
+                 query = "ALTER TABLE edge_worker ADD maintenance_comment VARCHAR(1024);"
+                 session.execute(text(query))
+                 session.commit()
+
+     @provide_session
+     def start(self, session: Session = NEW_SESSION):
+         """If the EdgeExecutor provider is loaded for the first time, ensure its tables exist."""
+         with create_global_lock(session=session, lock=DBLocks.MIGRATIONS):
+             engine = session.get_bind().engine
+             self._check_db_schema(engine)
+             EdgeJobModel.metadata.create_all(engine)
+             EdgeLogsModel.metadata.create_all(engine)
+             EdgeWorkerModel.metadata.create_all(engine)
+
+     def _process_tasks(self, task_tuples: list[TaskTuple]) -> None:
+         """
+         Temporary override of the _process_tasks function.
+
+         The idea is to not change the interface of execute_async in BaseExecutor, as it will change in Airflow 3.
+         The Edge worker needs the task_instance in execute_async, but BaseExecutor removes it from self.queued_tasks.
+         Store queued_tasks in a separate attribute so it can still be accessed in execute_async.
+         """
+         self.edge_queued_tasks = deepcopy(self.queued_tasks)
+         super()._process_tasks(task_tuples)
+
+     @provide_session
+     def execute_async(
+         self,
+         key: TaskInstanceKey,
+         command: CommandType,
+         queue: str | None = None,
+         executor_config: Any | None = None,
+         session: Session = NEW_SESSION,
+     ) -> None:
+         """Execute asynchronously. Airflow 2.10 entry point to execute a task."""
+         # Temporary trick to get the task instance; will be changed with Airflow 3.0.0.
+         # This works together with the _process_tasks override above to get the task instance.
+         task_instance = self.edge_queued_tasks[key][3]  # TaskInstance is the fourth element
+         del self.edge_queued_tasks[key]
+
+         self.validate_airflow_tasks_run_command(command)
+         session.add(
+             EdgeJobModel(
+                 dag_id=key.dag_id,
+                 task_id=key.task_id,
+                 run_id=key.run_id,
+                 map_index=key.map_index,
+                 try_number=key.try_number,
+                 state=TaskInstanceState.QUEUED,
+                 queue=queue or DEFAULT_QUEUE,
+                 concurrency_slots=task_instance.pool_slots,
+                 command=str(command),
+             )
+         )
+
+     @provide_session
+     def queue_workload(
+         self,
+         workload: Any,  # Actually "airflow.executors.workloads.All", which does not exist in Airflow 2.10
+         session: Session = NEW_SESSION,
+     ) -> None:
+         """Put new workload to queue. Airflow 3 entry point to execute a task."""
+         from airflow.executors import workloads
+
+         if not isinstance(workload, workloads.ExecuteTask):
+             raise TypeError(f"Don't know how to queue workload of type {type(workload).__name__}")
+
+         task_instance = workload.ti
+         key = task_instance.key
+         session.add(
+             EdgeJobModel(
+                 dag_id=key.dag_id,
+                 task_id=key.task_id,
+                 run_id=key.run_id,
+                 map_index=key.map_index,
+                 try_number=key.try_number,
+                 state=TaskInstanceState.QUEUED,
+                 queue=task_instance.queue,
+                 concurrency_slots=task_instance.pool_slots,
+                 command=workload.model_dump_json(),
+             )
+         )
+
+     def _check_worker_liveness(self, session: Session) -> bool:
+         """Reset worker state if heartbeat timed out."""
+         changed = False
+         heartbeat_interval: int = conf.getint("edge", "heartbeat_interval")
+         lifeless_workers: list[EdgeWorkerModel] = (
+             session.query(EdgeWorkerModel)
+             .with_for_update(skip_locked=True)
+             .filter(
+                 EdgeWorkerModel.state.not_in(
+                     [EdgeWorkerState.UNKNOWN, EdgeWorkerState.OFFLINE, EdgeWorkerState.OFFLINE_MAINTENANCE]
+                 ),
+                 EdgeWorkerModel.last_update < (timezone.utcnow() - timedelta(seconds=heartbeat_interval * 5)),
+             )
+             .all()
+         )
+
+         for worker in lifeless_workers:
+             changed = True
+             worker.state = EdgeWorkerState.UNKNOWN
+             reset_metrics(worker.worker_name)
+
+         return changed
+
+     def _update_orphaned_jobs(self, session: Session) -> bool:
+         """Update the status of jobs whose workers died and no longer send updates."""
+         if AIRFLOW_V_3_0_PLUS:
+             heartbeat_interval_config_name = "task_instance_heartbeat_timeout"
+         else:
+             heartbeat_interval_config_name = "scheduler_zombie_task_threshold"
+         heartbeat_interval: int = conf.getint("scheduler", heartbeat_interval_config_name)
+         lifeless_jobs: list[EdgeJobModel] = (
+             session.query(EdgeJobModel)
+             .with_for_update(skip_locked=True)
+             .filter(
+                 EdgeJobModel.state == TaskInstanceState.RUNNING,
+                 EdgeJobModel.last_update < (timezone.utcnow() - timedelta(seconds=heartbeat_interval)),
+             )
+             .all()
+         )
+
+         for job in lifeless_jobs:
+             ti = TaskInstance.get_task_instance(
+                 dag_id=job.dag_id,
+                 run_id=job.run_id,
+                 task_id=job.task_id,
+                 map_index=job.map_index,
+                 session=session,
+             )
+             job.state = ti.state if ti else TaskInstanceState.REMOVED
+
+             if job.state != TaskInstanceState.RUNNING:
+                 # The Edge worker does not report back the Airflow metrics it emits, so export some metrics here.
+                 # Export them as failed, as these jobs will be deleted in the future.
+                 tags = {
+                     "dag_id": job.dag_id,
+                     "task_id": job.task_id,
+                     "queue": job.queue,
+                     "state": str(TaskInstanceState.FAILED),
+                 }
+                 Stats.incr(
+                     f"edge_worker.ti.finish.{job.queue}.{TaskInstanceState.FAILED}.{job.dag_id}.{job.task_id}",
+                     tags=tags,
+                 )
+                 Stats.incr("edge_worker.ti.finish", tags=tags)
+
+         return bool(lifeless_jobs)
+
+     def _purge_jobs(self, session: Session) -> bool:
+         """Clean finished jobs."""
+         purged_marker = False
+         job_success_purge = conf.getint("edge", "job_success_purge")
+         job_fail_purge = conf.getint("edge", "job_fail_purge")
+         jobs: list[EdgeJobModel] = (
+             session.query(EdgeJobModel)
+             .with_for_update(skip_locked=True)
+             .filter(
+                 EdgeJobModel.state.in_(
+                     [
+                         TaskInstanceState.RUNNING,
+                         TaskInstanceState.SUCCESS,
+                         TaskInstanceState.FAILED,
+                         TaskInstanceState.REMOVED,
+                         TaskInstanceState.RESTARTING,
+                         TaskInstanceState.UP_FOR_RETRY,
+                     ]
+                 )
+             )
+             .all()
+         )
+
+         # Sync the DB with the executor; otherwise they run out of sync in a multi-scheduler deployment.
+         already_removed = self.running - set(job.key for job in jobs)
+         self.running = self.running - already_removed
+
+         for job in jobs:
+             if job.key in self.running:
+                 if job.state == TaskInstanceState.RUNNING:
+                     if (
+                         job.key not in self.last_reported_state
+                         or self.last_reported_state[job.key] != job.state
+                     ):
+                         self.running_state(job.key)
+                     self.last_reported_state[job.key] = job.state
+                 elif job.state == TaskInstanceState.SUCCESS:
+                     if job.key in self.last_reported_state:
+                         del self.last_reported_state[job.key]
+                     self.success(job.key)
+                 elif job.state in [
+                     TaskInstanceState.FAILED,
+                     TaskInstanceState.RESTARTING,
+                     TaskInstanceState.UP_FOR_RETRY,
+                 ]:
+                     if job.key in self.last_reported_state:
+                         del self.last_reported_state[job.key]
+                     self.fail(job.key)
+                 else:
+                     self.last_reported_state[job.key] = job.state
+             if (
+                 job.state == TaskInstanceState.SUCCESS
+                 and job.last_update_t < (datetime.now() - timedelta(minutes=job_success_purge)).timestamp()
+             ) or (
+                 job.state
+                 in (
+                     TaskInstanceState.FAILED,
+                     TaskInstanceState.REMOVED,
+                     TaskInstanceState.RESTARTING,
+                     TaskInstanceState.UP_FOR_RETRY,
+                 )
+                 and job.last_update_t < (datetime.now() - timedelta(minutes=job_fail_purge)).timestamp()
+             ):
+                 if job.key in self.last_reported_state:
+                     del self.last_reported_state[job.key]
+                 purged_marker = True
+                 session.delete(job)
+                 session.execute(
+                     delete(EdgeLogsModel).where(
+                         EdgeLogsModel.dag_id == job.dag_id,
+                         EdgeLogsModel.run_id == job.run_id,
+                         EdgeLogsModel.task_id == job.task_id,
+                         EdgeLogsModel.map_index == job.map_index,
+                         EdgeLogsModel.try_number == job.try_number,
+                     )
+                 )
+
+         return purged_marker
+
+     @provide_session
+     def sync(self, session: Session = NEW_SESSION) -> None:
+         """Sync will get called periodically by the heartbeat method."""
+         with Stats.timer("edge_executor.sync.duration"):
+             orphaned = self._update_orphaned_jobs(session)
+             purged = self._purge_jobs(session)
+             liveness = self._check_worker_liveness(session)
+             if purged or liveness or orphaned:
+                 session.commit()
+
+     def end(self) -> None:
+         """End the executor."""
+         self.log.info("Shutting down EdgeExecutor")
+
+     def terminate(self):
+         """Terminate the executor; this implementation is a no-op."""
+
+     def try_adopt_task_instances(self, tis: Sequence[TaskInstance]) -> Sequence[TaskInstance]:
+         """
+         Try to adopt running task instances that have been abandoned by a SchedulerJob dying.
+
+         Anything that is not adopted will be cleared by the scheduler (and then become eligible for
+         re-scheduling)
+
+         :return: any TaskInstances that were unable to be adopted
+         """
+         # We handle all running tasks from the DB in sync, no adoption logic needed.
+         return []
+
+     @staticmethod
+     def get_cli_commands() -> list[GroupCommand]:
+         return [
+             GroupCommand(
+                 name="edge",
+                 help="Edge Worker components",
+                 description=(
+                     "Start and manage Edge Worker. Works only when using EdgeExecutor. For more information, "
+                     "see https://airflow.apache.org/docs/apache-airflow-providers-edge3/stable/edge_executor.html"
+                 ),
+                 subcommands=EDGE_COMMANDS,
+             ),
+         ]
+
+
+ def _get_parser() -> argparse.ArgumentParser:
+     """
+     Generate documentation; used by Sphinx.
+
+     :meta private:
+     """
+     return EdgeExecutor._get_parser()
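For context, the executor above is selected through normal Airflow configuration; the `[edge]` options it reads are documented in `get_provider_info.py` below. A minimal deployment sketch, assuming the standard `AIRFLOW__{SECTION}__{KEY}` environment-variable convention (the hostname is a placeholder):

import os

# Select the EdgeExecutor (module path taken from the provider's "executors" entry).
os.environ["AIRFLOW__CORE__EXECUTOR"] = "airflow.providers.edge3.executors.EdgeExecutor"
# Enable the worker-facing API served by the plugin and tell workers where to reach it.
os.environ["AIRFLOW__EDGE__API_ENABLED"] = "True"
os.environ["AIRFLOW__EDGE__API_URL"] = "https://airflow.example.org/edge_worker/v1/rpcapi"  # placeholder URL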
@@ -0,0 +1,99 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
+
+ # NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN!
+ #
+ # IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE
+ # `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY
+
+
+ def get_provider_info():
+     return {
+         "package-name": "apache-airflow-providers-edge3",
+         "name": "Edge Executor",
+         "description": "Handle edge workers on remote sites via HTTP(s) connection and orchestrates work over distributed sites\n",
+         "plugins": [
+             {
+                 "name": "edge_executor",
+                 "plugin-class": "airflow.providers.edge3.plugins.edge_executor_plugin.EdgeExecutorPlugin",
+             }
+         ],
+         "executors": ["airflow.providers.edge3.executors.EdgeExecutor"],
+         "config": {
+             "edge": {
+                 "description": "This section only applies if you are using the EdgeExecutor in\n``[core]`` section above\n",
+                 "options": {
+                     "api_enabled": {
+                         "description": "Flag if the plugin endpoint is enabled to serve Edge Workers.\n",
+                         "version_added": None,
+                         "type": "boolean",
+                         "example": "True",
+                         "default": "False",
+                     },
+                     "api_url": {
+                         "description": "URL endpoint on which the Airflow Edge API is accessible from the edge worker.\n",
+                         "version_added": None,
+                         "type": "string",
+                         "example": "https://airflow.hosting.org/edge_worker/v1/rpcapi",
+                         "default": None,
+                     },
+                     "job_poll_interval": {
+                         "description": "Edge Worker currently polls for new jobs via HTTP. This parameter defines the number\nof seconds it should sleep between polls for new jobs.\nJob polling only happens when the Edge Worker is looking for new work, not while it is busy.\n",
+                         "version_added": None,
+                         "type": "integer",
+                         "example": "5",
+                         "default": "5",
+                     },
+                     "heartbeat_interval": {
+                         "description": "Edge Worker continuously reports status to the central site. This parameter defines\nhow often a status with heartbeat should be sent.\nDuring the heartbeat the status is reported and it is checked whether a running task should be terminated.\n",
+                         "version_added": None,
+                         "type": "integer",
+                         "example": "10",
+                         "default": "30",
+                     },
+                     "worker_concurrency": {
+                         "description": "The concurrency defines the default max parallel running task instances and can also be set during\nstart of worker with the ``airflow edge worker`` command parameter. The size of the workers\nand the resources must support the nature of your tasks. The parameter\nworks together with the concurrency_slots parameter of a task.\n",
+                         "version_added": None,
+                         "type": "integer",
+                         "example": None,
+                         "default": "8",
+                     },
+                     "job_success_purge": {
+                         "description": "Minutes after which successful jobs for EdgeExecutor are purged from database\n",
+                         "version_added": None,
+                         "type": "integer",
+                         "example": None,
+                         "default": "5",
+                     },
+                     "job_fail_purge": {
+                         "description": "Minutes after which failed jobs for EdgeExecutor are purged from database\n",
+                         "version_added": None,
+                         "type": "integer",
+                         "example": None,
+                         "default": "60",
+                     },
+                     "push_log_chunk_size": {
+                         "description": "Edge Worker uploads log files in chunks. If the log file part which is uploaded\nexceeds the chunk size it creates a new request. The application gateway can\nlimit the max body size, see:\nhttps://nginx.org/en/docs/http/ngx_http_core_module.html#client_max_body_size\nAn HTTP 413 error can indicate that this value needs to be adjusted.\nThis value must be defined in Bytes.\n",
+                         "version_added": None,
+                         "type": "integer",
+                         "example": None,
+                         "default": "524288",
+                     },
+                 },
+             }
+         },
+     }
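The `job_success_purge` and `job_fail_purge` options above drive the purge check in `EdgeExecutor._purge_jobs`. A minimal sketch of that threshold comparison, simplified and using the defaults from this section (states given as plain strings; `is_purgeable` is a hypothetical helper, not part of the provider):

from __future__ import annotations

from datetime import datetime, timedelta

JOB_SUCCESS_PURGE_MIN = 5  # [edge] job_success_purge default, in minutes
JOB_FAIL_PURGE_MIN = 60    # [edge] job_fail_purge default, in minutes


def is_purgeable(state: str, last_update_t: float, now: datetime | None = None) -> bool:
    """Return True once a finished edge_job row is old enough to be deleted."""
    now = now or datetime.now()
    if state == "success":
        return last_update_t < (now - timedelta(minutes=JOB_SUCCESS_PURGE_MIN)).timestamp()
    if state in ("failed", "removed", "restarting", "up_for_retry"):
        return last_update_t < (now - timedelta(minutes=JOB_FAIL_PURGE_MIN)).timestamp()
    return False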
@@ -0,0 +1,16 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
@@ -0,0 +1,94 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
+ from __future__ import annotations
+
+ from datetime import datetime
+
+ from sqlalchemy import (
+     Column,
+     Index,
+     Integer,
+     String,
+     text,
+ )
+
+ from airflow.models.base import Base, StringID
+ from airflow.models.taskinstancekey import TaskInstanceKey
+ from airflow.utils import timezone
+ from airflow.utils.log.logging_mixin import LoggingMixin
+ from airflow.utils.sqlalchemy import UtcDateTime
+
+
+ class EdgeJobModel(Base, LoggingMixin):
+     """
+     A job which is queued, waiting or running on an Edge Worker.
+
+     Each tuple in the database represents and describes the state of one job.
+     """
+
+     __tablename__ = "edge_job"
+     dag_id = Column(StringID(), primary_key=True, nullable=False)
+     task_id = Column(StringID(), primary_key=True, nullable=False)
+     run_id = Column(StringID(), primary_key=True, nullable=False)
+     map_index = Column(Integer, primary_key=True, nullable=False, server_default=text("-1"))
+     try_number = Column(Integer, primary_key=True, default=0)
+     state = Column(String(20))
+     queue = Column(String(256))
+     concurrency_slots = Column(Integer)
+     command = Column(String(1000))
+     queued_dttm = Column(UtcDateTime)
+     edge_worker = Column(String(64))
+     last_update = Column(UtcDateTime)
+
+     def __init__(
+         self,
+         dag_id: str,
+         task_id: str,
+         run_id: str,
+         map_index: int,
+         try_number: int,
+         state: str,
+         queue: str,
+         concurrency_slots: int,
+         command: str,
+         queued_dttm: datetime | None = None,
+         edge_worker: str | None = None,
+         last_update: datetime | None = None,
+     ):
+         self.dag_id = dag_id
+         self.task_id = task_id
+         self.run_id = run_id
+         self.map_index = map_index
+         self.try_number = try_number
+         self.state = state
+         self.queue = queue
+         self.concurrency_slots = concurrency_slots
+         self.command = command
+         self.queued_dttm = queued_dttm or timezone.utcnow()
+         self.edge_worker = edge_worker
+         self.last_update = last_update
+         super().__init__()
+
+     __table_args__ = (Index("rj_order", state, queued_dttm, queue),)
+
+     @property
+     def key(self):
+         return TaskInstanceKey(self.dag_id, self.task_id, self.run_id, self.try_number, self.map_index)
+
+     @property
+     def last_update_t(self) -> float:
+         return self.last_update.timestamp()
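The composite primary key above maps directly onto Airflow's `TaskInstanceKey` through the `key` property. A minimal usage sketch, for illustration only (all field values are placeholders; persisting the row would additionally require a metadata DB session):

from airflow.providers.edge3.models.edge_job import EdgeJobModel

# Hypothetical queued job row, similar to what the executor creates.
job = EdgeJobModel(
    dag_id="example_dag",
    task_id="example_task",
    run_id="manual__2025-01-01T00:00:00",
    map_index=-1,
    try_number=1,
    state="queued",
    queue="default",
    concurrency_slots=1,
    command="{}",
)
print(job.key)  # TaskInstanceKey built from (dag_id, task_id, run_id, try_number, map_index)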
@@ -0,0 +1,73 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
+ from __future__ import annotations
+
+ from datetime import datetime
+
+ from sqlalchemy import (
+     Column,
+     Integer,
+     Text,
+     text,
+ )
+ from sqlalchemy.dialects.mysql import MEDIUMTEXT
+
+ from airflow.models.base import Base, StringID
+ from airflow.utils.log.logging_mixin import LoggingMixin
+ from airflow.utils.sqlalchemy import UtcDateTime
+
+
+ class EdgeLogsModel(Base, LoggingMixin):
+     """
+     Temporarily collected logs from an Edge Worker while a job runs on a remote site.
+
+     As the Edge Worker in most cases only has a local file system and the web UI has no access
+     to read files from the remote site, Edge Workers send incremental chunks of the logs
+     of running jobs to the central site. As log storage backends in most cloud setups cannot
+     append to logs, this table is used as a buffer to receive them. Upon task completion the
+     logs can be flushed to the task log handler.
+
+     Log data is therefore collected in chunks and is only temporary.
+     """
+
+     __tablename__ = "edge_logs"
+     dag_id = Column(StringID(), primary_key=True, nullable=False)
+     task_id = Column(StringID(), primary_key=True, nullable=False)
+     run_id = Column(StringID(), primary_key=True, nullable=False)
+     map_index = Column(Integer, primary_key=True, nullable=False, server_default=text("-1"))
+     try_number = Column(Integer, primary_key=True, default=0)
+     log_chunk_time = Column(UtcDateTime, primary_key=True, nullable=False)
+     log_chunk_data = Column(Text().with_variant(MEDIUMTEXT(), "mysql"), nullable=False)
+
+     def __init__(
+         self,
+         dag_id: str,
+         task_id: str,
+         run_id: str,
+         map_index: int,
+         try_number: int,
+         log_chunk_time: datetime,
+         log_chunk_data: str,
+     ):
+         self.dag_id = dag_id
+         self.task_id = task_id
+         self.run_id = run_id
+         self.map_index = map_index
+         self.try_number = try_number
+         self.log_chunk_time = log_chunk_time
+         self.log_chunk_data = log_chunk_data
+         super().__init__()
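A minimal sketch of how one received log chunk maps onto the model above, for illustration only (identifiers are placeholders; in the provider the actual ingestion happens in the worker_api log routes, and persisting requires a metadata DB session):

from datetime import datetime, timezone

from airflow.providers.edge3.models.edge_logs import EdgeLogsModel

# One incremental chunk of a running task's log, buffered centrally until the task finishes.
chunk = EdgeLogsModel(
    dag_id="example_dag",
    task_id="example_task",
    run_id="manual__2025-01-01T00:00:00",
    map_index=-1,
    try_number=1,
    log_chunk_time=datetime.now(timezone.utc),
    log_chunk_data="[2025-01-01 00:00:01] Task started on edge worker\n",
)
# session.add(chunk); session.commit()  # with an Airflow metadata DB session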