openmetadata-managed-apis 1.10.1.0__tar.gz → 1.10.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of openmetadata-managed-apis might be problematic. Click here for more details.

Files changed (68) hide show
  1. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/PKG-INFO +1 -1
  2. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/routes/last_dag_logs.py +14 -2
  3. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/utils.py +10 -0
  4. openmetadata_managed_apis-1.10.2.0/openmetadata_managed_apis/operations/last_dag_logs.py +229 -0
  5. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis.egg-info/PKG-INFO +1 -1
  6. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/pyproject.toml +1 -1
  7. openmetadata_managed_apis-1.10.1.0/openmetadata_managed_apis/operations/last_dag_logs.py +0 -114
  8. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/LICENSE +0 -0
  9. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/README.md +0 -0
  10. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/__init__.py +0 -0
  11. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/__init__.py +0 -0
  12. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/apis_metadata.py +0 -0
  13. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/app.py +0 -0
  14. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/config.py +0 -0
  15. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/error_handlers.py +0 -0
  16. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/response.py +0 -0
  17. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/routes/__init__.py +0 -0
  18. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/routes/delete.py +0 -0
  19. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/routes/deploy.py +0 -0
  20. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/routes/disable.py +0 -0
  21. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/routes/enable.py +0 -0
  22. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/routes/health.py +0 -0
  23. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/routes/health_auth.py +0 -0
  24. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/routes/ip.py +0 -0
  25. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/routes/kill.py +0 -0
  26. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/routes/run_automation.py +0 -0
  27. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/routes/status.py +0 -0
  28. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/api/routes/trigger.py +0 -0
  29. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/operations/__init__.py +0 -0
  30. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/operations/delete.py +0 -0
  31. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/operations/deploy.py +0 -0
  32. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/operations/health.py +0 -0
  33. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/operations/kill_all.py +0 -0
  34. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/operations/state.py +0 -0
  35. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/operations/status.py +0 -0
  36. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/operations/trigger.py +0 -0
  37. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/plugin.py +0 -0
  38. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/resources/__init__.py +0 -0
  39. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/resources/dag_runner.j2 +0 -0
  40. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/utils/__init__.py +0 -0
  41. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/utils/logger.py +0 -0
  42. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/utils/parser.py +0 -0
  43. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/views/__init__.py +0 -0
  44. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/views/rest_api.py +0 -0
  45. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/views/templates/rest_api/index.html +0 -0
  46. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/__init__.py +0 -0
  47. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/config.py +0 -0
  48. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/ingestion/__init__.py +0 -0
  49. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/ingestion/application.py +0 -0
  50. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/ingestion/auto_classification.py +0 -0
  51. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/ingestion/common.py +0 -0
  52. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/ingestion/dbt.py +0 -0
  53. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/ingestion/elasticsearch_sink.py +0 -0
  54. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/ingestion/es_reindex.py +0 -0
  55. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/ingestion/lineage.py +0 -0
  56. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/ingestion/metadata.py +0 -0
  57. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/ingestion/profiler.py +0 -0
  58. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/ingestion/registry.py +0 -0
  59. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/ingestion/test_suite.py +0 -0
  60. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/ingestion/usage.py +0 -0
  61. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/workflow_builder.py +0 -0
  62. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis/workflows/workflow_factory.py +0 -0
  63. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis.egg-info/SOURCES.txt +0 -0
  64. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis.egg-info/dependency_links.txt +0 -0
  65. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis.egg-info/entry_points.txt +0 -0
  66. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis.egg-info/requires.txt +0 -0
  67. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/openmetadata_managed_apis.egg-info/top_level.txt +0 -0
  68. {openmetadata_managed_apis-1.10.1.0 → openmetadata_managed_apis-1.10.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: openmetadata_managed_apis
3
- Version: 1.10.1.0
3
+ Version: 1.10.2.0
4
4
  Summary: Airflow REST APIs to create and manage DAGS
5
5
  Author: OpenMetadata Committers
6
6
  License: Apache License
@@ -16,7 +16,11 @@ from typing import Callable
16
16
 
17
17
  from flask import Blueprint, Response, request
18
18
  from openmetadata_managed_apis.api.response import ApiResponse
19
- from openmetadata_managed_apis.api.utils import get_arg_dag_id, get_request_arg
19
+ from openmetadata_managed_apis.api.utils import (
20
+ get_arg_dag_id,
21
+ get_request_arg,
22
+ sanitize_task_id,
23
+ )
20
24
  from openmetadata_managed_apis.operations.last_dag_logs import last_dag_logs
21
25
  from openmetadata_managed_apis.utils.logger import routes_logger
22
26
 
@@ -45,7 +49,15 @@ def get_fn(blueprint: Blueprint) -> Callable:
45
49
  """
46
50
 
47
51
  dag_id = get_arg_dag_id()
48
- task_id = get_request_arg(request, "task_id")
52
+ raw_task_id = get_request_arg(request, "task_id")
53
+ task_id = sanitize_task_id(raw_task_id)
54
+
55
+ if task_id is None:
56
+ return ApiResponse.error(
57
+ status=ApiResponse.STATUS_BAD_REQUEST,
58
+ error="Invalid or missing task_id parameter",
59
+ )
60
+
49
61
  after = get_request_arg(request, "after", raise_missing=False)
50
62
 
51
63
  try:
@@ -53,6 +53,16 @@ def clean_dag_id(raw_dag_id: Optional[str]) -> Optional[str]:
53
53
  return re.sub("[^0-9a-zA-Z-_]+", "_", raw_dag_id) if raw_dag_id else None
54
54
 
55
55
 
56
def sanitize_task_id(raw_task_id: Optional[str]) -> Optional[str]:
    """
    Normalise a user-supplied task_id before it is used to build a file path.

    Each run of characters other than ASCII letters, digits, dashes and
    underscores is collapsed into a single underscore, so path separators
    and dots cannot survive.
    :param raw_task_id: Raw task ID from user input
    :return: Sanitized task ID, or None when the input is None or empty
    """
    # Guard clause: nothing to sanitize for None / empty input.
    if not raw_task_id:
        return None
    return re.sub("[^0-9a-zA-Z-_]+", "_", raw_task_id)
64
+
65
+
56
66
  def get_request_arg(req, arg, raise_missing: bool = True) -> Optional[str]:
57
67
  """
58
68
  Pick up the `arg` from the flask `req`.
@@ -0,0 +1,229 @@
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+ """
12
+ Module containing the logic to retrieve all logs from the tasks of a last DAG run
13
+ """
14
+ import os
15
+ from functools import lru_cache, partial
16
+ from io import StringIO
17
+ from typing import List, Optional, Tuple
18
+
19
+ from airflow.models import DagModel, TaskInstance
20
+ from airflow.utils.log.log_reader import TaskLogReader
21
+ from flask import Response
22
+ from openmetadata_managed_apis.api.response import ApiResponse
23
+ from openmetadata_managed_apis.utils.logger import operations_logger
24
+
25
+ logger = operations_logger()
26
+
27
# Metadata passed to TaskLogReader.read_log_stream when fetching task logs.
LOG_METADATA = {"download_logs": False}
# Pagination unit: logs are served in pages of two million units each.
CHUNK_SIZE = 2 * 10**6
# Placeholder substituted for "." in DAG / task ids when building log paths.
DOT_STR = "_DOT_"
32
+
33
+
34
@lru_cache(maxsize=10)
def get_log_file_info(log_file_path: str, mtime: int) -> Tuple[int, int]:
    """
    Report the byte size of a log file and how many CHUNK_SIZE pages it spans.

    The result is memoized on (log_file_path, mtime). ``mtime`` is never read
    in the body; it only takes part in the cache key, so a different
    modification time triggers a fresh size lookup.
    :param log_file_path: Path to log file
    :param mtime: File modification time in seconds (used as cache key)
    :return: Tuple of (file_size_bytes, total_chunks)
    """
    size_in_bytes = os.path.getsize(log_file_path)
    # Ceiling division: a partially filled trailing chunk still counts as a page.
    whole_chunks, leftover = divmod(size_in_bytes, CHUNK_SIZE)
    chunk_count = whole_chunks + 1 if leftover else whole_chunks
    return size_in_bytes, chunk_count
45
+
46
+
47
def read_log_chunk_from_file(
    file_path: str, chunk_index: int, chunk_size: Optional[int] = None
) -> Optional[str]:
    """
    Read a specific chunk from a log file without loading the entire file.

    Chunks are byte ranges of ``chunk_size`` bytes, matching the byte-based
    chunk count derived from the file size. The file is read in binary mode
    because seeking to an arbitrary computed offset is only well defined for
    binary streams. Chunk edges are nudged to UTF-8 character boundaries so a
    multi-byte character is returned whole, exactly once, by the chunk in
    which it starts. Line endings are returned as stored in the file.
    :param file_path: Path to the log file
    :param chunk_index: 0-based chunk index to read
    :param chunk_size: Chunk size in bytes; defaults to CHUNK_SIZE
    :return: Log chunk content ("" past end of file) or None if error
    """
    size = CHUNK_SIZE if chunk_size is None else chunk_size
    # A UTF-8 character has at most 3 continuation bytes after its lead byte.
    max_tail = 3
    try:
        offset = chunk_index * size
        with open(file_path, "rb") as f:
            f.seek(offset)
            data = f.read(size + max_tail)
    except Exception as exc:  # best effort: callers treat None as "use fallback"
        logger.warning(f"Failed to read log chunk from {file_path}: {exc}")
        return None

    head, tail = data[:size], data[size:]

    # Leading continuation bytes belong to a character that started in the
    # previous chunk, which already returned it in full.
    start = 0
    if offset > 0:
        while start < len(head) and start < max_tail and (head[start] & 0xC0) == 0x80:
            start += 1

    # Trailing continuation bytes just past the boundary complete a character
    # that starts inside this chunk. Skip this when the chunk holds no
    # character start of its own.
    extra = 0
    if start < len(head):
        while extra < len(tail) and (tail[extra] & 0xC0) == 0x80:
            extra += 1

    return (head[start:] + tail[:extra]).decode("utf-8", errors="replace")
63
+
64
+
65
def last_dag_logs(dag_id: str, task_id: str, after: Optional[int] = None) -> Response:
    """
    Return one page of logs for a task of the last run of a DAG.

    Validates that the DAG is registered by Airflow, has at least one run and
    that the run contains the requested task. The log file is then read one
    chunk at a time straight from disk to avoid loading it into memory; when
    that is not possible the TaskLogReader-based fallback is used.
    :param dag_id: DAG to look for
    :param task_id: Task to fetch logs from
    :param after: log stream cursor (0-based chunk index, None means first chunk)
    :return: Response with log and pagination
    """
    dag_model = DagModel.get_dagmodel(dag_id=dag_id)

    if not dag_model:
        return ApiResponse.not_found(f"DAG {dag_id} not found.")

    last_dag_run = dag_model.get_last_dagrun(include_externally_triggered=True)

    if not last_dag_run:
        return ApiResponse.not_found(f"No DAG run found for {dag_id}.")

    task_instances: List[TaskInstance] = last_dag_run.get_task_instances()

    if not task_instances:
        return ApiResponse.not_found(
            f"Cannot find any task instance for the last DagRun of {dag_id}."
        )

    # First task instance whose id matches the requested task.
    target_task_instance = next(
        (ti for ti in task_instances if ti.task_id == task_id), None
    )

    if not target_task_instance:
        return ApiResponse.bad_request(f"Task {task_id} not found in DAG {dag_id}.")

    try_number = target_task_instance._try_number  # pylint: disable=protected-access

    task_log_reader = TaskLogReader()
    if not task_log_reader.supports_read:
        return ApiResponse.server_error("Task Log Reader does not support read logs.")

    # Validate the cursor once, outside the broad try below. Otherwise a
    # non-numeric value is swallowed there and re-raised unhandled by the
    # fallback, and a negative value ends up indexing chunks from the end.
    try:
        after_idx = int(after) if after is not None else 0
    except (TypeError, ValueError):
        after_idx = -1
    if after_idx < 0:
        return ApiResponse.bad_request(
            f"Invalid after index {after} for DAG {dag_id} and Task {task_id}."
        )

    # Try to use file streaming for better performance
    try:
        from airflow.configuration import (  # pylint: disable=import-outside-toplevel
            conf,
        )

        base_log_folder = conf.get("logging", "base_log_folder")
        # dag_id and task_id are expected to be sanitized at route level
        # (NOTE(review): confirm for every caller); only dots are replaced
        # here for Airflow log path compatibility.
        dag_id_safe = dag_id.replace(".", DOT_STR)
        task_id_safe = task_id.replace(".", DOT_STR)

        log_relative_path = f"dag_id={dag_id_safe}/run_id={last_dag_run.run_id}/task_id={task_id_safe}/attempt={try_number}.log"
        log_file_path = os.path.join(base_log_folder, log_relative_path)

        # Security: validate that the resolved path stays within
        # base_log_folder to prevent directory traversal. This is
        # defense-in-depth on top of any sanitization done by the caller.
        log_file_path_real = os.path.realpath(log_file_path)
        base_log_folder_real = os.path.realpath(base_log_folder)

        if not log_file_path_real.startswith(base_log_folder_real + os.sep):
            logger.warning(
                f"Path traversal attempt detected: {log_file_path} is outside {base_log_folder}"
            )
            return ApiResponse.bad_request(
                f"Invalid log path for DAG {dag_id} and Task {task_id}."
            )

        if os.path.exists(log_file_path_real):
            stat_info = os.stat(log_file_path_real)
            # Whole-second mtime is the cache key for the size lookup.
            file_mtime = int(stat_info.st_mtime)

            _, total_chunks = get_log_file_info(log_file_path_real, file_mtime)

            if after_idx >= total_chunks:
                return ApiResponse.bad_request(
                    f"After index {after} is out of bounds. Total pagination is {total_chunks} for DAG {dag_id} and Task {task_id}."
                )

            chunk_content = read_log_chunk_from_file(log_file_path_real, after_idx)

            # None means the chunk could not be read: fall through to fallback.
            if chunk_content is not None:
                payload = {task_id: chunk_content, "total": total_chunks}
                # Only add the after if there are more pages
                if after_idx < total_chunks - 1:
                    payload["after"] = after_idx + 1
                return ApiResponse.success(payload)
    except Exception as exc:
        logger.debug(
            f"File streaming failed for DAG {dag_id}, falling back to TaskLogReader: {exc}"
        )

    # Fallback to TaskLogReader if streaming fails
    return _last_dag_logs_fallback(
        dag_id, task_id, after, target_task_instance, task_log_reader, try_number
    )
173
+
174
+
175
def _last_dag_logs_fallback(
    dag_id: str,
    task_id: str,
    after: Optional[int],
    task_instance: TaskInstance,
    task_log_reader: TaskLogReader,
    try_number: int,
) -> Response:
    """
    Fallback that reads the entire log into memory through the TaskLogReader
    and returns the requested CHUNK_SIZE-character page of it.
    :param dag_id: DAG to look for
    :param task_id: Task to fetch logs from
    :param after: log stream cursor (0-based chunk index, None means first chunk)
    :param task_instance: Task instance to fetch logs from
    :param task_log_reader: TaskLogReader instance
    :param try_number: Task attempt number
    :return: API Response
    """
    # Reject unusable cursors before doing any work: a non-numeric value would
    # raise out of int(), and a negative one would index chunks from the end.
    try:
        after_idx = int(after) if after is not None else 0
    except (TypeError, ValueError):
        after_idx = -1
    if after_idx < 0:
        return ApiResponse.bad_request(
            f"Invalid after index {after} for DAG {dag_id} and Task {task_id}."
        )

    raw_logs_str = "".join(
        task_log_reader.read_log_stream(
            ti=task_instance,
            try_number=try_number,
            # Pass a copy so the reader cannot alter the shared module-level
            # dict (NOTE(review): defensive; confirm whether Airflow mutates it).
            metadata=dict(LOG_METADATA),
        )
    )

    if not raw_logs_str:
        return ApiResponse.bad_request(
            f"Can't fetch logs for DAG {dag_id} and Task {task_id}."
        )

    # Ceiling division gives the page count without materialising every chunk.
    total = -(-len(raw_logs_str) // CHUNK_SIZE)

    if after_idx >= total:
        return ApiResponse.bad_request(
            f"After index {after} is out of bounds. Total pagination is {total} for DAG {dag_id} and Task {task_id}."
        )

    start = after_idx * CHUNK_SIZE
    payload = {task_id: raw_logs_str[start : start + CHUNK_SIZE], "total": total}
    # Only add the after if there are more pages
    if after_idx < total - 1:
        payload["after"] = after_idx + 1
    return ApiResponse.success(payload)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: openmetadata_managed_apis
3
- Version: 1.10.1.0
3
+ Version: 1.10.2.0
4
4
  Summary: Airflow REST APIs to create and manage DAGS
5
5
  Author: OpenMetadata Committers
6
6
  License: Apache License
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
  # since it helps us organize and isolate version management
7
7
  [project]
8
8
  name = "openmetadata_managed_apis"
9
- version = "1.10.1.0"
9
+ version = "1.10.2.0"
10
10
  readme = "README.md"
11
11
  authors = [
12
12
  {name = "OpenMetadata Committers"}
@@ -1,114 +0,0 @@
1
- # Copyright 2022 Collate
2
- # Licensed under the Collate Community License, Version 1.0 (the "License");
3
- # you may not use this file except in compliance with the License.
4
- # You may obtain a copy of the License at
5
- # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
- # Unless required by applicable law or agreed to in writing, software
7
- # distributed under the License is distributed on an "AS IS" BASIS,
8
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
- # See the License for the specific language governing permissions and
10
- # limitations under the License.
11
- """
12
- Module containing the logic to retrieve all logs from the tasks of a last DAG run
13
- """
14
- from functools import partial
15
- from io import StringIO
16
- from typing import List, Optional
17
-
18
- from airflow.models import DagModel, TaskInstance
19
- from airflow.utils.log.log_reader import TaskLogReader
20
- from flask import Response
21
- from openmetadata_managed_apis.api.response import ApiResponse
22
-
23
# Metadata passed to TaskLogReader.read_log_stream when fetching task logs.
LOG_METADATA = {"download_logs": False}
# Logs are paginated in chunks of 2M characters.
CHUNK_SIZE = 2 * 10**6
28
-
29
-
30
def last_dag_logs(dag_id: str, task_id: str, after: Optional[int] = None) -> Response:
    """Return one page of the logs of a task from the last run of a DAG.

    Checks that the DAG is registered by Airflow and has at least one run
    with task instances, reads the full log of the requested task through
    the TaskLogReader and serves it in pages of CHUNK_SIZE characters.

    Args:
        dag_id (str): DAG to look for
        task_id (str): Task to fetch logs from
        after (int): log stream cursor

    Return:
        Response with log and pagination
    """
    model = DagModel.get_dagmodel(dag_id=dag_id)
    if not model:
        return ApiResponse.not_found(f"DAG {dag_id} not found.")

    latest_run = model.get_last_dagrun(include_externally_triggered=True)
    if not latest_run:
        return ApiResponse.not_found(f"No DAG run found for {dag_id}.")

    instances: List[TaskInstance] = latest_run.get_task_instances()
    if not instances:
        return ApiResponse.not_found(
            f"Cannot find any task instance for the last DagRun of {dag_id}."
        )

    raw_logs_str = None

    for candidate in instances:
        # Only fetch the required logs
        if candidate.task_id != task_id:
            continue

        # Pick up the _try_number, otherwise they are adding 1
        attempt = candidate._try_number  # pylint: disable=protected-access

        reader = TaskLogReader()
        if not reader.supports_read:
            return ApiResponse.server_error(
                "Task Log Reader does not support read logs."
            )

        # Even when generating a ton of logs, we just get a single element.
        # Same happens when trying to call task_log_reader.read_log_chunks
        # We'll create our own chunk size and paginate based on that
        pieces = list(
            reader.read_log_stream(
                ti=candidate,
                try_number=attempt,
                metadata=LOG_METADATA,
            )
        )
        raw_logs_str = "".join(pieces)

    if not raw_logs_str:
        return ApiResponse.bad_request(
            f"Can't fetch logs for DAG {dag_id} and Task {task_id}."
        )

    # Cut the log into consecutive pages of CHUNK_SIZE characters
    log_chunks = [
        raw_logs_str[begin : begin + CHUNK_SIZE]
        for begin in range(0, len(raw_logs_str), CHUNK_SIZE)
    ]

    total = len(log_chunks)
    after_idx = 0 if after is None else int(after)

    if after_idx >= total:
        return ApiResponse.bad_request(
            f"After index {after} is out of bounds. Total pagination is {total} for DAG {dag_id} and Task {task_id}."
        )

    payload = {task_id: log_chunks[after_idx], "total": len(log_chunks)}
    # Only add the after if there are more pages
    if after_idx < total - 1:
        payload["after"] = after_idx + 1
    return ApiResponse.success(payload)