openmetadata-managed-apis 1.6.10.0__py3-none-any.whl → 1.10.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openmetadata_managed_apis/__init__.py +3 -3
- openmetadata_managed_apis/api/apis_metadata.py +3 -3
- openmetadata_managed_apis/api/app.py +3 -3
- openmetadata_managed_apis/api/config.py +3 -3
- openmetadata_managed_apis/api/error_handlers.py +3 -3
- openmetadata_managed_apis/api/response.py +3 -3
- openmetadata_managed_apis/api/routes/delete.py +3 -3
- openmetadata_managed_apis/api/routes/deploy.py +3 -3
- openmetadata_managed_apis/api/routes/disable.py +3 -3
- openmetadata_managed_apis/api/routes/enable.py +3 -3
- openmetadata_managed_apis/api/routes/health.py +3 -3
- openmetadata_managed_apis/api/routes/health_auth.py +3 -3
- openmetadata_managed_apis/api/routes/ip.py +3 -3
- openmetadata_managed_apis/api/routes/kill.py +3 -3
- openmetadata_managed_apis/api/routes/last_dag_logs.py +17 -5
- openmetadata_managed_apis/api/routes/run_automation.py +4 -4
- openmetadata_managed_apis/api/routes/status.py +3 -3
- openmetadata_managed_apis/api/routes/trigger.py +11 -6
- openmetadata_managed_apis/api/utils.py +23 -3
- openmetadata_managed_apis/operations/delete.py +3 -3
- openmetadata_managed_apis/operations/deploy.py +5 -3
- openmetadata_managed_apis/operations/health.py +3 -3
- openmetadata_managed_apis/operations/kill_all.py +3 -3
- openmetadata_managed_apis/operations/last_dag_logs.py +150 -35
- openmetadata_managed_apis/operations/state.py +3 -3
- openmetadata_managed_apis/operations/status.py +3 -3
- openmetadata_managed_apis/operations/trigger.py +8 -5
- openmetadata_managed_apis/plugin.py +3 -3
- openmetadata_managed_apis/utils/parser.py +3 -3
- openmetadata_managed_apis/views/rest_api.py +3 -3
- openmetadata_managed_apis/workflows/config.py +3 -3
- openmetadata_managed_apis/workflows/ingestion/application.py +19 -10
- openmetadata_managed_apis/workflows/ingestion/auto_classification.py +16 -11
- openmetadata_managed_apis/workflows/ingestion/common.py +46 -15
- openmetadata_managed_apis/workflows/ingestion/dbt.py +4 -3
- openmetadata_managed_apis/workflows/ingestion/elasticsearch_sink.py +3 -3
- openmetadata_managed_apis/workflows/ingestion/es_reindex.py +4 -3
- openmetadata_managed_apis/workflows/ingestion/lineage.py +4 -3
- openmetadata_managed_apis/workflows/ingestion/metadata.py +4 -3
- openmetadata_managed_apis/workflows/ingestion/profiler.py +16 -11
- openmetadata_managed_apis/workflows/ingestion/registry.py +3 -3
- openmetadata_managed_apis/workflows/ingestion/test_suite.py +16 -11
- openmetadata_managed_apis/workflows/ingestion/usage.py +12 -10
- openmetadata_managed_apis/workflows/workflow_builder.py +3 -3
- openmetadata_managed_apis/workflows/workflow_factory.py +3 -3
- {openmetadata_managed_apis-1.6.10.0.dist-info → openmetadata_managed_apis-1.10.12.0.dist-info}/METADATA +10 -6
- openmetadata_managed_apis-1.10.12.0.dist-info/RECORD +62 -0
- openmetadata_managed_apis-1.6.10.0.dist-info/RECORD +0 -62
- {openmetadata_managed_apis-1.6.10.0.dist-info → openmetadata_managed_apis-1.10.12.0.dist-info}/LICENSE +0 -0
- {openmetadata_managed_apis-1.6.10.0.dist-info → openmetadata_managed_apis-1.10.12.0.dist-info}/WHEEL +0 -0
- {openmetadata_managed_apis-1.6.10.0.dist-info → openmetadata_managed_apis-1.10.12.0.dist-info}/entry_points.txt +0 -0
- {openmetadata_managed_apis-1.6.10.0.dist-info → openmetadata_managed_apis-1.10.12.0.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# Copyright
|
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
5
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
6
|
# Unless required by applicable law or agreed to in writing, software
|
|
7
7
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
8
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -11,36 +11,67 @@
|
|
|
11
11
|
"""
|
|
12
12
|
Module containing the logic to retrieve all logs from the tasks of a last DAG run
|
|
13
13
|
"""
|
|
14
|
-
|
|
14
|
+
import os
|
|
15
|
+
from functools import lru_cache, partial
|
|
15
16
|
from io import StringIO
|
|
16
|
-
from typing import List, Optional
|
|
17
|
+
from typing import List, Optional, Tuple
|
|
17
18
|
|
|
18
19
|
from airflow.models import DagModel, TaskInstance
|
|
19
20
|
from airflow.utils.log.log_reader import TaskLogReader
|
|
20
21
|
from flask import Response
|
|
21
22
|
from openmetadata_managed_apis.api.response import ApiResponse
|
|
23
|
+
from openmetadata_managed_apis.utils.logger import operations_logger
|
|
24
|
+
|
|
25
|
+
logger = operations_logger()
|
|
22
26
|
|
|
23
27
|
LOG_METADATA = {
|
|
24
28
|
"download_logs": False,
|
|
25
29
|
}
|
|
26
|
-
# Make chunks of 2M characters
|
|
27
30
|
CHUNK_SIZE = 2_000_000
|
|
31
|
+
DOT_STR = "_DOT_"
|
|
28
32
|
|
|
29
33
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
+
@lru_cache(maxsize=10)
|
|
35
|
+
def get_log_file_info(log_file_path: str, mtime: int) -> Tuple[int, int]:
|
|
36
|
+
"""
|
|
37
|
+
Get total size and number of chunks for a log file.
|
|
38
|
+
:param log_file_path: Path to log file
|
|
39
|
+
:param mtime: File modification time in seconds (used as cache key)
|
|
40
|
+
:return: Tuple of (file_size_bytes, total_chunks)
|
|
41
|
+
"""
|
|
42
|
+
file_size = os.path.getsize(log_file_path)
|
|
43
|
+
total_chunks = (file_size + CHUNK_SIZE - 1) // CHUNK_SIZE
|
|
44
|
+
return file_size, total_chunks
|
|
34
45
|
|
|
35
|
-
Args:
|
|
36
|
-
dag_id (str): DAG to look for
|
|
37
|
-
task_id (str): Task to fetch logs from
|
|
38
|
-
after (int): log stream cursor
|
|
39
46
|
|
|
40
|
-
|
|
41
|
-
|
|
47
|
+
def read_log_chunk_from_file(file_path: str, chunk_index: int) -> Optional[str]:
|
|
48
|
+
"""
|
|
49
|
+
Read a specific chunk from a log file without loading entire file.
|
|
50
|
+
:param file_path: Path to the log file
|
|
51
|
+
:param chunk_index: 0-based chunk index to read
|
|
52
|
+
:return: Log chunk content or None if error
|
|
42
53
|
"""
|
|
54
|
+
try:
|
|
55
|
+
offset = chunk_index * CHUNK_SIZE
|
|
56
|
+
with open(file_path, "r", encoding="utf-8", errors="replace") as f:
|
|
57
|
+
f.seek(offset)
|
|
58
|
+
chunk = f.read(CHUNK_SIZE)
|
|
59
|
+
return chunk
|
|
60
|
+
except Exception as exc:
|
|
61
|
+
logger.warning(f"Failed to read log chunk from {file_path}: {exc}")
|
|
62
|
+
return None
|
|
63
|
+
|
|
43
64
|
|
|
65
|
+
def last_dag_logs(dag_id: str, task_id: str, after: Optional[int] = None) -> Response:
|
|
66
|
+
"""
|
|
67
|
+
Validate that the DAG is registered by Airflow and have at least one Run.
|
|
68
|
+
If exists, returns all logs for each task instance of the last DAG run.
|
|
69
|
+
Uses file streaming to avoid loading entire log file into memory.
|
|
70
|
+
:param dag_id: DAG to look for
|
|
71
|
+
:param task_id: Task to fetch logs from
|
|
72
|
+
:param after: log stream cursor
|
|
73
|
+
:return: Response with log and pagination
|
|
74
|
+
"""
|
|
44
75
|
dag_model = DagModel.get_dagmodel(dag_id=dag_id)
|
|
45
76
|
|
|
46
77
|
if not dag_model:
|
|
@@ -58,32 +89,116 @@ def last_dag_logs(dag_id: str, task_id: str, after: Optional[int] = None) -> Res
|
|
|
58
89
|
f"Cannot find any task instance for the last DagRun of {dag_id}."
|
|
59
90
|
)
|
|
60
91
|
|
|
61
|
-
|
|
62
|
-
|
|
92
|
+
target_task_instance = None
|
|
63
93
|
for task_instance in task_instances:
|
|
64
|
-
# Only fetch the required logs
|
|
65
94
|
if task_instance.task_id == task_id:
|
|
66
|
-
|
|
67
|
-
|
|
95
|
+
target_task_instance = task_instance
|
|
96
|
+
break
|
|
97
|
+
|
|
98
|
+
if not target_task_instance:
|
|
99
|
+
return ApiResponse.bad_request(f"Task {task_id} not found in DAG {dag_id}.")
|
|
100
|
+
|
|
101
|
+
try_number = target_task_instance._try_number # pylint: disable=protected-access
|
|
102
|
+
|
|
103
|
+
task_log_reader = TaskLogReader()
|
|
104
|
+
if not task_log_reader.supports_read:
|
|
105
|
+
return ApiResponse.server_error("Task Log Reader does not support read logs.")
|
|
106
|
+
|
|
107
|
+
# Try to use file streaming for better performance
|
|
108
|
+
try:
|
|
109
|
+
|
|
110
|
+
from airflow.configuration import ( # pylint: disable=import-outside-toplevel
|
|
111
|
+
conf,
|
|
112
|
+
)
|
|
68
113
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
114
|
+
base_log_folder = conf.get("logging", "base_log_folder")
|
|
115
|
+
# dag_id and task_id are already sanitized at route level
|
|
116
|
+
# Only dots are replaced for Airflow log path compatibility
|
|
117
|
+
dag_id_safe = dag_id.replace(".", DOT_STR)
|
|
118
|
+
task_id_safe = task_id.replace(".", DOT_STR)
|
|
119
|
+
|
|
120
|
+
log_relative_path = f"dag_id={dag_id_safe}/run_id={last_dag_run.run_id}/task_id={task_id_safe}/attempt={try_number}.log"
|
|
121
|
+
log_file_path = os.path.join(base_log_folder, log_relative_path)
|
|
122
|
+
|
|
123
|
+
# Security: Validate the resolved path stays within base_log_folder
|
|
124
|
+
# to prevent directory traversal attacks. This provides defense-in-depth
|
|
125
|
+
# even though dag_id and task_id are already sanitized at the route level.
|
|
126
|
+
log_file_path_real = os.path.realpath(log_file_path)
|
|
127
|
+
base_log_folder_real = os.path.realpath(base_log_folder)
|
|
128
|
+
|
|
129
|
+
if not log_file_path_real.startswith(base_log_folder_real + os.sep):
|
|
130
|
+
logger.warning(
|
|
131
|
+
f"Path traversal attempt detected: {log_file_path} is outside {base_log_folder}"
|
|
132
|
+
)
|
|
133
|
+
return ApiResponse.bad_request(
|
|
134
|
+
f"Invalid log path for DAG {dag_id} and Task {task_id}."
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
if os.path.exists(log_file_path_real):
|
|
138
|
+
stat_info = os.stat(log_file_path_real)
|
|
139
|
+
file_mtime = int(stat_info.st_mtime)
|
|
140
|
+
|
|
141
|
+
_, total_chunks = get_log_file_info(log_file_path_real, file_mtime)
|
|
142
|
+
|
|
143
|
+
after_idx = int(after) if after is not None else 0
|
|
144
|
+
|
|
145
|
+
if after_idx >= total_chunks:
|
|
146
|
+
return ApiResponse.bad_request(
|
|
147
|
+
f"After index {after} is out of bounds. Total pagination is {total_chunks} for DAG {dag_id} and Task {task_id}."
|
|
73
148
|
)
|
|
74
149
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
150
|
+
chunk_content = read_log_chunk_from_file(log_file_path_real, after_idx)
|
|
151
|
+
|
|
152
|
+
if chunk_content is not None:
|
|
153
|
+
return ApiResponse.success(
|
|
154
|
+
{
|
|
155
|
+
task_id: chunk_content,
|
|
156
|
+
"total": total_chunks,
|
|
157
|
+
**(
|
|
158
|
+
{"after": after_idx + 1}
|
|
159
|
+
if after_idx < total_chunks - 1
|
|
160
|
+
else {}
|
|
161
|
+
),
|
|
162
|
+
}
|
|
85
163
|
)
|
|
164
|
+
except Exception as exc:
|
|
165
|
+
logger.debug(
|
|
166
|
+
f"File streaming failed for DAG {dag_id}, falling back to TaskLogReader: {exc}"
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
# Fallback to TaskLogReader if streaming fails
|
|
170
|
+
return _last_dag_logs_fallback(
|
|
171
|
+
dag_id, task_id, after, target_task_instance, task_log_reader, try_number
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _last_dag_logs_fallback(
|
|
176
|
+
dag_id: str,
|
|
177
|
+
task_id: str,
|
|
178
|
+
after: Optional[int],
|
|
179
|
+
task_instance: TaskInstance,
|
|
180
|
+
task_log_reader: TaskLogReader,
|
|
181
|
+
try_number: int,
|
|
182
|
+
) -> Response:
|
|
183
|
+
"""
|
|
184
|
+
Fallback to reading entire log file into memory (old behavior).
|
|
185
|
+
:param dag_id: DAG to look for
|
|
186
|
+
:param task_id: Task to fetch logs from
|
|
187
|
+
:param after: log stream cursor
|
|
188
|
+
:param task_instance: Task instance to fetch logs from
|
|
189
|
+
:param task_log_reader: TaskLogReader instance
|
|
190
|
+
:param try_number: Task attempt number
|
|
191
|
+
:return: API Response
|
|
192
|
+
"""
|
|
193
|
+
raw_logs_str = "".join(
|
|
194
|
+
list(
|
|
195
|
+
task_log_reader.read_log_stream(
|
|
196
|
+
ti=task_instance,
|
|
197
|
+
try_number=try_number,
|
|
198
|
+
metadata=LOG_METADATA,
|
|
86
199
|
)
|
|
200
|
+
)
|
|
201
|
+
)
|
|
87
202
|
|
|
88
203
|
if not raw_logs_str:
|
|
89
204
|
return ApiResponse.bad_request(
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# Copyright
|
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
5
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
6
|
# Unless required by applicable law or agreed to in writing, software
|
|
7
7
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
8
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# Copyright
|
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
5
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
6
|
# Unless required by applicable law or agreed to in writing, software
|
|
7
7
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
8
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# Copyright
|
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
5
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
6
|
# Unless required by applicable law or agreed to in writing, software
|
|
7
7
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
8
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -17,17 +17,20 @@ try:
|
|
|
17
17
|
from airflow.api.common.trigger_dag import trigger_dag
|
|
18
18
|
except ImportError:
|
|
19
19
|
from airflow.api.common.experimental.trigger_dag import trigger_dag
|
|
20
|
+
|
|
20
21
|
from airflow.utils import timezone
|
|
21
22
|
from flask import Response
|
|
22
23
|
from openmetadata_managed_apis.api.response import ApiResponse
|
|
23
24
|
|
|
24
25
|
|
|
25
|
-
def trigger(dag_id: str, run_id: Optional[str]) -> Response:
|
|
26
|
+
def trigger(
|
|
27
|
+
dag_id: str, run_id: Optional[str], conf: Optional[dict] = None
|
|
28
|
+
) -> Response:
|
|
26
29
|
dag_run = trigger_dag(
|
|
27
30
|
dag_id=dag_id,
|
|
28
31
|
run_id=run_id,
|
|
29
|
-
conf=None,
|
|
30
32
|
execution_date=timezone.utcnow(),
|
|
33
|
+
conf=conf,
|
|
31
34
|
)
|
|
32
35
|
return ApiResponse.success(
|
|
33
36
|
{"message": f"Workflow [{dag_id}] has been triggered {dag_run}"}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# Copyright
|
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
5
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
6
|
# Unless required by applicable law or agreed to in writing, software
|
|
7
7
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
8
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# Copyright
|
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
5
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
6
|
# Unless required by applicable law or agreed to in writing, software
|
|
7
7
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
8
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# Copyright
|
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
5
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
6
|
# Unless required by applicable law or agreed to in writing, software
|
|
7
7
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
8
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# Copyright
|
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
5
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
6
|
# Unless required by applicable law or agreed to in writing, software
|
|
7
7
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
8
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# Copyright
|
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
5
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
6
|
# Unless required by applicable law or agreed to in writing, software
|
|
7
7
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
8
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -18,6 +18,7 @@ from openmetadata_managed_apis.utils.logger import set_operator_logger
|
|
|
18
18
|
from openmetadata_managed_apis.workflows.ingestion.common import (
|
|
19
19
|
build_dag,
|
|
20
20
|
build_workflow_config_property,
|
|
21
|
+
execute_workflow,
|
|
21
22
|
)
|
|
22
23
|
|
|
23
24
|
from metadata.generated.schema.entity.applications.configuration.applicationConfig import (
|
|
@@ -36,7 +37,7 @@ from metadata.generated.schema.metadataIngestion.applicationPipeline import (
|
|
|
36
37
|
from metadata.workflow.application import ApplicationWorkflow
|
|
37
38
|
|
|
38
39
|
|
|
39
|
-
def application_workflow(workflow_config: OpenMetadataApplicationConfig):
|
|
40
|
+
def application_workflow(workflow_config: OpenMetadataApplicationConfig, **context):
|
|
40
41
|
"""
|
|
41
42
|
Task that creates and runs the ingestion workflow.
|
|
42
43
|
|
|
@@ -48,13 +49,17 @@ def application_workflow(workflow_config: OpenMetadataApplicationConfig):
|
|
|
48
49
|
|
|
49
50
|
set_operator_logger(workflow_config)
|
|
50
51
|
|
|
51
|
-
|
|
52
|
+
# set overridden app config
|
|
53
|
+
config = json.loads(
|
|
54
|
+
workflow_config.model_dump_json(exclude_defaults=False, mask_secrets=False)
|
|
55
|
+
)
|
|
56
|
+
params = context.get("params") or {}
|
|
57
|
+
config["appConfig"] = {
|
|
58
|
+
**(config.get("appConfig") or {}),
|
|
59
|
+
**(params.get("appConfigOverride") or {}),
|
|
60
|
+
}
|
|
52
61
|
workflow = ApplicationWorkflow.create(config)
|
|
53
|
-
|
|
54
|
-
workflow.execute()
|
|
55
|
-
workflow.raise_from_status()
|
|
56
|
-
workflow.print_status()
|
|
57
|
-
workflow.stop()
|
|
62
|
+
execute_workflow(workflow, workflow_config)
|
|
58
63
|
|
|
59
64
|
|
|
60
65
|
def build_application_workflow_config(
|
|
@@ -84,6 +89,7 @@ def build_application_workflow_config(
|
|
|
84
89
|
else None,
|
|
85
90
|
workflowConfig=build_workflow_config_property(ingestion_pipeline),
|
|
86
91
|
ingestionPipelineFQN=ingestion_pipeline.fullyQualifiedName.root,
|
|
92
|
+
enableStreamableLogs=ingestion_pipeline.enableStreamableLogs,
|
|
87
93
|
)
|
|
88
94
|
|
|
89
95
|
return application_workflow_config
|
|
@@ -99,6 +105,9 @@ def build_application_dag(ingestion_pipeline: IngestionPipeline) -> DAG:
|
|
|
99
105
|
ingestion_pipeline=ingestion_pipeline,
|
|
100
106
|
workflow_config=application_workflow_config,
|
|
101
107
|
workflow_fn=application_workflow,
|
|
108
|
+
params={
|
|
109
|
+
"appConfigOverride": None # Default to None, will be overridden by trigger conf
|
|
110
|
+
},
|
|
102
111
|
)
|
|
103
112
|
|
|
104
113
|
return dag
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# Copyright
|
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
5
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
6
|
# Unless required by applicable law or agreed to in writing, software
|
|
7
7
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
8
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -15,7 +15,11 @@ import json
|
|
|
15
15
|
|
|
16
16
|
from airflow import DAG
|
|
17
17
|
from openmetadata_managed_apis.utils.logger import set_operator_logger
|
|
18
|
-
from openmetadata_managed_apis.workflows.ingestion.common import build_dag, build_source
|
|
18
|
+
from openmetadata_managed_apis.workflows.ingestion.common import (
|
|
19
|
+
build_dag,
|
|
20
|
+
build_source,
|
|
21
|
+
execute_workflow,
|
|
22
|
+
)
|
|
19
23
|
|
|
20
24
|
from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import (
|
|
21
25
|
IngestionPipeline,
|
|
@@ -30,7 +34,9 @@ from metadata.generated.schema.metadataIngestion.workflow import (
|
|
|
30
34
|
from metadata.workflow.classification import AutoClassificationWorkflow
|
|
31
35
|
|
|
32
36
|
|
|
33
|
-
def auto_classification_workflow(workflow_config: OpenMetadataWorkflowConfig):
|
|
37
|
+
def auto_classification_workflow(
|
|
38
|
+
workflow_config: OpenMetadataWorkflowConfig,
|
|
39
|
+
):
|
|
34
40
|
"""
|
|
35
41
|
Task that creates and runs the auto classification workflow.
|
|
36
42
|
|
|
@@ -42,13 +48,11 @@ def auto_classification_workflow(workflow_config: OpenMetadataWorkflowConfig):
|
|
|
42
48
|
|
|
43
49
|
set_operator_logger(workflow_config)
|
|
44
50
|
|
|
45
|
-
config = json.loads(workflow_config.model_dump_json(exclude_defaults=False))
|
|
51
|
+
config = json.loads(
|
|
52
|
+
workflow_config.model_dump_json(exclude_defaults=False, mask_secrets=False)
|
|
53
|
+
)
|
|
46
54
|
workflow = AutoClassificationWorkflow.create(config)
|
|
47
|
-
|
|
48
|
-
workflow.execute()
|
|
49
|
-
workflow.raise_from_status()
|
|
50
|
-
workflow.print_status()
|
|
51
|
-
workflow.stop()
|
|
55
|
+
execute_workflow(workflow, workflow_config)
|
|
52
56
|
|
|
53
57
|
|
|
54
58
|
def build_auto_classification_workflow_config(
|
|
@@ -72,6 +76,7 @@ def build_auto_classification_workflow_config(
|
|
|
72
76
|
openMetadataServerConfig=ingestion_pipeline.openMetadataServerConnection,
|
|
73
77
|
),
|
|
74
78
|
ingestionPipelineFQN=ingestion_pipeline.fullyQualifiedName.root,
|
|
79
|
+
enableStreamableLogs=ingestion_pipeline.enableStreamableLogs,
|
|
75
80
|
)
|
|
76
81
|
|
|
77
82
|
return workflow_config
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# Copyright
|
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
5
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
6
|
# Unless required by applicable law or agreed to in writing, software
|
|
7
7
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
8
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -37,6 +37,7 @@ from metadata.generated.schema.metadataIngestion.application import (
|
|
|
37
37
|
from metadata.generated.schema.type.basic import Timestamp, Uuid
|
|
38
38
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
39
39
|
from metadata.utils import fqn
|
|
40
|
+
from metadata.workflow.base import BaseWorkflow
|
|
40
41
|
|
|
41
42
|
# pylint: disable=ungrouped-imports
|
|
42
43
|
try:
|
|
@@ -44,6 +45,7 @@ try:
|
|
|
44
45
|
except ModuleNotFoundError:
|
|
45
46
|
from airflow.operators.python_operator import PythonOperator
|
|
46
47
|
|
|
48
|
+
from croniter import croniter
|
|
47
49
|
from openmetadata_managed_apis.utils.logger import set_operator_logger, workflow_logger
|
|
48
50
|
from openmetadata_managed_apis.utils.parser import (
|
|
49
51
|
parse_service_connection,
|
|
@@ -191,6 +193,18 @@ def build_source(ingestion_pipeline: IngestionPipeline) -> WorkflowSource:
|
|
|
191
193
|
)
|
|
192
194
|
|
|
193
195
|
|
|
196
|
+
def execute_workflow(
|
|
197
|
+
workflow: BaseWorkflow, workflow_config: OpenMetadataWorkflowConfig
|
|
198
|
+
) -> None:
|
|
199
|
+
"""
|
|
200
|
+
Execute the workflow and handle the status
|
|
201
|
+
"""
|
|
202
|
+
workflow.execute()
|
|
203
|
+
workflow.stop()
|
|
204
|
+
if workflow_config.workflowConfig.raiseOnError:
|
|
205
|
+
workflow.raise_from_status()
|
|
206
|
+
|
|
207
|
+
|
|
194
208
|
def metadata_ingestion_workflow(workflow_config: OpenMetadataWorkflowConfig):
|
|
195
209
|
"""
|
|
196
210
|
Task that creates and runs the ingestion workflow.
|
|
@@ -203,13 +217,11 @@ def metadata_ingestion_workflow(workflow_config: OpenMetadataWorkflowConfig):
|
|
|
203
217
|
|
|
204
218
|
set_operator_logger(workflow_config)
|
|
205
219
|
|
|
206
|
-
config = json.loads(workflow_config.model_dump_json(exclude_defaults=False))
|
|
220
|
+
config = json.loads(
|
|
221
|
+
workflow_config.model_dump_json(exclude_defaults=False, mask_secrets=False)
|
|
222
|
+
)
|
|
207
223
|
workflow = MetadataWorkflow.create(config)
|
|
208
|
-
|
|
209
|
-
workflow.execute()
|
|
210
|
-
workflow.raise_from_status()
|
|
211
|
-
workflow.print_status()
|
|
212
|
-
workflow.stop()
|
|
224
|
+
execute_workflow(workflow, workflow_config)
|
|
213
225
|
|
|
214
226
|
|
|
215
227
|
def build_workflow_config_property(
|
|
@@ -222,6 +234,7 @@ def build_workflow_config_property(
|
|
|
222
234
|
"""
|
|
223
235
|
return WorkflowConfig(
|
|
224
236
|
loggerLevel=ingestion_pipeline.loggerLevel or LogLevels.INFO,
|
|
237
|
+
raiseOnError=ingestion_pipeline.raiseOnError,
|
|
225
238
|
openMetadataServerConfig=ingestion_pipeline.openMetadataServerConnection,
|
|
226
239
|
)
|
|
227
240
|
|
|
@@ -247,11 +260,19 @@ def build_dag_configs(ingestion_pipeline: IngestionPipeline) -> dict:
|
|
|
247
260
|
:param ingestion_pipeline: pipeline configs
|
|
248
261
|
:return: dict to use as kwargs
|
|
249
262
|
"""
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
263
|
+
# Determine start_date based on schedule_interval using croniter
|
|
264
|
+
schedule_interval = ingestion_pipeline.airflowConfig.scheduleInterval
|
|
265
|
+
now = datetime.now()
|
|
266
|
+
|
|
267
|
+
if schedule_interval is None:
|
|
268
|
+
# On-demand DAG, set start_date to now
|
|
269
|
+
start_date = now
|
|
270
|
+
elif croniter.is_valid(schedule_interval):
|
|
271
|
+
cron = croniter(schedule_interval, now)
|
|
272
|
+
start_date = cron.get_prev(datetime)
|
|
253
273
|
else:
|
|
254
|
-
|
|
274
|
+
# Handle invalid cron expressions if necessary
|
|
275
|
+
start_date = now
|
|
255
276
|
|
|
256
277
|
return {
|
|
257
278
|
"dag_id": clean_dag_id(ingestion_pipeline.name.root),
|
|
@@ -272,7 +293,7 @@ def build_dag_configs(ingestion_pipeline: IngestionPipeline) -> dict:
|
|
|
272
293
|
"is_paused_upon_creation": ingestion_pipeline.airflowConfig.pausePipeline
|
|
273
294
|
or False,
|
|
274
295
|
"catchup": ingestion_pipeline.airflowConfig.pipelineCatchup or False,
|
|
275
|
-
"schedule_interval": ingestion_pipeline.airflowConfig.scheduleInterval,
|
|
296
|
+
"schedule_interval": schedule_interval,
|
|
276
297
|
"tags": [
|
|
277
298
|
"OpenMetadata",
|
|
278
299
|
clean_name_tag(ingestion_pipeline.displayName)
|
|
@@ -347,9 +368,16 @@ def build_dag(
|
|
|
347
368
|
ingestion_pipeline: IngestionPipeline,
|
|
348
369
|
workflow_config: Union[OpenMetadataWorkflowConfig, OpenMetadataApplicationConfig],
|
|
349
370
|
workflow_fn: Callable,
|
|
371
|
+
params: Optional[dict] = None,
|
|
350
372
|
) -> DAG:
|
|
351
373
|
"""
|
|
352
374
|
Build a simple metadata workflow DAG
|
|
375
|
+
:param task_name: Name of the task
|
|
376
|
+
:param ingestion_pipeline: Pipeline configs
|
|
377
|
+
:param workflow_config: Workflow configurations
|
|
378
|
+
:param workflow_fn: Function to be executed
|
|
379
|
+
:param params: Optional parameters to pass to the operator
|
|
380
|
+
:return: DAG
|
|
353
381
|
"""
|
|
354
382
|
|
|
355
383
|
with DAG(**build_dag_configs(ingestion_pipeline)) as dag:
|
|
@@ -360,7 +388,9 @@ def build_dag(
|
|
|
360
388
|
CustomPythonOperator(
|
|
361
389
|
task_id=task_name,
|
|
362
390
|
python_callable=workflow_fn,
|
|
363
|
-
op_kwargs={"workflow_config": workflow_config},
|
|
391
|
+
op_kwargs={
|
|
392
|
+
"workflow_config": workflow_config,
|
|
393
|
+
},
|
|
364
394
|
# There's no need to retry if we have had an error. Wait until the next schedule or manual rerun.
|
|
365
395
|
retries=ingestion_pipeline.airflowConfig.retries or 0,
|
|
366
396
|
# each DAG will call its own OpenMetadataWorkflowConfig
|
|
@@ -369,6 +399,7 @@ def build_dag(
|
|
|
369
399
|
owner=ingestion_pipeline.owners.root[0].name
|
|
370
400
|
if (ingestion_pipeline.owners and ingestion_pipeline.owners.root)
|
|
371
401
|
else "openmetadata",
|
|
402
|
+
params=params,
|
|
372
403
|
)
|
|
373
404
|
|
|
374
405
|
return dag
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# Copyright 2022 Collate
|
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
5
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
6
|
# Unless required by applicable law or agreed to in writing, software
|
|
7
7
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
8
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -42,7 +42,7 @@ def build_dbt_workflow_config(
|
|
|
42
42
|
"""
|
|
43
43
|
|
|
44
44
|
source = build_source(ingestion_pipeline)
|
|
45
|
-
source.type =
|
|
45
|
+
source.type = "dbt" # Mark the source as dbt
|
|
46
46
|
|
|
47
47
|
workflow_config = OpenMetadataWorkflowConfig(
|
|
48
48
|
source=source,
|
|
@@ -52,6 +52,7 @@ def build_dbt_workflow_config(
|
|
|
52
52
|
),
|
|
53
53
|
workflowConfig=build_workflow_config_property(ingestion_pipeline),
|
|
54
54
|
ingestionPipelineFQN=ingestion_pipeline.fullyQualifiedName.root,
|
|
55
|
+
enableStreamableLogs=ingestion_pipeline.enableStreamableLogs,
|
|
55
56
|
)
|
|
56
57
|
|
|
57
58
|
return workflow_config
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# Copyright
|
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
3
|
# you may not use this file except in compliance with the License.
|
|
4
4
|
# You may obtain a copy of the License at
|
|
5
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
6
|
# Unless required by applicable law or agreed to in writing, software
|
|
7
7
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
8
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|