openmetadata-managed-apis 1.6.10.0__py3-none-any.whl → 1.10.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. openmetadata_managed_apis/__init__.py +3 -3
  2. openmetadata_managed_apis/api/apis_metadata.py +3 -3
  3. openmetadata_managed_apis/api/app.py +3 -3
  4. openmetadata_managed_apis/api/config.py +3 -3
  5. openmetadata_managed_apis/api/error_handlers.py +3 -3
  6. openmetadata_managed_apis/api/response.py +3 -3
  7. openmetadata_managed_apis/api/routes/delete.py +3 -3
  8. openmetadata_managed_apis/api/routes/deploy.py +3 -3
  9. openmetadata_managed_apis/api/routes/disable.py +3 -3
  10. openmetadata_managed_apis/api/routes/enable.py +3 -3
  11. openmetadata_managed_apis/api/routes/health.py +3 -3
  12. openmetadata_managed_apis/api/routes/health_auth.py +3 -3
  13. openmetadata_managed_apis/api/routes/ip.py +3 -3
  14. openmetadata_managed_apis/api/routes/kill.py +3 -3
  15. openmetadata_managed_apis/api/routes/last_dag_logs.py +17 -5
  16. openmetadata_managed_apis/api/routes/run_automation.py +4 -4
  17. openmetadata_managed_apis/api/routes/status.py +3 -3
  18. openmetadata_managed_apis/api/routes/trigger.py +11 -6
  19. openmetadata_managed_apis/api/utils.py +23 -3
  20. openmetadata_managed_apis/operations/delete.py +3 -3
  21. openmetadata_managed_apis/operations/deploy.py +5 -3
  22. openmetadata_managed_apis/operations/health.py +3 -3
  23. openmetadata_managed_apis/operations/kill_all.py +3 -3
  24. openmetadata_managed_apis/operations/last_dag_logs.py +150 -35
  25. openmetadata_managed_apis/operations/state.py +3 -3
  26. openmetadata_managed_apis/operations/status.py +3 -3
  27. openmetadata_managed_apis/operations/trigger.py +8 -5
  28. openmetadata_managed_apis/plugin.py +3 -3
  29. openmetadata_managed_apis/utils/parser.py +3 -3
  30. openmetadata_managed_apis/views/rest_api.py +3 -3
  31. openmetadata_managed_apis/workflows/config.py +3 -3
  32. openmetadata_managed_apis/workflows/ingestion/application.py +19 -10
  33. openmetadata_managed_apis/workflows/ingestion/auto_classification.py +16 -11
  34. openmetadata_managed_apis/workflows/ingestion/common.py +46 -15
  35. openmetadata_managed_apis/workflows/ingestion/dbt.py +4 -3
  36. openmetadata_managed_apis/workflows/ingestion/elasticsearch_sink.py +3 -3
  37. openmetadata_managed_apis/workflows/ingestion/es_reindex.py +4 -3
  38. openmetadata_managed_apis/workflows/ingestion/lineage.py +4 -3
  39. openmetadata_managed_apis/workflows/ingestion/metadata.py +4 -3
  40. openmetadata_managed_apis/workflows/ingestion/profiler.py +16 -11
  41. openmetadata_managed_apis/workflows/ingestion/registry.py +3 -3
  42. openmetadata_managed_apis/workflows/ingestion/test_suite.py +16 -11
  43. openmetadata_managed_apis/workflows/ingestion/usage.py +12 -10
  44. openmetadata_managed_apis/workflows/workflow_builder.py +3 -3
  45. openmetadata_managed_apis/workflows/workflow_factory.py +3 -3
  46. {openmetadata_managed_apis-1.6.10.0.dist-info → openmetadata_managed_apis-1.10.12.0.dist-info}/METADATA +10 -6
  47. openmetadata_managed_apis-1.10.12.0.dist-info/RECORD +62 -0
  48. openmetadata_managed_apis-1.6.10.0.dist-info/RECORD +0 -62
  49. {openmetadata_managed_apis-1.6.10.0.dist-info → openmetadata_managed_apis-1.10.12.0.dist-info}/LICENSE +0 -0
  50. {openmetadata_managed_apis-1.6.10.0.dist-info → openmetadata_managed_apis-1.10.12.0.dist-info}/WHEEL +0 -0
  51. {openmetadata_managed_apis-1.6.10.0.dist-info → openmetadata_managed_apis-1.10.12.0.dist-info}/entry_points.txt +0 -0
  52. {openmetadata_managed_apis-1.6.10.0.dist-info → openmetadata_managed_apis-1.10.12.0.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,8 @@
1
- # Copyright 2022 Collate
2
- # Licensed under the Apache License, Version 2.0 (the "License");
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- # http://www.apache.org/licenses/LICENSE-2.0
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
6
  # Unless required by applicable law or agreed to in writing, software
7
7
  # distributed under the License is distributed on an "AS IS" BASIS,
8
8
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -11,36 +11,67 @@
11
11
  """
12
12
  Module containing the logic to retrieve all logs from the tasks of a last DAG run
13
13
  """
14
- from functools import partial
14
+ import os
15
+ from functools import lru_cache, partial
15
16
  from io import StringIO
16
- from typing import List, Optional
17
+ from typing import List, Optional, Tuple
17
18
 
18
19
  from airflow.models import DagModel, TaskInstance
19
20
  from airflow.utils.log.log_reader import TaskLogReader
20
21
  from flask import Response
21
22
  from openmetadata_managed_apis.api.response import ApiResponse
23
+ from openmetadata_managed_apis.utils.logger import operations_logger
24
+
25
+ logger = operations_logger()
22
26
 
23
27
  LOG_METADATA = {
24
28
  "download_logs": False,
25
29
  }
26
- # Make chunks of 2M characters
27
30
  CHUNK_SIZE = 2_000_000
31
+ DOT_STR = "_DOT_"
28
32
 
29
33
 
30
- def last_dag_logs(dag_id: str, task_id: str, after: Optional[int] = None) -> Response:
31
- """Validate that the DAG is registered by Airflow and have at least one Run.
32
-
33
- If exists, returns all logs for each task instance of the last DAG run.
34
+ @lru_cache(maxsize=10)
35
+ def get_log_file_info(log_file_path: str, mtime: int) -> Tuple[int, int]:
36
+ """
37
+ Get total size and number of chunks for a log file.
38
+ :param log_file_path: Path to log file
39
+ :param mtime: File modification time in seconds (used as cache key)
40
+ :return: Tuple of (file_size_bytes, total_chunks)
41
+ """
42
+ file_size = os.path.getsize(log_file_path)
43
+ total_chunks = (file_size + CHUNK_SIZE - 1) // CHUNK_SIZE
44
+ return file_size, total_chunks
34
45
 
35
- Args:
36
- dag_id (str): DAG to look for
37
- task_id (str): Task to fetch logs from
38
- after (int): log stream cursor
39
46
 
40
- Return:
41
- Response with log and pagination
47
+ def read_log_chunk_from_file(file_path: str, chunk_index: int) -> Optional[str]:
48
+ """
49
+ Read a specific chunk from a log file without loading entire file.
50
+ :param file_path: Path to the log file
51
+ :param chunk_index: 0-based chunk index to read
52
+ :return: Log chunk content or None if error
42
53
  """
54
+ try:
55
+ offset = chunk_index * CHUNK_SIZE
56
+ with open(file_path, "r", encoding="utf-8", errors="replace") as f:
57
+ f.seek(offset)
58
+ chunk = f.read(CHUNK_SIZE)
59
+ return chunk
60
+ except Exception as exc:
61
+ logger.warning(f"Failed to read log chunk from {file_path}: {exc}")
62
+ return None
63
+
43
64
 
65
+ def last_dag_logs(dag_id: str, task_id: str, after: Optional[int] = None) -> Response:
66
+ """
67
+ Validate that the DAG is registered by Airflow and have at least one Run.
68
+ If exists, returns all logs for each task instance of the last DAG run.
69
+ Uses file streaming to avoid loading entire log file into memory.
70
+ :param dag_id: DAG to look for
71
+ :param task_id: Task to fetch logs from
72
+ :param after: log stream cursor
73
+ :return: Response with log and pagination
74
+ """
44
75
  dag_model = DagModel.get_dagmodel(dag_id=dag_id)
45
76
 
46
77
  if not dag_model:
@@ -58,32 +89,116 @@ def last_dag_logs(dag_id: str, task_id: str, after: Optional[int] = None) -> Res
58
89
  f"Cannot find any task instance for the last DagRun of {dag_id}."
59
90
  )
60
91
 
61
- raw_logs_str = None
62
-
92
+ target_task_instance = None
63
93
  for task_instance in task_instances:
64
- # Only fetch the required logs
65
94
  if task_instance.task_id == task_id:
66
- # Pick up the _try_number, otherwise they are adding 1
67
- try_number = task_instance._try_number # pylint: disable=protected-access
95
+ target_task_instance = task_instance
96
+ break
97
+
98
+ if not target_task_instance:
99
+ return ApiResponse.bad_request(f"Task {task_id} not found in DAG {dag_id}.")
100
+
101
+ try_number = target_task_instance._try_number # pylint: disable=protected-access
102
+
103
+ task_log_reader = TaskLogReader()
104
+ if not task_log_reader.supports_read:
105
+ return ApiResponse.server_error("Task Log Reader does not support read logs.")
106
+
107
+ # Try to use file streaming for better performance
108
+ try:
109
+
110
+ from airflow.configuration import ( # pylint: disable=import-outside-toplevel
111
+ conf,
112
+ )
68
113
 
69
- task_log_reader = TaskLogReader()
70
- if not task_log_reader.supports_read:
71
- return ApiResponse.server_error(
72
- "Task Log Reader does not support read logs."
114
+ base_log_folder = conf.get("logging", "base_log_folder")
115
+ # dag_id and task_id are already sanitized at route level
116
+ # Only dots are replaced for Airflow log path compatibility
117
+ dag_id_safe = dag_id.replace(".", DOT_STR)
118
+ task_id_safe = task_id.replace(".", DOT_STR)
119
+
120
+ log_relative_path = f"dag_id={dag_id_safe}/run_id={last_dag_run.run_id}/task_id={task_id_safe}/attempt={try_number}.log"
121
+ log_file_path = os.path.join(base_log_folder, log_relative_path)
122
+
123
+ # Security: Validate the resolved path stays within base_log_folder
124
+ # to prevent directory traversal attacks. This provides defense-in-depth
125
+ # even though dag_id and task_id are already sanitized at the route level.
126
+ log_file_path_real = os.path.realpath(log_file_path)
127
+ base_log_folder_real = os.path.realpath(base_log_folder)
128
+
129
+ if not log_file_path_real.startswith(base_log_folder_real + os.sep):
130
+ logger.warning(
131
+ f"Path traversal attempt detected: {log_file_path} is outside {base_log_folder}"
132
+ )
133
+ return ApiResponse.bad_request(
134
+ f"Invalid log path for DAG {dag_id} and Task {task_id}."
135
+ )
136
+
137
+ if os.path.exists(log_file_path_real):
138
+ stat_info = os.stat(log_file_path_real)
139
+ file_mtime = int(stat_info.st_mtime)
140
+
141
+ _, total_chunks = get_log_file_info(log_file_path_real, file_mtime)
142
+
143
+ after_idx = int(after) if after is not None else 0
144
+
145
+ if after_idx >= total_chunks:
146
+ return ApiResponse.bad_request(
147
+ f"After index {after} is out of bounds. Total pagination is {total_chunks} for DAG {dag_id} and Task {task_id}."
73
148
  )
74
149
 
75
- # Even when generating a ton of logs, we just get a single element.
76
- # Same happens when trying to call task_log_reader.read_log_chunks
77
- # We'll create our own chunk size and paginate based on that
78
- raw_logs_str = "".join(
79
- list(
80
- task_log_reader.read_log_stream(
81
- ti=task_instance,
82
- try_number=try_number,
83
- metadata=LOG_METADATA,
84
- )
150
+ chunk_content = read_log_chunk_from_file(log_file_path_real, after_idx)
151
+
152
+ if chunk_content is not None:
153
+ return ApiResponse.success(
154
+ {
155
+ task_id: chunk_content,
156
+ "total": total_chunks,
157
+ **(
158
+ {"after": after_idx + 1}
159
+ if after_idx < total_chunks - 1
160
+ else {}
161
+ ),
162
+ }
85
163
  )
164
+ except Exception as exc:
165
+ logger.debug(
166
+ f"File streaming failed for DAG {dag_id}, falling back to TaskLogReader: {exc}"
167
+ )
168
+
169
+ # Fallback to TaskLogReader if streaming fails
170
+ return _last_dag_logs_fallback(
171
+ dag_id, task_id, after, target_task_instance, task_log_reader, try_number
172
+ )
173
+
174
+
175
+ def _last_dag_logs_fallback(
176
+ dag_id: str,
177
+ task_id: str,
178
+ after: Optional[int],
179
+ task_instance: TaskInstance,
180
+ task_log_reader: TaskLogReader,
181
+ try_number: int,
182
+ ) -> Response:
183
+ """
184
+ Fallback to reading entire log file into memory (old behavior).
185
+ :param dag_id: DAG to look for
186
+ :param task_id: Task to fetch logs from
187
+ :param after: log stream cursor
188
+ :param task_instance: Task instance to fetch logs from
189
+ :param task_log_reader: TaskLogReader instance
190
+ :param try_number: Task attempt number
191
+ :return: API Response
192
+ """
193
+ raw_logs_str = "".join(
194
+ list(
195
+ task_log_reader.read_log_stream(
196
+ ti=task_instance,
197
+ try_number=try_number,
198
+ metadata=LOG_METADATA,
86
199
  )
200
+ )
201
+ )
87
202
 
88
203
  if not raw_logs_str:
89
204
  return ApiResponse.bad_request(
@@ -1,8 +1,8 @@
1
- # Copyright 2021 Collate
2
- # Licensed under the Apache License, Version 2.0 (the "License");
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- # http://www.apache.org/licenses/LICENSE-2.0
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
6
  # Unless required by applicable law or agreed to in writing, software
7
7
  # distributed under the License is distributed on an "AS IS" BASIS,
8
8
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -1,8 +1,8 @@
1
- # Copyright 2021 Collate
2
- # Licensed under the Apache License, Version 2.0 (the "License");
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- # http://www.apache.org/licenses/LICENSE-2.0
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
6
  # Unless required by applicable law or agreed to in writing, software
7
7
  # distributed under the License is distributed on an "AS IS" BASIS,
8
8
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -1,8 +1,8 @@
1
- # Copyright 2021 Collate
2
- # Licensed under the Apache License, Version 2.0 (the "License");
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- # http://www.apache.org/licenses/LICENSE-2.0
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
6
  # Unless required by applicable law or agreed to in writing, software
7
7
  # distributed under the License is distributed on an "AS IS" BASIS,
8
8
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,17 +17,20 @@ try:
17
17
  from airflow.api.common.trigger_dag import trigger_dag
18
18
  except ImportError:
19
19
  from airflow.api.common.experimental.trigger_dag import trigger_dag
20
+
20
21
  from airflow.utils import timezone
21
22
  from flask import Response
22
23
  from openmetadata_managed_apis.api.response import ApiResponse
23
24
 
24
25
 
25
- def trigger(dag_id: str, run_id: Optional[str]) -> Response:
26
+ def trigger(
27
+ dag_id: str, run_id: Optional[str], conf: Optional[dict] = None
28
+ ) -> Response:
26
29
  dag_run = trigger_dag(
27
30
  dag_id=dag_id,
28
31
  run_id=run_id,
29
- conf=None,
30
32
  execution_date=timezone.utcnow(),
33
+ conf=conf,
31
34
  )
32
35
  return ApiResponse.success(
33
36
  {"message": f"Workflow [{dag_id}] has been triggered {dag_run}"}
@@ -1,8 +1,8 @@
1
- # Copyright 2021 Collate
2
- # Licensed under the Apache License, Version 2.0 (the "License");
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- # http://www.apache.org/licenses/LICENSE-2.0
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
6
  # Unless required by applicable law or agreed to in writing, software
7
7
  # distributed under the License is distributed on an "AS IS" BASIS,
8
8
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -1,8 +1,8 @@
1
- # Copyright 2021 Collate
2
- # Licensed under the Apache License, Version 2.0 (the "License");
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- # http://www.apache.org/licenses/LICENSE-2.0
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
6
  # Unless required by applicable law or agreed to in writing, software
7
7
  # distributed under the License is distributed on an "AS IS" BASIS,
8
8
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -1,8 +1,8 @@
1
- # Copyright 2021 Collate
2
- # Licensed under the Apache License, Version 2.0 (the "License");
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- # http://www.apache.org/licenses/LICENSE-2.0
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
6
  # Unless required by applicable law or agreed to in writing, software
7
7
  # distributed under the License is distributed on an "AS IS" BASIS,
8
8
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -1,8 +1,8 @@
1
- # Copyright 2021 Collate
2
- # Licensed under the Apache License, Version 2.0 (the "License");
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- # http://www.apache.org/licenses/LICENSE-2.0
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
6
  # Unless required by applicable law or agreed to in writing, software
7
7
  # distributed under the License is distributed on an "AS IS" BASIS,
8
8
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -1,8 +1,8 @@
1
- # Copyright 2021 Collate
2
- # Licensed under the Apache License, Version 2.0 (the "License");
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- # http://www.apache.org/licenses/LICENSE-2.0
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
6
  # Unless required by applicable law or agreed to in writing, software
7
7
  # distributed under the License is distributed on an "AS IS" BASIS,
8
8
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -18,6 +18,7 @@ from openmetadata_managed_apis.utils.logger import set_operator_logger
18
18
  from openmetadata_managed_apis.workflows.ingestion.common import (
19
19
  build_dag,
20
20
  build_workflow_config_property,
21
+ execute_workflow,
21
22
  )
22
23
 
23
24
  from metadata.generated.schema.entity.applications.configuration.applicationConfig import (
@@ -36,7 +37,7 @@ from metadata.generated.schema.metadataIngestion.applicationPipeline import (
36
37
  from metadata.workflow.application import ApplicationWorkflow
37
38
 
38
39
 
39
- def application_workflow(workflow_config: OpenMetadataApplicationConfig):
40
+ def application_workflow(workflow_config: OpenMetadataApplicationConfig, **context):
40
41
  """
41
42
  Task that creates and runs the ingestion workflow.
42
43
 
@@ -48,13 +49,17 @@ def application_workflow(workflow_config: OpenMetadataApplicationConfig):
48
49
 
49
50
  set_operator_logger(workflow_config)
50
51
 
51
- config = json.loads(workflow_config.model_dump_json(exclude_defaults=False))
52
+ # set overridden app config
53
+ config = json.loads(
54
+ workflow_config.model_dump_json(exclude_defaults=False, mask_secrets=False)
55
+ )
56
+ params = context.get("params") or {}
57
+ config["appConfig"] = {
58
+ **(config.get("appConfig") or {}),
59
+ **(params.get("appConfigOverride") or {}),
60
+ }
52
61
  workflow = ApplicationWorkflow.create(config)
53
-
54
- workflow.execute()
55
- workflow.raise_from_status()
56
- workflow.print_status()
57
- workflow.stop()
62
+ execute_workflow(workflow, workflow_config)
58
63
 
59
64
 
60
65
  def build_application_workflow_config(
@@ -84,6 +89,7 @@ def build_application_workflow_config(
84
89
  else None,
85
90
  workflowConfig=build_workflow_config_property(ingestion_pipeline),
86
91
  ingestionPipelineFQN=ingestion_pipeline.fullyQualifiedName.root,
92
+ enableStreamableLogs=ingestion_pipeline.enableStreamableLogs,
87
93
  )
88
94
 
89
95
  return application_workflow_config
@@ -99,6 +105,9 @@ def build_application_dag(ingestion_pipeline: IngestionPipeline) -> DAG:
99
105
  ingestion_pipeline=ingestion_pipeline,
100
106
  workflow_config=application_workflow_config,
101
107
  workflow_fn=application_workflow,
108
+ params={
109
+ "appConfigOverride": None # Default to None, will be overridden by trigger conf
110
+ },
102
111
  )
103
112
 
104
113
  return dag
@@ -1,8 +1,8 @@
1
- # Copyright 2021 Collate
2
- # Licensed under the Apache License, Version 2.0 (the "License");
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- # http://www.apache.org/licenses/LICENSE-2.0
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
6
  # Unless required by applicable law or agreed to in writing, software
7
7
  # distributed under the License is distributed on an "AS IS" BASIS,
8
8
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -15,7 +15,11 @@ import json
15
15
 
16
16
  from airflow import DAG
17
17
  from openmetadata_managed_apis.utils.logger import set_operator_logger
18
- from openmetadata_managed_apis.workflows.ingestion.common import build_dag, build_source
18
+ from openmetadata_managed_apis.workflows.ingestion.common import (
19
+ build_dag,
20
+ build_source,
21
+ execute_workflow,
22
+ )
19
23
 
20
24
  from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import (
21
25
  IngestionPipeline,
@@ -30,7 +34,9 @@ from metadata.generated.schema.metadataIngestion.workflow import (
30
34
  from metadata.workflow.classification import AutoClassificationWorkflow
31
35
 
32
36
 
33
- def auto_classification_workflow(workflow_config: OpenMetadataWorkflowConfig):
37
+ def auto_classification_workflow(
38
+ workflow_config: OpenMetadataWorkflowConfig,
39
+ ):
34
40
  """
35
41
  Task that creates and runs the auto classification workflow.
36
42
 
@@ -42,13 +48,11 @@ def auto_classification_workflow(workflow_config: OpenMetadataWorkflowConfig):
42
48
 
43
49
  set_operator_logger(workflow_config)
44
50
 
45
- config = json.loads(workflow_config.model_dump_json(exclude_defaults=False))
51
+ config = json.loads(
52
+ workflow_config.model_dump_json(exclude_defaults=False, mask_secrets=False)
53
+ )
46
54
  workflow = AutoClassificationWorkflow.create(config)
47
-
48
- workflow.execute()
49
- workflow.raise_from_status()
50
- workflow.print_status()
51
- workflow.stop()
55
+ execute_workflow(workflow, workflow_config)
52
56
 
53
57
 
54
58
  def build_auto_classification_workflow_config(
@@ -72,6 +76,7 @@ def build_auto_classification_workflow_config(
72
76
  openMetadataServerConfig=ingestion_pipeline.openMetadataServerConnection,
73
77
  ),
74
78
  ingestionPipelineFQN=ingestion_pipeline.fullyQualifiedName.root,
79
+ enableStreamableLogs=ingestion_pipeline.enableStreamableLogs,
75
80
  )
76
81
 
77
82
  return workflow_config
@@ -1,8 +1,8 @@
1
- # Copyright 2021 Collate
2
- # Licensed under the Apache License, Version 2.0 (the "License");
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- # http://www.apache.org/licenses/LICENSE-2.0
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
6
  # Unless required by applicable law or agreed to in writing, software
7
7
  # distributed under the License is distributed on an "AS IS" BASIS,
8
8
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -37,6 +37,7 @@ from metadata.generated.schema.metadataIngestion.application import (
37
37
  from metadata.generated.schema.type.basic import Timestamp, Uuid
38
38
  from metadata.ingestion.ometa.ometa_api import OpenMetadata
39
39
  from metadata.utils import fqn
40
+ from metadata.workflow.base import BaseWorkflow
40
41
 
41
42
  # pylint: disable=ungrouped-imports
42
43
  try:
@@ -44,6 +45,7 @@ try:
44
45
  except ModuleNotFoundError:
45
46
  from airflow.operators.python_operator import PythonOperator
46
47
 
48
+ from croniter import croniter
47
49
  from openmetadata_managed_apis.utils.logger import set_operator_logger, workflow_logger
48
50
  from openmetadata_managed_apis.utils.parser import (
49
51
  parse_service_connection,
@@ -191,6 +193,18 @@ def build_source(ingestion_pipeline: IngestionPipeline) -> WorkflowSource:
191
193
  )
192
194
 
193
195
 
196
+ def execute_workflow(
197
+ workflow: BaseWorkflow, workflow_config: OpenMetadataWorkflowConfig
198
+ ) -> None:
199
+ """
200
+ Execute the workflow and handle the status
201
+ """
202
+ workflow.execute()
203
+ workflow.stop()
204
+ if workflow_config.workflowConfig.raiseOnError:
205
+ workflow.raise_from_status()
206
+
207
+
194
208
  def metadata_ingestion_workflow(workflow_config: OpenMetadataWorkflowConfig):
195
209
  """
196
210
  Task that creates and runs the ingestion workflow.
@@ -203,13 +217,11 @@ def metadata_ingestion_workflow(workflow_config: OpenMetadataWorkflowConfig):
203
217
 
204
218
  set_operator_logger(workflow_config)
205
219
 
206
- config = json.loads(workflow_config.model_dump_json(exclude_defaults=False))
220
+ config = json.loads(
221
+ workflow_config.model_dump_json(exclude_defaults=False, mask_secrets=False)
222
+ )
207
223
  workflow = MetadataWorkflow.create(config)
208
-
209
- workflow.execute()
210
- workflow.raise_from_status()
211
- workflow.print_status()
212
- workflow.stop()
224
+ execute_workflow(workflow, workflow_config)
213
225
 
214
226
 
215
227
  def build_workflow_config_property(
@@ -222,6 +234,7 @@ def build_workflow_config_property(
222
234
  """
223
235
  return WorkflowConfig(
224
236
  loggerLevel=ingestion_pipeline.loggerLevel or LogLevels.INFO,
237
+ raiseOnError=ingestion_pipeline.raiseOnError,
225
238
  openMetadataServerConfig=ingestion_pipeline.openMetadataServerConnection,
226
239
  )
227
240
 
@@ -247,11 +260,19 @@ def build_dag_configs(ingestion_pipeline: IngestionPipeline) -> dict:
247
260
  :param ingestion_pipeline: pipeline configs
248
261
  :return: dict to use as kwargs
249
262
  """
250
-
251
- if ingestion_pipeline.airflowConfig.startDate:
252
- start_date = ingestion_pipeline.airflowConfig.startDate.root
263
+ # Determine start_date based on schedule_interval using croniter
264
+ schedule_interval = ingestion_pipeline.airflowConfig.scheduleInterval
265
+ now = datetime.now()
266
+
267
+ if schedule_interval is None:
268
+ # On-demand DAG, set start_date to now
269
+ start_date = now
270
+ elif croniter.is_valid(schedule_interval):
271
+ cron = croniter(schedule_interval, now)
272
+ start_date = cron.get_prev(datetime)
253
273
  else:
254
- start_date = datetime.now() - timedelta(days=1)
274
+ # Handle invalid cron expressions if necessary
275
+ start_date = now
255
276
 
256
277
  return {
257
278
  "dag_id": clean_dag_id(ingestion_pipeline.name.root),
@@ -272,7 +293,7 @@ def build_dag_configs(ingestion_pipeline: IngestionPipeline) -> dict:
272
293
  "is_paused_upon_creation": ingestion_pipeline.airflowConfig.pausePipeline
273
294
  or False,
274
295
  "catchup": ingestion_pipeline.airflowConfig.pipelineCatchup or False,
275
- "schedule_interval": ingestion_pipeline.airflowConfig.scheduleInterval,
296
+ "schedule_interval": schedule_interval,
276
297
  "tags": [
277
298
  "OpenMetadata",
278
299
  clean_name_tag(ingestion_pipeline.displayName)
@@ -347,9 +368,16 @@ def build_dag(
347
368
  ingestion_pipeline: IngestionPipeline,
348
369
  workflow_config: Union[OpenMetadataWorkflowConfig, OpenMetadataApplicationConfig],
349
370
  workflow_fn: Callable,
371
+ params: Optional[dict] = None,
350
372
  ) -> DAG:
351
373
  """
352
374
  Build a simple metadata workflow DAG
375
+ :param task_name: Name of the task
376
+ :param ingestion_pipeline: Pipeline configs
377
+ :param workflow_config: Workflow configurations
378
+ :param workflow_fn: Function to be executed
379
+ :param params: Optional parameters to pass to the operator
380
+ :return: DAG
353
381
  """
354
382
 
355
383
  with DAG(**build_dag_configs(ingestion_pipeline)) as dag:
@@ -360,7 +388,9 @@ def build_dag(
360
388
  CustomPythonOperator(
361
389
  task_id=task_name,
362
390
  python_callable=workflow_fn,
363
- op_kwargs={"workflow_config": workflow_config},
391
+ op_kwargs={
392
+ "workflow_config": workflow_config,
393
+ },
364
394
  # There's no need to retry if we have had an error. Wait until the next schedule or manual rerun.
365
395
  retries=ingestion_pipeline.airflowConfig.retries or 0,
366
396
  # each DAG will call its own OpenMetadataWorkflowConfig
@@ -369,6 +399,7 @@ def build_dag(
369
399
  owner=ingestion_pipeline.owners.root[0].name
370
400
  if (ingestion_pipeline.owners and ingestion_pipeline.owners.root)
371
401
  else "openmetadata",
402
+ params=params,
372
403
  )
373
404
 
374
405
  return dag
@@ -1,8 +1,8 @@
1
1
  # Copyright 2022 Collate
2
- # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- # http://www.apache.org/licenses/LICENSE-2.0
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
6
  # Unless required by applicable law or agreed to in writing, software
7
7
  # distributed under the License is distributed on an "AS IS" BASIS,
8
8
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -42,7 +42,7 @@ def build_dbt_workflow_config(
42
42
  """
43
43
 
44
44
  source = build_source(ingestion_pipeline)
45
- source.type = f"dbt" # Mark the source as dbt
45
+ source.type = "dbt" # Mark the source as dbt
46
46
 
47
47
  workflow_config = OpenMetadataWorkflowConfig(
48
48
  source=source,
@@ -52,6 +52,7 @@ def build_dbt_workflow_config(
52
52
  ),
53
53
  workflowConfig=build_workflow_config_property(ingestion_pipeline),
54
54
  ingestionPipelineFQN=ingestion_pipeline.fullyQualifiedName.root,
55
+ enableStreamableLogs=ingestion_pipeline.enableStreamableLogs,
55
56
  )
56
57
 
57
58
  return workflow_config
@@ -1,8 +1,8 @@
1
- # Copyright 2021 Collate
2
- # Licensed under the Apache License, Version 2.0 (the "License");
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
5
- # http://www.apache.org/licenses/LICENSE-2.0
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
6
  # Unless required by applicable law or agreed to in writing, software
7
7
  # distributed under the License is distributed on an "AS IS" BASIS,
8
8
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.