apache-airflow-providers-databricks 7.6.0rc1__py3-none-any.whl → 7.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/databricks/__init__.py +1 -1
- airflow/providers/databricks/hooks/databricks.py +2 -1
- airflow/providers/databricks/hooks/databricks_sql.py +36 -6
- airflow/providers/databricks/operators/databricks.py +27 -22
- airflow/providers/databricks/operators/databricks_workflow.py +5 -1
- airflow/providers/databricks/plugins/databricks_workflow.py +3 -3
- airflow/providers/databricks/triggers/databricks.py +3 -13
- airflow/providers/databricks/utils/databricks.py +53 -1
- airflow/providers/databricks/utils/openlineage.py +16 -19
- {apache_airflow_providers_databricks-7.6.0rc1.dist-info → apache_airflow_providers_databricks-7.7.0.dist-info}/METADATA +25 -21
- {apache_airflow_providers_databricks-7.6.0rc1.dist-info → apache_airflow_providers_databricks-7.7.0.dist-info}/RECORD +13 -13
- {apache_airflow_providers_databricks-7.6.0rc1.dist-info → apache_airflow_providers_databricks-7.7.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_databricks-7.6.0rc1.dist-info → apache_airflow_providers_databricks-7.7.0.dist-info}/entry_points.txt +0 -0

airflow/providers/databricks/__init__.py
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version

 __all__ = ["__version__"]

-__version__ = "7.
+__version__ = "7.7.0"

 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.10.0"

airflow/providers/databricks/hooks/databricks.py
@@ -62,6 +62,7 @@ CREATE_REPO_ENDPOINT = ("POST", "2.0/repos")

 LIST_JOBS_ENDPOINT = ("GET", "2.1/jobs/list")
 LIST_PIPELINES_ENDPOINT = ("GET", "2.0/pipelines")
+LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "2.0/sql/endpoints")

 WORKSPACE_GET_STATUS_ENDPOINT = ("GET", "2.0/workspace/get-status")

@@ -770,7 +771,7 @@ class DatabricksHook(BaseDatabricksHook):
         :param json: payload
         :return: json containing permission specification
         """
-        return self._do_api_call(("PATCH", f"
+        return self._do_api_call(("PATCH", f"2.0/permissions/jobs/{job_id}"), json)

     def post_sql_statement(self, json: dict[str, Any]) -> str:
         """
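
Note: a minimal usage sketch for the permissions call whose endpoint path changes above. The method name update_job_permission is assumed from the surrounding hunk, and the connection id, job id, and principal are placeholder values:

from airflow.providers.databricks.hooks.databricks import DatabricksHook

hook = DatabricksHook(databricks_conn_id="databricks_default")  # placeholder connection id
# Grant CAN_MANAGE on a job; the job id and user below are made-up example values.
hook.update_job_permission(  # assumed method name wrapping the PATCH 2.0/permissions/jobs/{job_id} call
    1234,
    {"access_control_list": [{"user_name": "someone@example.com", "permission_level": "CAN_MANAGE"}]},
)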

airflow/providers/databricks/hooks/databricks_sql.py
@@ -30,13 +30,15 @@ from typing import (
     overload,
 )

-from databricks import sql
+from databricks import sql
 from databricks.sql.types import Row
+from sqlalchemy.engine import URL

 from airflow.exceptions import AirflowException
 from airflow.providers.common.sql.hooks.handlers import return_single_query_results
 from airflow.providers.common.sql.hooks.sql import DbApiHook
 from airflow.providers.databricks.exceptions import DatabricksSqlExecutionError, DatabricksSqlExecutionTimeout
+from airflow.providers.databricks.hooks.databricks import LIST_SQL_ENDPOINTS_ENDPOINT
 from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook

 if TYPE_CHECKING:
@@ -47,9 +49,6 @@ if TYPE_CHECKING:
     from airflow.providers.openlineage.sqlparser import DatabaseInfo


-LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "api/2.0/sql/endpoints")
-
-
 T = TypeVar("T")


@@ -173,7 +172,38 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
             raise AirflowException("SQL connection is not initialized")
         return cast("AirflowConnection", self._sql_conn)

-    @overload
+    @property
+    def sqlalchemy_url(self) -> URL:
+        """
+        Return a Sqlalchemy.engine.URL object from the connection.
+
+        :return: the extracted sqlalchemy.engine.URL object.
+        """
+        conn = self.get_conn()
+        url_query = {
+            "http_path": self._http_path,
+            "catalog": self.catalog,
+            "schema": self.schema,
+        }
+        url_query = {k: v for k, v in url_query.items() if v is not None}
+        return URL.create(
+            drivername="databricks",
+            username="token",
+            password=conn.password,
+            host=conn.host,
+            port=conn.port,
+            query=url_query,
+        )
+
+    def get_uri(self) -> str:
+        """
+        Extract the URI from the connection.
+
+        :return: the extracted uri.
+        """
+        return self.sqlalchemy_url.render_as_string(hide_password=False)
+
+    @overload
     def run(
         self,
         sql: str | Iterable[str],
@@ -258,7 +288,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):

                 # TODO: adjust this to make testing easier
                 try:
-                    self._run_command(cur, sql_statement, parameters)
+                    self._run_command(cur, sql_statement, parameters)
                 except Exception as e:
                     if t is None or t.is_alive():
                         raise DatabricksSqlExecutionError(
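
Note: a minimal sketch of how the new sqlalchemy_url property and get_uri() could be used; the connection id and warehouse http_path are placeholder values:

from sqlalchemy import create_engine

from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook

hook = DatabricksSqlHook(
    databricks_conn_id="databricks_default",  # placeholder connection id
    http_path="/sql/1.0/warehouses/abc123",  # placeholder SQL warehouse path
)
url = hook.sqlalchemy_url  # sqlalchemy.engine.URL with drivername "databricks" and token auth
engine = create_engine(hook.get_uri())  # the URI string is rendered with the password included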

airflow/providers/databricks/operators/databricks.py
@@ -46,21 +46,32 @@ from airflow.providers.databricks.plugins.databricks_workflow import (
 from airflow.providers.databricks.triggers.databricks import (
     DatabricksExecutionTrigger,
 )
-from airflow.providers.databricks.utils.databricks import
+from airflow.providers.databricks.utils.databricks import (
+    extract_failed_task_errors,
+    normalise_json_content,
+    validate_trigger_event,
+)
 from airflow.providers.databricks.utils.mixins import DatabricksSQLStatementsMixin
 from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator

 if TYPE_CHECKING:
     from airflow.models.taskinstancekey import TaskInstanceKey
+    from airflow.providers.databricks.operators.databricks_workflow import (
+        DatabricksWorkflowTaskGroup,
+    )
     from airflow.providers.openlineage.extractors import OperatorLineage
     from airflow.utils.context import Context
-
+
+try:
+    from airflow.sdk import TaskGroup
+except ImportError:
+    from airflow.utils.task_group import TaskGroup  # type: ignore[no-redef]

 if AIRFLOW_V_3_0_PLUS:
     from airflow.sdk import BaseOperatorLink
     from airflow.sdk.execution_time.xcom import XCom
 else:
-    from airflow.models import XCom
+    from airflow.models import XCom
     from airflow.models.baseoperatorlink import BaseOperatorLink  # type: ignore[no-redef]

 DEFER_METHOD_NAME = "execute_complete"
@@ -95,17 +106,7 @@ def _handle_databricks_operator_execution(operator, hook, log, context) -> None:
                     log.info("View run status, Spark UI, and logs at %s", run_page_url)
                     return
                 if run_state.result_state == "FAILED":
-                    failed_tasks = []
-                    for task in run_info.get("tasks", []):
-                        if task.get("state", {}).get("result_state", "") == "FAILED":
-                            task_run_id = task["run_id"]
-                            task_key = task["task_key"]
-                            run_output = hook.get_run_output(task_run_id)
-                            if "error" in run_output:
-                                error = run_output["error"]
-                            else:
-                                error = run_state.state_message
-                            failed_tasks.append({"task_key": task_key, "run_id": task_run_id, "error": error})
+                    failed_tasks = extract_failed_task_errors(hook, run_info, run_state)

                 error_message = (
                     f"{operator.task_id} failed with terminal state: {run_state} "
@@ -1324,15 +1325,15 @@ class DatabricksTaskBaseOperator(BaseOperator, ABC):

         return self.databricks_run_id

-    def _handle_terminal_run_state(self, run_state: RunState) -> None:
+    def _handle_terminal_run_state(self, run_state: RunState, errors: list) -> None:
         """Handle the terminal state of the run."""
         if run_state.life_cycle_state != RunLifeCycleState.TERMINATED.value:
             raise AirflowException(
-                f"Databricks job failed with state {run_state.life_cycle_state}. Message: {run_state.state_message}"
+                f"Databricks job failed with state {run_state.life_cycle_state}. Message: {run_state.state_message}. Errors: {errors}"
             )
         if not run_state.is_successful:
             raise AirflowException(
-                f"Task failed. Final state {run_state.result_state}. Reason: {run_state.state_message}"
+                f"Task failed. Final state {run_state.result_state}. Reason: {run_state.state_message}. Errors: {errors}"
             )
         self.log.info("Task succeeded. Final state %s.", run_state.result_state)

@@ -1414,12 +1415,17 @@
             time.sleep(self.polling_period_seconds)
             run = self._hook.get_run(current_task_run_id)
             run_state = RunState(**run["state"])
+
             self.log.info(
                 "Current state of the databricks task %s is %s",
                 self.databricks_task_key,
                 run_state.life_cycle_state,
             )
-
+
+        # Extract errors from the run response using utility function
+        errors = extract_failed_task_errors(self._hook, run, run_state)
+
+        self._handle_terminal_run_state(run_state, errors)

     def execute(self, context: Context) -> None:
         """Execute the operator. Launch the job and monitor it if wait_for_termination is set to True."""
@@ -1428,9 +1434,7 @@
         if not self.workflow_run_metadata:
             launch_task_id = next(task for task in self.upstream_task_ids if task.endswith(".launch"))
             self.workflow_run_metadata = context["ti"].xcom_pull(task_ids=launch_task_id)
-        workflow_run_metadata = WorkflowRunMetadata(
-            **self.workflow_run_metadata
-        )
+        workflow_run_metadata = WorkflowRunMetadata(**self.workflow_run_metadata)
         self.databricks_run_id = workflow_run_metadata.run_id
         self.databricks_conn_id = workflow_run_metadata.conn_id

@@ -1449,7 +1453,8 @@

     def execute_complete(self, context: dict | None, event: dict) -> None:
         run_state = RunState.from_json(event["run_state"])
-
+        errors = event.get("errors", [])
+        self._handle_terminal_run_state(run_state, errors)


 class DatabricksNotebookOperator(DatabricksTaskBaseOperator):

airflow/providers/databricks/operators/databricks_workflow.py
@@ -33,7 +33,11 @@ from airflow.providers.databricks.plugins.databricks_workflow import (
     store_databricks_job_run_link,
 )
 from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator
-
+
+try:
+    from airflow.sdk import TaskGroup
+except ImportError:
+    from airflow.utils.task_group import TaskGroup  # type: ignore[no-redef]

 if TYPE_CHECKING:
     from types import TracebackType

airflow/providers/databricks/plugins/databricks_workflow.py
@@ -41,7 +41,6 @@ else:
     from airflow.www import auth  # type: ignore
 from airflow.utils.log.logging_mixin import LoggingMixin
 from airflow.utils.state import TaskInstanceState
-from airflow.utils.task_group import TaskGroup

 if TYPE_CHECKING:
     from sqlalchemy.orm.session import Session
@@ -51,11 +50,12 @@ if TYPE_CHECKING:
     from airflow.utils.context import Context

 if AIRFLOW_V_3_0_PLUS:
-    from airflow.sdk import BaseOperatorLink
+    from airflow.sdk import BaseOperatorLink, TaskGroup
     from airflow.sdk.execution_time.xcom import XCom
 else:
-    from airflow.models import XCom
+    from airflow.models import XCom
     from airflow.models.baseoperatorlink import BaseOperatorLink  # type: ignore[no-redef]
+    from airflow.utils.task_group import TaskGroup  # type: ignore[no-redef]


 REPAIR_WAIT_ATTEMPTS = os.getenv("DATABRICKS_REPAIR_WAIT_ATTEMPTS", 20)

airflow/providers/databricks/triggers/databricks.py
@@ -22,6 +22,7 @@ import time
 from typing import Any

 from airflow.providers.databricks.hooks.databricks import DatabricksHook
+from airflow.providers.databricks.utils.databricks import extract_failed_task_errors_async
 from airflow.triggers.base import BaseTrigger, TriggerEvent


@@ -97,19 +98,8 @@ class DatabricksExecutionTrigger(BaseTrigger):
                     await asyncio.sleep(self.polling_period_seconds)
                    continue

-
-
-                run_info = await self.hook.a_get_run(self.run_id)
-                for task in run_info.get("tasks", []):
-                    if task.get("state", {}).get("result_state", "") == "FAILED":
-                        task_run_id = task["run_id"]
-                        task_key = task["task_key"]
-                        run_output = await self.hook.a_get_run_output(task_run_id)
-                        if "error" in run_output:
-                            error = run_output["error"]
-                        else:
-                            error = run_state.state_message
-                        failed_tasks.append({"task_key": task_key, "run_id": task_run_id, "error": error})
+                run_info = await self.hook.a_get_run(self.run_id)
+                failed_tasks = await extract_failed_task_errors_async(self.hook, run_info, run_state)
                 yield TriggerEvent(
                     {
                         "run_id": self.run_id,
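
Note: with this change the trigger reuses the shared helper, and the event it yields carries the failed-task details. A sketch of the payload shape that execute_complete() reads, with made-up values and any other event keys omitted:

event = {
    "run_id": 456,  # made-up Databricks run id
    "run_state": '{"life_cycle_state": "TERMINATED", "result_state": "FAILED", "state_message": "Job failed"}',
    "errors": [{"task_key": "ingest", "run_id": 789, "error": "Notebook exception"}],  # made-up task error
}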

airflow/providers/databricks/utils/databricks.py
@@ -18,7 +18,7 @@
 from __future__ import annotations

 from airflow.exceptions import AirflowException
-from airflow.providers.databricks.hooks.databricks import RunState
+from airflow.providers.databricks.hooks.databricks import DatabricksHook, RunState


 def normalise_json_content(content, json_path: str = "json") -> str | bool | list | dict:
@@ -48,6 +48,58 @@ def normalise_json_content(content, json_path: str = "json") -> str | bool | lis
         raise AirflowException(msg)


+def extract_failed_task_errors(
+    hook: DatabricksHook, run_info: dict, run_state: RunState
+) -> list[dict[str, str | int]]:
+    """
+    Extract error information from failed tasks in a Databricks run (synchronous version).
+
+    :param hook: Databricks hook instance for making API calls
+    :param run_info: Run information dictionary from Databricks API
+    :param run_state: Run state object
+    :return: List of failed task information with task_key, run_id, and error
+    """
+    failed_tasks = []
+    if run_state.result_state == "FAILED":
+        for task in run_info.get("tasks", []):
+            if task.get("state", {}).get("result_state", "") == "FAILED":
+                task_run_id = task["run_id"]
+                task_key = task["task_key"]
+                run_output = hook.get_run_output(task_run_id)
+                if "error" in run_output:
+                    error = run_output["error"]
+                else:
+                    error = run_state.state_message
+                failed_tasks.append({"task_key": task_key, "run_id": task_run_id, "error": error})
+    return failed_tasks
+
+
+async def extract_failed_task_errors_async(
+    hook: DatabricksHook, run_info: dict, run_state: RunState
+) -> list[dict[str, str | int]]:
+    """
+    Extract error information from failed tasks in a Databricks run (asynchronous version).
+
+    :param hook: Databricks hook instance for making API calls
+    :param run_info: Run information dictionary from Databricks API
+    :param run_state: Run state object
+    :return: List of failed task information with task_key, run_id, and error
+    """
+    failed_tasks = []
+    if run_state.result_state == "FAILED":
+        for task in run_info.get("tasks", []):
+            if task.get("state", {}).get("result_state", "") == "FAILED":
+                task_run_id = task["run_id"]
+                task_key = task["task_key"]
+                run_output = await hook.a_get_run_output(task_run_id)
+                if "error" in run_output:
+                    error = run_output["error"]
+                else:
+                    error = run_state.state_message
+                failed_tasks.append({"task_key": task_key, "run_id": task_run_id, "error": error})
+    return failed_tasks
+
+
 def validate_trigger_event(event: dict):
     """
     Validate correctness of the event received from DatabricksExecutionTrigger.
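
Note: a small sketch of the data the new helpers operate on; the connection id and run payload are made-up values, and the call fetches each failed task's output through the Jobs API:

from airflow.providers.databricks.hooks.databricks import DatabricksHook, RunState
from airflow.providers.databricks.utils.databricks import extract_failed_task_errors

hook = DatabricksHook(databricks_conn_id="databricks_default")  # placeholder connection id
run_state = RunState(life_cycle_state="TERMINATED", result_state="FAILED", state_message="Job failed")
run_info = {"tasks": [{"task_key": "ingest", "run_id": 123, "state": {"result_state": "FAILED"}}]}

# Returns [{"task_key": "ingest", "run_id": 123, "error": ...}]; the error comes from
# hook.get_run_output(123) when it contains one, otherwise from run_state.state_message.
failed_tasks = extract_failed_task_errors(hook, run_info, run_state)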

airflow/providers/databricks/utils/openlineage.py
@@ -124,22 +124,15 @@ def _get_parent_run_facet(task_instance):

 def _run_api_call(hook: DatabricksSqlHook | DatabricksHook, query_ids: list[str]) -> list[dict]:
     """Retrieve execution details for specific queries from Databricks's query history API."""
-
-
-
-
-
-
-
-
-
-        response.raise_for_status()
-    except Exception as e:
-        log.warning(
-            "OpenLineage could not retrieve Databricks queries details. Error received: `%s`.",
-            e,
-        )
-        return []
+    token = hook._get_token(raise_error=True)
+    # https://docs.databricks.com/api/azure/workspace/queryhistory/list
+    response = requests.get(
+        url=f"https://{hook.host}/api/2.0/sql/history/queries",
+        headers={"Authorization": f"Bearer {token}"},
+        data=json.dumps({"filter_by": {"statement_ids": query_ids}}),
+        timeout=3,
+    )
+    response.raise_for_status()

     return response.json()["res"]

@@ -176,7 +169,11 @@ def _get_queries_details_from_databricks(
             if query_info["query_id"]
         }
     except Exception as e:
-        log.
+        log.info(
+            "OpenLineage encountered an error while retrieving additional metadata about SQL queries"
+            " from Databricks. The process will continue with default values. Error details: %s",
+            e,
+        )

     return query_details

@@ -345,8 +342,8 @@ def emit_openlineage_events_for_databricks_queries(
         event_batch = _create_ol_event_pair(
             job_namespace=namespace(),
             job_name=f"{task_instance.dag_id}.{task_instance.task_id}.query.{counter}",
-            start_time=query_metadata.get("start_time") or default_event_time,
-            end_time=query_metadata.get("end_time") or default_event_time,
+            start_time=query_metadata.get("start_time") or default_event_time,
+            end_time=query_metadata.get("end_time") or default_event_time,
             # Only finished status means it completed without failures
             is_successful=(query_metadata.get("status") or default_state).lower() == "finished",
             run_facets={**query_specific_run_facets, **common_run_facets, **additional_run_facets},
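
Note: a standalone sketch of the request that the rewritten _run_api_call() issues against the query history API; the workspace host, token, and statement id are placeholder values:

import json

import requests

host = "adb-1234567890123456.7.azuredatabricks.net"  # placeholder workspace host
token = "dapi-example-token"  # placeholder access token
query_ids = ["01ef-example-statement-id"]  # placeholder statement ids

response = requests.get(
    url=f"https://{host}/api/2.0/sql/history/queries",
    headers={"Authorization": f"Bearer {token}"},
    data=json.dumps({"filter_by": {"statement_ids": query_ids}}),
    timeout=3,
)
response.raise_for_status()
queries = response.json()["res"]  # per the diff, query details are returned under the "res" key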

{apache_airflow_providers_databricks-7.6.0rc1.dist-info → apache_airflow_providers_databricks-7.7.0.dist-info}/METADATA
@@ -1,11 +1,11 @@
 Metadata-Version: 2.4
 Name: apache-airflow-providers-databricks
-Version: 7.6.0rc1
+Version: 7.7.0
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
 Maintainer-email: Apache Software Foundation <dev@airflow.apache.org>
-Requires-Python:
+Requires-Python: >=3.10
 Description-Content-Type: text/x-rst
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Environment :: Console
@@ -18,33 +18,34 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: System :: Monitoring
-Requires-Dist: apache-airflow>=2.10.
-Requires-Dist: apache-airflow-providers-common-compat>=1.6.
-Requires-Dist: apache-airflow-providers-common-sql>=1.27.
+Requires-Dist: apache-airflow>=2.10.0
+Requires-Dist: apache-airflow-providers-common-compat>=1.6.0
+Requires-Dist: apache-airflow-providers-common-sql>=1.27.0
+Requires-Dist: apache-airflow-providers-openlineage>=2.3.0
 Requires-Dist: requests>=2.32.0,<3
-Requires-Dist: databricks-sql-connector>=
+Requires-Dist: databricks-sql-connector>=4.0.0
 Requires-Dist: databricks-sqlalchemy>=1.0.2
 Requires-Dist: aiohttp>=3.9.2, <4
 Requires-Dist: mergedeep>=1.3.4
 Requires-Dist: pandas>=2.1.2; python_version <"3.13"
 Requires-Dist: pandas>=2.2.3; python_version >="3.13"
-Requires-Dist: pyarrow>=16.1.0
+Requires-Dist: pyarrow>=16.1.0; python_version < '3.13'
+Requires-Dist: pyarrow>=18.0.0; python_version >= '3.13'
 Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
-Requires-Dist: apache-airflow-providers-fab ; extra == "fab"
-Requires-Dist: apache-airflow-providers-openlineage>=2.3.0rc1 ; extra == "openlineage"
+Requires-Dist: apache-airflow-providers-fab>=2.2.0 ; extra == "fab" and ( python_version < '3.13')
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Requires-Dist: apache-airflow-providers-standard ; extra == "standard"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.
-Project-URL: Documentation, https://airflow.
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.0
 Project-URL: Mastodon, https://fosstodon.org/@airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
 Provides-Extra: azure-identity
 Provides-Extra: fab
-Provides-Extra: openlineage
 Provides-Extra: sdk
 Provides-Extra: standard

@@ -73,8 +74,9 @@ Provides-Extra: standard

 Package ``apache-airflow-providers-databricks``

-Release: ``7.
+Release: ``7.7.0``

+Release Date: ``|PypiReleaseDate|``

 `Databricks <https://databricks.com/>`__

@@ -86,7 +88,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.0/>`_.

 Installation
 ------------
@@ -95,26 +97,28 @@ You can install this package on top of an existing Airflow 2 installation (see `
 for the minimum Airflow version supported) via
 ``pip install apache-airflow-providers-databricks``

-The package supports the following python versions: 3.10,3.11,3.12
+The package supports the following python versions: 3.10,3.11,3.12,3.13

 Requirements
 ------------

-==========================================
+========================================== ======================================
 PIP package                                Version required
-==========================================
+========================================== ======================================
 ``apache-airflow``                         ``>=2.10.0``
 ``apache-airflow-providers-common-compat`` ``>=1.6.0``
 ``apache-airflow-providers-common-sql``    ``>=1.27.0``
+``apache-airflow-providers-openlineage``   ``>=2.3.0``
 ``requests``                               ``>=2.32.0,<3``
-``databricks-sql-connector``               ``>=
+``databricks-sql-connector``               ``>=4.0.0``
 ``databricks-sqlalchemy``                  ``>=1.0.2``
 ``aiohttp``                                ``>=3.9.2,<4``
 ``mergedeep``                              ``>=1.3.4``
 ``pandas``                                 ``>=2.1.2; python_version < "3.13"``
 ``pandas``                                 ``>=2.2.3; python_version >= "3.13"``
-``pyarrow``                                ``>=16.1.0``
-
+``pyarrow``                                ``>=16.1.0; python_version < "3.13"``
+``pyarrow``                                ``>=18.0.0; python_version >= "3.13"``
+========================================== ======================================

 Cross provider package dependencies
 -----------------------------------
@@ -139,5 +143,5 @@ Dependent package
 ================================================================================================================== =================

 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.7.0/changelog.html>`_.


{apache_airflow_providers_databricks-7.6.0rc1.dist-info → apache_airflow_providers_databricks-7.7.0.dist-info}/RECORD
@@ -1,30 +1,30 @@
 airflow/providers/databricks/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
-airflow/providers/databricks/__init__.py,sha256=
+airflow/providers/databricks/__init__.py,sha256=ch567KD4zxy2WvI--zqEcirDTKHH8fqL37GC1PCo7WY,1499
 airflow/providers/databricks/exceptions.py,sha256=85RklmLOI_PnTzfXNIUd5fAu2aMMUhelwumQAX0wANE,1261
 airflow/providers/databricks/get_provider_info.py,sha256=NZ-rY6k6ctDZN7rDngN7mAzq7RMhLag5NwfnuBNcKuw,5644
 airflow/providers/databricks/version_compat.py,sha256=7RHBehpYMeNSBtmJiPUeJHA0c7l-Eqsdy546kW3RFa4,1712
 airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/hooks/databricks.py,sha256
+airflow/providers/databricks/hooks/databricks.py,sha256=-Mrgig6XXQ1Uzk_0gfPlxu93N6aNkfdPiugGTvZfndg,29020
 airflow/providers/databricks/hooks/databricks_base.py,sha256=gish0H2rHEzPqI5ZpU3BPFCUaycHMEYGYev0ufJMzzI,35167
-airflow/providers/databricks/hooks/databricks_sql.py,sha256=
+airflow/providers/databricks/hooks/databricks_sql.py,sha256=x-Wgmass2IQRm7wXAqMCmZNUvtb2GAM9_4VTla_fBV4,17765
 airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/operators/databricks.py,sha256=
+airflow/providers/databricks/operators/databricks.py,sha256=6T5T2YdfX9tKkDZ--5WiiYh08rGPzVaRibpCQAuyvzw,79884
 airflow/providers/databricks/operators/databricks_repos.py,sha256=NLigItgvQOpxhDhttkU2Jhrcu1gODXQME2i5f8w7gYk,13311
 airflow/providers/databricks/operators/databricks_sql.py,sha256=QmFUM83jY0pvnG4K-iM7Kuc4H48ORIx2jgGoOdAtEJw,21836
-airflow/providers/databricks/operators/databricks_workflow.py,sha256=
+airflow/providers/databricks/operators/databricks_workflow.py,sha256=BAWsfFdEG-7p0_6ykkz-xZX1-vdtHnS8uhwjDFpevyg,15088
 airflow/providers/databricks/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/databricks/plugins/databricks_workflow.py,sha256=
+airflow/providers/databricks/plugins/databricks_workflow.py,sha256=mur_BvrhI4Suf4Vz4lThoLR19_LePLwTUzrKzGLfdEY,20125
 airflow/providers/databricks/sensors/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/sensors/databricks.py,sha256=AVSqvHDr7iDXL1WZ46MTN3KUnVSIOc_g5JEViA1MeVE,6428
 airflow/providers/databricks/sensors/databricks_partition.py,sha256=1PZo-rdRo6E7yBa30ISFjgQ-iaFdqPYm0gnN5tXgxCU,10205
 airflow/providers/databricks/sensors/databricks_sql.py,sha256=cbPKia5eH2no_sl-LltjBA-1qM64lurmB8lT9QR9eGk,5948
 airflow/providers/databricks/triggers/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/triggers/databricks.py,sha256=
+airflow/providers/databricks/triggers/databricks.py,sha256=DQbXLw1W_e3Iw-hsDph7vPuHc2caj623V7WmA2_PftM,8672
 airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/databricks/utils/databricks.py,sha256=
+airflow/providers/databricks/utils/databricks.py,sha256=ecvzZbC4KdXds47VeSayot9EO-RQnTRJTEwKITH7waQ,5117
 airflow/providers/databricks/utils/mixins.py,sha256=WUmkt3AmXalmV6zOUIJZWbTldxYunAZOstddDhKCC94,7407
-airflow/providers/databricks/utils/openlineage.py,sha256=
-apache_airflow_providers_databricks-7.
-apache_airflow_providers_databricks-7.
-apache_airflow_providers_databricks-7.
-apache_airflow_providers_databricks-7.
+airflow/providers/databricks/utils/openlineage.py,sha256=1jT5Woh9YifawdP-VFWsabfF-ecuCjPlzD5P_W4DAhI,15078
+apache_airflow_providers_databricks-7.7.0.dist-info/entry_points.txt,sha256=hjmZm3ab2cteTR4t9eE28oKixHwNIKtLCThd6sx3XRQ,227
+apache_airflow_providers_databricks-7.7.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+apache_airflow_providers_databricks-7.7.0.dist-info/METADATA,sha256=ngrJovznPHM6ZLMZbfMCjIRfNv-20CDt1zuDIjyEWbk,7265
+apache_airflow_providers_databricks-7.7.0.dist-info/RECORD,,

WHEEL and entry_points.txt: files without changes