apache-airflow-providers-databricks 7.8.2rc1__py3-none-any.whl → 7.9.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/databricks/__init__.py +1 -1
- airflow/providers/databricks/hooks/databricks.py +17 -17
- airflow/providers/databricks/hooks/databricks_base.py +1 -2
- airflow/providers/databricks/hooks/databricks_sql.py +10 -2
- airflow/providers/databricks/operators/databricks.py +2 -3
- airflow/providers/databricks/operators/databricks_sql.py +164 -29
- airflow/providers/databricks/plugins/databricks_workflow.py +25 -7
- airflow/providers/databricks/sensors/databricks.py +1 -2
- {apache_airflow_providers_databricks-7.8.2rc1.dist-info → apache_airflow_providers_databricks-7.9.0rc1.dist-info}/METADATA +19 -10
- {apache_airflow_providers_databricks-7.8.2rc1.dist-info → apache_airflow_providers_databricks-7.9.0rc1.dist-info}/RECORD +14 -14
- {apache_airflow_providers_databricks-7.8.2rc1.dist-info → apache_airflow_providers_databricks-7.9.0rc1.dist-info}/licenses/NOTICE +1 -1
- {apache_airflow_providers_databricks-7.8.2rc1.dist-info → apache_airflow_providers_databricks-7.9.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_databricks-7.8.2rc1.dist-info → apache_airflow_providers_databricks-7.9.0rc1.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_databricks-7.8.2rc1.dist-info → apache_airflow_providers_databricks-7.9.0rc1.dist-info}/licenses/LICENSE +0 -0

airflow/providers/databricks/__init__.py

@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version

 __all__ = ["__version__"]

-__version__ = "7.
+__version__ = "7.9.0"

 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.11.0"

airflow/providers/databricks/hooks/databricks.py

@@ -37,10 +37,10 @@ from requests import exceptions as requests_exceptions
 from airflow.providers.common.compat.sdk import AirflowException
 from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook

-GET_CLUSTER_ENDPOINT = ("GET", "2.
-RESTART_CLUSTER_ENDPOINT = ("POST", "2.
-START_CLUSTER_ENDPOINT = ("POST", "2.
-TERMINATE_CLUSTER_ENDPOINT = ("POST", "2.
+GET_CLUSTER_ENDPOINT = ("GET", "2.1/clusters/get")
+RESTART_CLUSTER_ENDPOINT = ("POST", "2.1/clusters/restart")
+START_CLUSTER_ENDPOINT = ("POST", "2.1/clusters/start")
+TERMINATE_CLUSTER_ENDPOINT = ("POST", "2.1/clusters/delete")

 CREATE_ENDPOINT = ("POST", "2.2/jobs/create")
 RESET_ENDPOINT = ("POST", "2.2/jobs/reset")

@@ -54,20 +54,20 @@ REPAIR_RUN_ENDPOINT = ("POST", "2.2/jobs/runs/repair")
 OUTPUT_RUNS_JOB_ENDPOINT = ("GET", "2.2/jobs/runs/get-output")
 CANCEL_ALL_RUNS_ENDPOINT = ("POST", "2.2/jobs/runs/cancel-all")

-INSTALL_LIBS_ENDPOINT = ("POST", "2.
-UNINSTALL_LIBS_ENDPOINT = ("POST", "2.
-UPDATE_REPO_ENDPOINT = ("PATCH", "2.
-DELETE_REPO_ENDPOINT = ("DELETE", "2.
-CREATE_REPO_ENDPOINT = ("POST", "2.
+INSTALL_LIBS_ENDPOINT = ("POST", "2.0/libraries/install")
+UNINSTALL_LIBS_ENDPOINT = ("POST", "2.0/libraries/uninstall")
+UPDATE_REPO_ENDPOINT = ("PATCH", "2.0/repos/")
+DELETE_REPO_ENDPOINT = ("DELETE", "2.0/repos/")
+CREATE_REPO_ENDPOINT = ("POST", "2.0/repos")

 LIST_JOBS_ENDPOINT = ("GET", "2.2/jobs/list")
-LIST_PIPELINES_ENDPOINT = ("GET", "2.
-LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "2.
+LIST_PIPELINES_ENDPOINT = ("GET", "2.0/pipelines")
+LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "2.0/sql/warehouses")

-WORKSPACE_GET_STATUS_ENDPOINT = ("GET", "2.
+WORKSPACE_GET_STATUS_ENDPOINT = ("GET", "2.0/workspace/get-status")

-SPARK_VERSIONS_ENDPOINT = ("GET", "2.
-SQL_STATEMENTS_ENDPOINT = "2.
+SPARK_VERSIONS_ENDPOINT = ("GET", "2.1/clusters/spark-versions")
+SQL_STATEMENTS_ENDPOINT = "2.0/sql/statements"


 class RunLifeCycleState(Enum):

@@ -717,7 +717,7 @@ class DatabricksHook(BaseDatabricksHook):
         """
         Install libraries on the cluster.

-        Utility function to call the ``2.
+        Utility function to call the ``2.0/libraries/install`` endpoint.

         :param json: json dictionary containing cluster_id and an array of library
         """

@@ -727,7 +727,7 @@ class DatabricksHook(BaseDatabricksHook):
         """
         Uninstall libraries on the cluster.

-        Utility function to call the ``2.
+        Utility function to call the ``2.0/libraries/uninstall`` endpoint.

         :param json: json dictionary containing cluster_id and an array of library
         """

@@ -790,7 +790,7 @@ class DatabricksHook(BaseDatabricksHook):
         :param json: payload
         :return: json containing permission specification
         """
-        return self._do_api_call(("PATCH", f"2.
+        return self._do_api_call(("PATCH", f"2.0/permissions/jobs/{job_id}"), json)

     def post_sql_statement(self, json: dict[str, Any]) -> str:
         """
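
A minimal usage sketch of the library-management calls documented above. It assumes the hook methods wrapping ``INSTALL_LIBS_ENDPOINT``/``UNINSTALL_LIBS_ENDPOINT`` are named ``install``/``uninstall`` as in the existing provider API; the connection id and payload values are placeholders, not part of this diff.

    from airflow.providers.databricks.hooks.databricks import DatabricksHook

    hook = DatabricksHook(databricks_conn_id="databricks_default")
    payload = {
        "cluster_id": "1234-567890-abcde123",  # placeholder cluster id
        "libraries": [{"pypi": {"package": "simplejson"}}],  # array of library specs
    }
    hook.install(payload)    # POST 2.0/libraries/install
    hook.uninstall(payload)  # POST 2.0/libraries/uninstall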

airflow/providers/databricks/hooks/databricks_base.py

@@ -49,8 +49,7 @@ from tenacity import (
 )

 from airflow import __version__
-from airflow.
-from airflow.providers.common.compat.sdk import AirflowException
+from airflow.providers.common.compat.sdk import AirflowException, AirflowOptionalProviderFeatureException
 from airflow.providers_manager import ProvidersManager

 try:

airflow/providers/databricks/hooks/databricks_sql.py

@@ -32,9 +32,8 @@ from typing import (

 from databricks import sql
 from databricks.sql.types import Row
-from sqlalchemy.engine import URL

-from airflow.providers.common.compat.sdk import AirflowException
+from airflow.providers.common.compat.sdk import AirflowException, AirflowOptionalProviderFeatureException
 from airflow.providers.common.sql.hooks.handlers import return_single_query_results
 from airflow.providers.common.sql.hooks.sql import DbApiHook
 from airflow.providers.databricks.exceptions import DatabricksSqlExecutionError, DatabricksSqlExecutionTimeout

@@ -43,6 +42,7 @@ from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHoo

 if TYPE_CHECKING:
     from databricks.sql.client import Connection
+    from sqlalchemy.engine import URL

     from airflow.models.connection import Connection as AirflowConnection
     from airflow.providers.openlineage.extractors import OperatorLineage

@@ -179,6 +179,14 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):

         :return: the extracted sqlalchemy.engine.URL object.
         """
+        try:
+            from sqlalchemy.engine import URL
+        except ImportError:
+            raise AirflowOptionalProviderFeatureException(
+                "sqlalchemy is required to generate the connection URL. "
+                "Install it with: pip install 'apache-airflow-providers-databricks[sqlalchemy]'"
+            )
+
         url_query = {
             "http_path": self._http_path,
             "catalog": self.catalog,

airflow/providers/databricks/operators/databricks.py

@@ -26,8 +26,7 @@ from collections.abc import Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any

-from airflow.
-from airflow.providers.common.compat.sdk import AirflowException, BaseOperator, BaseOperatorLink, XCom
+from airflow.providers.common.compat.sdk import AirflowException, BaseOperator, BaseOperatorLink, XCom, conf
 from airflow.providers.databricks.hooks.databricks import (
     DatabricksHook,
     RunLifeCycleState,

@@ -54,7 +53,7 @@ from airflow.providers.databricks.utils.mixins import DatabricksSQLStatementsMix
 from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS

 if TYPE_CHECKING:
-    from airflow.
+    from airflow.providers.common.compat.sdk import TaskInstanceKey
     from airflow.providers.databricks.operators.databricks_workflow import (
         DatabricksWorkflowTaskGroup,
     )

airflow/providers/databricks/operators/databricks_sql.py

@@ -21,13 +21,20 @@ from __future__ import annotations

 import csv
 import json
+import os
 from collections.abc import Sequence
 from functools import cached_property
+from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Any, ClassVar
+from urllib.parse import urlparse

 from databricks.sql.utils import ParamEscaper

-from airflow.providers.common.compat.sdk import
+from airflow.providers.common.compat.sdk import (
+    AirflowException,
+    AirflowOptionalProviderFeatureException,
+    BaseOperator,
+)
 from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
 from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook

@@ -62,13 +69,27 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
     :param catalog: An optional initial catalog to use. Requires DBR version 9.0+ (templated)
     :param schema: An optional initial schema to use. Requires DBR version 9.0+ (templated)
     :param output_path: optional string specifying the file to which write selected data. (templated)
-
-
+        Supports local file paths and GCS URIs (e.g., ``gs://bucket/path/file.parquet``).
+        When using GCS URIs, requires the ``apache-airflow-providers-google`` package.
+    :param output_format: format of output data if ``output_path`` is specified.
+        Possible values are ``csv``, ``json``, ``jsonl``, ``parquet``, ``avro``. Default is ``csv``.
     :param csv_params: parameters that will be passed to the ``csv.DictWriter`` class used to write CSV data.
+    :param gcp_conn_id: The connection ID to use for connecting to Google Cloud when using GCS output path.
+        Default is ``google_cloud_default``.
+    :param gcs_impersonation_chain: Optional service account to impersonate using short-term
+        credentials for GCS upload, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request. (templated)
     """

     template_fields: Sequence[str] = tuple(
-        {
+        {
+            "_output_path",
+            "schema",
+            "catalog",
+            "http_headers",
+            "databricks_conn_id",
+            "_gcs_impersonation_chain",
+        }
         | set(SQLExecuteQueryOperator.template_fields)
     )
@@ -90,6 +111,8 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
         output_format: str = "csv",
         csv_params: dict[str, Any] | None = None,
         client_parameters: dict[str, Any] | None = None,
+        gcp_conn_id: str = "google_cloud_default",
+        gcs_impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
     ) -> None:
         super().__init__(conn_id=databricks_conn_id, **kwargs)

@@ -105,6 +128,8 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
         self.http_headers = http_headers
         self.catalog = catalog
         self.schema = schema
+        self._gcp_conn_id = gcp_conn_id
+        self._gcs_impersonation_chain = gcs_impersonation_chain

     @cached_property
     def _hook(self) -> DatabricksSqlHook:

@@ -127,41 +152,151 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
     def _should_run_output_processing(self) -> bool:
         return self.do_xcom_push or bool(self._output_path)

+    @property
+    def _is_gcs_output(self) -> bool:
+        """Check if the output path is a GCS URI."""
+        return self._output_path.startswith("gs://") if self._output_path else False
+
+    def _parse_gcs_path(self, path: str) -> tuple[str, str]:
+        """Parse a GCS URI into bucket and object name."""
+        parsed = urlparse(path)
+        bucket = parsed.netloc
+        object_name = parsed.path.lstrip("/")
+        return bucket, object_name
+
+    def _upload_to_gcs(self, local_path: str, gcs_path: str) -> None:
+        """Upload a local file to GCS."""
+        try:
+            from airflow.providers.google.cloud.hooks.gcs import GCSHook
+        except ImportError:
+            raise AirflowOptionalProviderFeatureException(
+                "The 'apache-airflow-providers-google' package is required for GCS output. "
+                "Install it with: pip install apache-airflow-providers-google"
+            )
+
+        bucket, object_name = self._parse_gcs_path(gcs_path)
+        hook = GCSHook(
+            gcp_conn_id=self._gcp_conn_id,
+            impersonation_chain=self._gcs_impersonation_chain,
+        )
+        hook.upload(
+            bucket_name=bucket,
+            object_name=object_name,
+            filename=local_path,
+        )
+        self.log.info("Uploaded output to %s", gcs_path)
+
+    def _write_parquet(self, file_path: str, field_names: list[str], rows: list[Any]) -> None:
+        """Write data to a Parquet file."""
+        import pyarrow as pa
+        import pyarrow.parquet as pq
+
+        data: dict[str, list] = {name: [] for name in field_names}
+        for row in rows:
+            row_dict = row._asdict()
+            for name in field_names:
+                data[name].append(row_dict[name])
+
+        table = pa.Table.from_pydict(data)
+        pq.write_table(table, file_path)
+
+    def _write_avro(self, file_path: str, field_names: list[str], rows: list[Any]) -> None:
+        """Write data to an Avro file using fastavro."""
+        try:
+            from fastavro import writer
+        except ImportError:
+            raise AirflowOptionalProviderFeatureException(
+                "The 'fastavro' package is required for Avro output. Install it with: pip install fastavro"
+            )
+
+        data: dict[str, list] = {name: [] for name in field_names}
+        for row in rows:
+            row_dict = row._asdict()
+            for name in field_names:
+                data[name].append(row_dict[name])
+
+        schema_fields = []
+        for name in field_names:
+            sample_val = next(
+                (data[name][i] for i in range(len(data[name])) if data[name][i] is not None), None
+            )
+            if sample_val is None:
+                avro_type = ["null", "string"]
+            elif isinstance(sample_val, bool):
+                avro_type = ["null", "boolean"]
+            elif isinstance(sample_val, int):
+                avro_type = ["null", "long"]
+            elif isinstance(sample_val, float):
+                avro_type = ["null", "double"]
+            else:
+                avro_type = ["null", "string"]
+            schema_fields.append({"name": name, "type": avro_type})
+
+        avro_schema = {
+            "type": "record",
+            "name": "QueryResult",
+            "fields": schema_fields,
+        }
+
+        records = [row._asdict() for row in rows]
+        with open(file_path, "wb") as f:
+            writer(f, avro_schema, records)
+
     def _process_output(self, results: list[Any], descriptions: list[Sequence[Sequence] | None]) -> list[Any]:
         if not self._output_path:
             return list(zip(descriptions, results))
         if not self._output_format:
             raise AirflowException("Output format should be specified!")
-
+
         last_description = descriptions[-1]
         last_results = results[-1]
         if last_description is None:
-            raise AirflowException("There is missing description present for the output file.
+            raise AirflowException("There is missing description present for the output file.")
         field_names = [field[0] for field in last_description]
-
-
-
-
-
-
-                write_header = csv_params.get("header", True)
-                if "header" in csv_params:
-                    del csv_params["header"]
-                writer = csv.DictWriter(file, fieldnames=field_names, **csv_params)
-                if write_header:
-                    writer.writeheader()
-                for row in last_results:
-                    writer.writerow(row._asdict())
-        elif self._output_format.lower() == "json":
-            with open(self._output_path, "w") as file:
-                file.write(json.dumps([row._asdict() for row in last_results]))
-        elif self._output_format.lower() == "jsonl":
-            with open(self._output_path, "w") as file:
-                for row in last_results:
-                    file.write(json.dumps(row._asdict()))
-                    file.write("\n")
+
+        if self._is_gcs_output:
+            suffix = f".{self._output_format.lower()}"
+            tmp_file = NamedTemporaryFile(mode="w", suffix=suffix, delete=False, newline="")
+            local_path = tmp_file.name
+            tmp_file.close()
         else:
-
+            local_path = self._output_path
+
+        try:
+            output_format = self._output_format.lower()
+            if output_format == "csv":
+                with open(local_path, "w", newline="") as file:
+                    if self._csv_params:
+                        csv_params = self._csv_params.copy()
+                    else:
+                        csv_params = {}
+                    write_header = csv_params.pop("header", True)
+                    writer = csv.DictWriter(file, fieldnames=field_names, **csv_params)
+                    if write_header:
+                        writer.writeheader()
+                    for row in last_results:
+                        writer.writerow(row._asdict())
+            elif output_format == "json":
+                with open(local_path, "w") as file:
+                    file.write(json.dumps([row._asdict() for row in last_results]))
+            elif output_format == "jsonl":
+                with open(local_path, "w") as file:
+                    for row in last_results:
+                        file.write(json.dumps(row._asdict()))
+                        file.write("\n")
+            elif output_format == "parquet":
+                self._write_parquet(local_path, field_names, last_results)
+            elif output_format == "avro":
+                self._write_avro(local_path, field_names, last_results)
+            else:
+                raise ValueError(f"Unsupported output format: '{self._output_format}'")
+
+            if self._is_gcs_output:
+                self._upload_to_gcs(local_path, self._output_path)
+        finally:
+            if self._is_gcs_output and os.path.exists(local_path):
+                os.unlink(local_path)
+
         return list(zip(descriptions, results))
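
For reference, a minimal sketch of how the new output options added to ``DatabricksSqlOperator`` above might be used in a DAG. The query, bucket, and connection ids are placeholders, and it assumes the ``google`` extra (needed for ``gs://`` paths) is installed alongside the provider.

    from airflow.providers.databricks.operators.databricks_sql import DatabricksSqlOperator

    # Hypothetical task: write query results as Parquet straight to a GCS bucket.
    export_trips = DatabricksSqlOperator(
        task_id="export_trips",
        databricks_conn_id="databricks_default",          # placeholder Databricks connection
        sql="SELECT * FROM samples.nyctaxi.trips LIMIT 100",  # placeholder query
        output_path="gs://my-bucket/exports/trips.parquet",   # gs:// URI triggers upload via GCSHook
        output_format="parquet",
        gcp_conn_id="google_cloud_default",
    )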

airflow/providers/databricks/plugins/databricks_workflow.py

@@ -20,13 +20,17 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Any
 from urllib.parse import unquote

-from sqlalchemy import select
-
 from airflow.exceptions import TaskInstanceNotFound
 from airflow.models.dagrun import DagRun
 from airflow.models.taskinstance import TaskInstance, TaskInstanceKey, clear_task_instances
-from airflow.
-
+from airflow.providers.common.compat.sdk import (
+    AirflowException,
+    AirflowOptionalProviderFeatureException,
+    AirflowPlugin,
+    BaseOperatorLink,
+    TaskGroup,
+    XCom,
+)
 from airflow.providers.databricks.hooks.databricks import DatabricksHook
 from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS
 from airflow.utils.log.logging_mixin import LoggingMixin

@@ -70,6 +74,10 @@ if not AIRFLOW_V_3_0_PLUS:
     from flask_appbuilder import BaseView
     from flask_appbuilder.api import expose

+    try:
+        from sqlalchemy import select
+    except ImportError:
+        select = None  # type: ignore[assignment,misc]
     from airflow.utils.session import NEW_SESSION, provide_session
     from airflow.www import auth

@@ -142,6 +150,11 @@ if not AIRFLOW_V_3_0_PLUS:
         :param session: The SQLAlchemy session to use for the query. If None, uses the default session.
         :return: The DagRun object associated with the specified DAG and run_id.
         """
+        if select is None:
+            raise AirflowOptionalProviderFeatureException(
+                "sqlalchemy is required for workflow repair functionality. "
+                "Install it with: pip install 'apache-airflow-providers-databricks[sqlalchemy]'"
+            )
         if not session:
             raise AirflowException("Session not provided.")

@@ -161,6 +174,11 @@ if not AIRFLOW_V_3_0_PLUS:

     @provide_session
     def get_task_instance(operator: BaseOperator, dttm, session: Session = NEW_SESSION) -> TaskInstance:
+        if select is None:
+            raise AirflowOptionalProviderFeatureException(
+                "sqlalchemy is required to get task instance. "
+                "Install it with: pip install 'apache-airflow-providers-databricks[sqlalchemy]'"
+            )
         dag_id = operator.dag.dag_id
         if hasattr(DagRun, "execution_date"):  # Airflow 2.x.
             dag_run = DagRun.find(dag_id, execution_date=dttm)[0]  # type: ignore[call-arg]

@@ -280,7 +298,7 @@ class WorkflowJobRunLink(BaseOperatorLink, LoggingMixin):
         """XCom key where the link is stored during task execution."""
         return "databricks_job_run_link"

-    def get_link(
+    def get_link(  # type: ignore[override]  # Signature intentionally kept this way for Airflow 2.x compatibility
         self,
         operator: BaseOperator,
         dttm=None,

@@ -356,7 +374,7 @@ class WorkflowJobRepairAllFailedLink(BaseOperatorLink, LoggingMixin):

     name = "Repair All Failed Tasks"

-    def get_link(
+    def get_link(  # type: ignore[override]  # Signature intentionally kept this way for Airflow 2.x compatibility
         self,
         operator,
         dttm=None,

@@ -453,7 +471,7 @@ class WorkflowJobRepairSingleTaskLink(BaseOperatorLink, LoggingMixin):

     name = "Repair a single task"

-    def get_link(
+    def get_link(  # type: ignore[override]  # Signature intentionally kept this way for Airflow 2.x compatibility
         self,
         operator,
         dttm=None,

airflow/providers/databricks/sensors/databricks.py

@@ -22,8 +22,7 @@ from collections.abc import Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any

-from airflow.
-from airflow.providers.common.compat.sdk import AirflowException, BaseSensorOperator
+from airflow.providers.common.compat.sdk import AirflowException, BaseSensorOperator, conf
 from airflow.providers.databricks.hooks.databricks import DatabricksHook, SQLStatementState
 from airflow.providers.databricks.operators.databricks import DEFER_METHOD_NAME
 from airflow.providers.databricks.utils.mixins import DatabricksSQLStatementsMixin

apache_airflow_providers_databricks-7.9.0rc1.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apache-airflow-providers-databricks
-Version: 7.
+Version: 7.9.0rc1
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>

@@ -23,33 +23,39 @@ Classifier: Topic :: System :: Monitoring
 License-File: LICENSE
 License-File: NOTICE
 Requires-Dist: apache-airflow>=2.11.0rc1
-Requires-Dist: apache-airflow-providers-common-compat>=1.
+Requires-Dist: apache-airflow-providers-common-compat>=1.13.0rc1
 Requires-Dist: apache-airflow-providers-common-sql>=1.27.0rc1
 Requires-Dist: requests>=2.32.0,<3
 Requires-Dist: databricks-sql-connector>=4.0.0
-Requires-Dist: databricks-sqlalchemy>=1.0.2
 Requires-Dist: aiohttp>=3.9.2, <4
 Requires-Dist: mergedeep>=1.3.4
 Requires-Dist: pandas>=2.1.2; python_version <"3.13"
 Requires-Dist: pandas>=2.2.3; python_version >="3.13"
 Requires-Dist: pyarrow>=16.1.0; python_version < '3.13'
 Requires-Dist: pyarrow>=18.0.0; python_version >= '3.13'
+Requires-Dist: fastavro>=1.9.0 ; extra == "avro"
+Requires-Dist: fastavro>=1.10.0 ; extra == "avro" and (python_version>="3.12")
 Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
 Requires-Dist: apache-airflow-providers-fab>=2.2.0rc1 ; extra == "fab" and ( python_version < '3.13')
+Requires-Dist: apache-airflow-providers-google>=10.24.0rc1 ; extra == "google"
 Requires-Dist: apache-airflow-providers-openlineage>=2.3.0rc1 ; extra == "openlineage"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
+Requires-Dist: databricks-sqlalchemy>=1.0.2 ; extra == "sqlalchemy"
 Requires-Dist: apache-airflow-providers-standard ; extra == "standard"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.
-Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.
+Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.9.0/changelog.html
+Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.9.0
 Project-URL: Mastodon, https://fosstodon.org/@airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
+Provides-Extra: avro
 Provides-Extra: azure-identity
 Provides-Extra: fab
+Provides-Extra: google
 Provides-Extra: openlineage
 Provides-Extra: sdk
+Provides-Extra: sqlalchemy
 Provides-Extra: standard

@@ -77,7 +83,7 @@ Provides-Extra: standard

 Package ``apache-airflow-providers-databricks``

-Release: ``7.
+Release: ``7.9.0``


 `Databricks <https://databricks.com/>`__

@@ -90,7 +96,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0/>`_.

 Installation
 ------------

@@ -108,11 +114,10 @@ Requirements
 PIP package Version required
 ========================================== ======================================
 ``apache-airflow`` ``>=2.11.0``
-``apache-airflow-providers-common-compat`` ``>=1.
+``apache-airflow-providers-common-compat`` ``>=1.13.0``
 ``apache-airflow-providers-common-sql`` ``>=1.27.0``
 ``requests`` ``>=2.32.0,<3``
 ``databricks-sql-connector`` ``>=4.0.0``
-``databricks-sqlalchemy`` ``>=1.0.2``
 ``aiohttp`` ``>=3.9.2,<4``
 ``mergedeep`` ``>=1.3.4``
 ``pandas`` ``>=2.1.2; python_version < "3.13"``

@@ -139,6 +144,7 @@ Dependent package
 ================================================================================================================== =================
 `apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
 `apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
+`apache-airflow-providers-google <https://airflow.apache.org/docs/apache-airflow-providers-google>`_ ``google``
 `apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
 ================================================================================================================== =================

@@ -153,8 +159,11 @@ Extra Dependencies
 ``fab`` ``apache-airflow-providers-fab>=2.2.0; python_version < '3.13'``
 ``standard`` ``apache-airflow-providers-standard``
 ``openlineage`` ``apache-airflow-providers-openlineage>=2.3.0``
+``sqlalchemy`` ``databricks-sqlalchemy>=1.0.2``
+``google`` ``apache-airflow-providers-google>=10.24.0``
+``avro`` ``fastavro>=1.9.0``, ``fastavro>=1.10.0;python_version>="3.12"``
 ================== ================================================================

 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0/changelog.html>`_.
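
The three new optional extras declared above (``avro``, ``google``, ``sqlalchemy``) gate the Avro output, GCS upload, and SQLAlchemy-based features of this release; as an illustration, they can be pulled in together at install time (exact version resolution left to pip):

    pip install "apache-airflow-providers-databricks[avro,google,sqlalchemy]"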

apache_airflow_providers_databricks-7.9.0rc1.dist-info/RECORD

@@ -1,20 +1,20 @@
-airflow/providers/databricks/__init__.py,sha256=
+airflow/providers/databricks/__init__.py,sha256=nAdKvPEVae_IY8zBScAW_De79Ob4OC-dGDALAno1HA0,1499
 airflow/providers/databricks/exceptions.py,sha256=v7TD8auFp9LmyWqRtnXYG8mOit0WE3OuInUNFoC0zTo,1278
 airflow/providers/databricks/get_provider_info.py,sha256=LfK0AwIARVh4tX5146-J2VRZwfe6GP3xjLyltA7X7iU,5738
 airflow/providers/databricks/version_compat.py,sha256=RQbdCueLOaFZWekpQmF0BoAoJInW8EoyvJ3Ah-HbrPo,1577
 airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/hooks/databricks.py,sha256=
-airflow/providers/databricks/hooks/databricks_base.py,sha256=
-airflow/providers/databricks/hooks/databricks_sql.py,sha256=
+airflow/providers/databricks/hooks/databricks.py,sha256=eYvrc9H3-gpGZRXBGms_DyjeFjxg-JB1lYKmdr2bwcE,29789
+airflow/providers/databricks/hooks/databricks_base.py,sha256=ud9Mxzi86tAaGunlx0vypLR6ICapdn2qyFlT3WFjZjQ,36881
+airflow/providers/databricks/hooks/databricks_sql.py,sha256=4LSTSYxHPJolmB91eOP_LuShyAUcjWATx6-ywUx8ASc,18149
 airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/operators/databricks.py,sha256=
+airflow/providers/databricks/operators/databricks.py,sha256=NqcMOAlC_OvkrBFUaRFQa37P36Shja-plECZzg04Gl8,79258
 airflow/providers/databricks/operators/databricks_repos.py,sha256=jOrYO_tFQJ5JBXeu7Rhrc3pcQJ4qtzSGSjGZ4GffmwU,13125
-airflow/providers/databricks/operators/databricks_sql.py,sha256=
+airflow/providers/databricks/operators/databricks_sql.py,sha256=9hXLFSUtdVlg45lwBTIZgY33is5-Kkgp00Cz22sI-yg,27076
 airflow/providers/databricks/operators/databricks_workflow.py,sha256=xqk6kbFcqArHo4w9E0sVGbAkX2tuBqWdtvwiFyc9jzo,14989
 airflow/providers/databricks/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/databricks/plugins/databricks_workflow.py,sha256=
+airflow/providers/databricks/plugins/databricks_workflow.py,sha256=Tg4fgrMQ31NqtcjPK6D61ehSqp-Jtf3_OS4db7BDSCo,21019
 airflow/providers/databricks/sensors/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/databricks/sensors/databricks.py,sha256=
+airflow/providers/databricks/sensors/databricks.py,sha256=dtVcb-Ka9R3l8y_59hdz65be3JUIVEsAodTsviwx1Mg,6199
 airflow/providers/databricks/sensors/databricks_partition.py,sha256=AV7GoAIRnV7NEtbqUxp9WdSeN-LeIc49I3_NaI1cBiY,9910
 airflow/providers/databricks/sensors/databricks_sql.py,sha256=ON3ulhD0I4ukJhKzDYTqw-8ZkdUuED_8QyDZbzFgHko,5603
 airflow/providers/databricks/triggers/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787

@@ -23,9 +23,9 @@ airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2
 airflow/providers/databricks/utils/databricks.py,sha256=bnZdjQ1etvAcfgdmb8BR4i1M4YjdcDXxxznVtmur1GM,5134
 airflow/providers/databricks/utils/mixins.py,sha256=XDA9v9BeCgMIznYPpa-X7XIqrD1mJbw4eSQUjvTsQXI,7397
 airflow/providers/databricks/utils/openlineage.py,sha256=naqLzbdBebwDUPvDhhIa5Ey_8SgKkYqdwhzJC_51gFU,13674
-apache_airflow_providers_databricks-7.
-apache_airflow_providers_databricks-7.
-apache_airflow_providers_databricks-7.
-apache_airflow_providers_databricks-7.
-apache_airflow_providers_databricks-7.
-apache_airflow_providers_databricks-7.
+apache_airflow_providers_databricks-7.9.0rc1.dist-info/entry_points.txt,sha256=hjmZm3ab2cteTR4t9eE28oKixHwNIKtLCThd6sx3XRQ,227
+apache_airflow_providers_databricks-7.9.0rc1.dist-info/licenses/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
+apache_airflow_providers_databricks-7.9.0rc1.dist-info/licenses/NOTICE,sha256=_cWHznIoUSbLCY_KfmKqetlKlsoH0c2VBjmZjElAzuc,168
+apache_airflow_providers_databricks-7.9.0rc1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+apache_airflow_providers_databricks-7.9.0rc1.dist-info/METADATA,sha256=tpOvZ91-nvm9jtuUodsVmGB4Zfv8ODG93V_sJ75WPnE,8360
+apache_airflow_providers_databricks-7.9.0rc1.dist-info/RECORD,,