apache-airflow-providers-databricks 4.1.0rc1__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,3 +15,27 @@
  # KIND, either express or implied. See the License for the
  # specific language governing permissions and limitations
  # under the License.
+ #
+ # NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE
+ # OVERWRITTEN WHEN PREPARING DOCUMENTATION FOR THE PACKAGES.
+ #
+ # IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE
+ # `PROVIDER__INIT__PY_TEMPLATE.py.jinja2` IN the `dev/provider_packages` DIRECTORY
+ #
+ from __future__ import annotations
+
+ import packaging.version
+
+ __all__ = ["__version__"]
+
+ __version__ = "4.2.0"
+
+ try:
+ from airflow import __version__ as airflow_version
+ except ImportError:
+ from airflow.version import version as airflow_version
+
+ if packaging.version.parse(airflow_version) < packaging.version.parse("2.4.0"):
+ raise RuntimeError(
+ f"The package `apache-airflow-providers-databricks:{__version__}` requires Apache Airflow 2.4.0+" # NOQA: E501
+ )
@@ -29,6 +29,7 @@ def get_provider_info():
  "description": "`Databricks <https://databricks.com/>`__\n",
  "suspended": False,
  "versions": [
+ "4.2.0",
  "4.1.0",
  "4.0.1",
  "4.0.0",
@@ -51,8 +52,8 @@ def get_provider_info():
  "1.0.0",
  ],
  "dependencies": [
- "apache-airflow>=2.3.0",
- "apache-airflow-providers-common-sql>=1.3.1",
+ "apache-airflow>=2.4.0",
+ "apache-airflow-providers-common-sql>=1.5.0",
  "requests>=2.27,<3",
  "databricks-sql-connector>=2.0.0, <3.0.0",
  "aiohttp>=3.6.3, <4",
@@ -120,13 +121,18 @@ def get_provider_info():
  "triggers": [
  {
  "integration-name": "Databricks",
- "python-modules": ["airflow.providers.databricks.triggers.databricks"],
+ "class-names": [
+ "airflow.providers.databricks.triggers.databricks.DatabricksExecutionTrigger"
+ ],
  }
  ],
  "sensors": [
  {
  "integration-name": "Databricks",
- "python-modules": ["airflow.providers.databricks.sensors.databricks_sql"],
+ "python-modules": [
+ "airflow.providers.databricks.sensors.databricks_sql",
+ "airflow.providers.databricks.sensors.databricks_partition",
+ ],
  }
  ],
  "connection-types": [
@@ -44,7 +44,9 @@ SUBMIT_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/submit")
  GET_RUN_ENDPOINT = ("GET", "api/2.1/jobs/runs/get")
  CANCEL_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/cancel")
  DELETE_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/delete")
+ REPAIR_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/repair")
  OUTPUT_RUNS_JOB_ENDPOINT = ("GET", "api/2.1/jobs/runs/get-output")
+ CANCEL_ALL_RUNS_ENDPOINT = ("POST", "api/2.1/jobs/runs/cancel-all")

  INSTALL_LIBS_ENDPOINT = ("POST", "api/2.0/libraries/install")
  UNINSTALL_LIBS_ENDPOINT = ("POST", "api/2.0/libraries/uninstall")
@@ -352,6 +354,15 @@ class DatabricksHook(BaseDatabricksHook):
  json = {"run_id": run_id}
  self._do_api_call(CANCEL_RUN_ENDPOINT, json)

+ def cancel_all_runs(self, job_id: int) -> None:
+ """
+ Cancels all active runs of a job. The runs are canceled asynchronously.
+
+ :param job_id: The canonical identifier of the job to cancel all runs of
+ """
+ json = {"job_id": job_id}
+ self._do_api_call(CANCEL_ALL_RUNS_ENDPOINT, json)
+
  def delete_run(self, run_id: int) -> None:
  """
  Deletes a non-active run.
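The new `cancel_all_runs` method wraps the `api/2.1/jobs/runs/cancel-all` endpoint added above. A minimal sketch of calling it directly from the hook; the connection id and job id below are placeholders, not values from the package:

    from airflow.providers.databricks.hooks.databricks import DatabricksHook

    # Placeholder connection id and job id, purely for illustration.
    hook = DatabricksHook(databricks_conn_id="databricks_default")
    # Asynchronously cancels every active run of the given job via
    # POST api/2.1/jobs/runs/cancel-all with {"job_id": 123}.
    hook.cancel_all_runs(job_id=123)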
@@ -361,6 +372,14 @@ class DatabricksHook(BaseDatabricksHook):
  json = {"run_id": run_id}
  self._do_api_call(DELETE_RUN_ENDPOINT, json)

+ def repair_run(self, json: dict) -> None:
+ """
+ Re-run one or more tasks.
+
+ :param json: repair a job run.
+ """
+ self._do_api_call(REPAIR_RUN_ENDPOINT, json)
+
  def restart_cluster(self, json: dict) -> None:
  """
  Restarts the cluster.
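A rough sketch of the new `repair_run` method in use. The hook simply forwards the dict to `api/2.1/jobs/runs/repair`; the payload shape is assumed to follow the Databricks Jobs 2.1 repair API, and the field values are illustrative, not taken from this package:

    from airflow.providers.databricks.hooks.databricks import DatabricksHook

    hook = DatabricksHook(databricks_conn_id="databricks_default")  # placeholder connection id
    hook.repair_run(
        json={
            "run_id": 455644833,        # placeholder run id
            "rerun_tasks": ["task_a"],  # task keys to re-run, per the Jobs 2.1 repair API
        }
    )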
@@ -19,22 +19,24 @@
  from __future__ import annotations

  import time
+ import warnings
  from logging import Logger
  from typing import TYPE_CHECKING, Any, Sequence

  from airflow.compat.functools import cached_property
- from airflow.exceptions import AirflowException
+ from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
  from airflow.models import BaseOperator, BaseOperatorLink, XCom
  from airflow.providers.databricks.hooks.databricks import DatabricksHook, RunState
  from airflow.providers.databricks.triggers.databricks import DatabricksExecutionTrigger
  from airflow.providers.databricks.utils.databricks import normalise_json_content, validate_trigger_event

  if TYPE_CHECKING:
- from airflow.models.taskinstance import TaskInstanceKey
+ from airflow.models.taskinstancekey import TaskInstanceKey
  from airflow.utils.context import Context

  DEFER_METHOD_NAME = "execute_complete"
  XCOM_RUN_ID_KEY = "run_id"
+ XCOM_JOB_ID_KEY = "job_id"
  XCOM_RUN_PAGE_URL_KEY = "run_page_url"

@@ -103,6 +105,9 @@ def _handle_deferrable_databricks_operator_execution(operator, hook, log, contex
  :param operator: Databricks async operator being handled
  :param context: Airflow context
  """
+ job_id = hook.get_job_id(operator.run_id)
+ if operator.do_xcom_push and context is not None:
+ context["ti"].xcom_push(key=XCOM_JOB_ID_KEY, value=job_id)
  if operator.do_xcom_push and context is not None:
  context["ti"].xcom_push(key=XCOM_RUN_ID_KEY, value=operator.run_id)
  log.info("Run submitted with run_id: %s", operator.run_id)
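With this change the deferrable execution path also pushes the resolved job id to XCom under the key ``job_id``, alongside the existing ``run_id``. A small sketch of a downstream task reading both values; the upstream task id is a placeholder:

    from airflow.decorators import task

    @task
    def report_run(ti=None):
        # "submit_run" is a hypothetical task_id of an upstream Databricks operator task.
        job_id = ti.xcom_pull(task_ids="submit_run", key="job_id")
        run_id = ti.xcom_pull(task_ids="submit_run", key="run_id")
        print(f"Databricks job {job_id} started run {run_id}")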
@@ -118,6 +123,10 @@ def _handle_deferrable_databricks_operator_execution(operator, hook, log, contex
  run_id=operator.run_id,
  databricks_conn_id=operator.databricks_conn_id,
  polling_period_seconds=operator.polling_period_seconds,
+ retry_limit=operator.databricks_retry_limit,
+ retry_delay=operator.databricks_retry_delay,
+ retry_args=operator.databricks_retry_args,
+ run_page_url=run_page_url,
  ),
  method_name=DEFER_METHOD_NAME,
  )
@@ -267,6 +276,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
  :param do_xcom_push: Whether we should push run_id and run_page_url to xcom.
  :param git_source: Optional specification of a remote git repository from which
  supported task types are retrieved.
+ :param deferrable: Run operator in the deferrable mode.

  .. seealso::
  https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunsSubmit
@@ -306,6 +316,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
  access_control_list: list[dict[str, str]] | None = None,
  wait_for_termination: bool = True,
  git_source: dict[str, str] | None = None,
+ deferrable: bool = False,
  **kwargs,
  ) -> None:
  """Creates a new ``DatabricksSubmitRunOperator``."""
@@ -317,6 +328,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
  self.databricks_retry_delay = databricks_retry_delay
  self.databricks_retry_args = databricks_retry_args
  self.wait_for_termination = wait_for_termination
+ self.deferrable = deferrable
  if tasks is not None:
  self.json["tasks"] = tasks
  if spark_jar_task is not None:
@@ -373,7 +385,10 @@ class DatabricksSubmitRunOperator(BaseOperator):
  def execute(self, context: Context):
  json_normalised = normalise_json_content(self.json)
  self.run_id = self._hook.submit_run(json_normalised)
- _handle_databricks_operator_execution(self, self._hook, self.log, context)
+ if self.deferrable:
+ _handle_deferrable_databricks_operator_execution(self, self._hook, self.log, context)
+ else:
+ _handle_databricks_operator_execution(self, self._hook, self.log, context)

  def on_kill(self):
  if self.run_id:
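A minimal DAG sketch using the new ``deferrable=True`` flag on ``DatabricksSubmitRunOperator``; the cluster spec, notebook path, DAG id and connection id are placeholder values, not taken from the package:

    from datetime import datetime

    from airflow import DAG
    from airflow.providers.databricks.operators.databricks import DatabricksSubmitRunOperator

    with DAG("databricks_submit_example", start_date=datetime(2023, 1, 1), schedule=None):
        DatabricksSubmitRunOperator(
            task_id="submit_run",
            databricks_conn_id="databricks_default",
            new_cluster={"spark_version": "12.2.x-scala2.12", "node_type_id": "i3.xlarge", "num_workers": 2},
            notebook_task={"notebook_path": "/Shared/example"},
            # Hands the wait over to DatabricksExecutionTrigger instead of blocking a worker slot.
            deferrable=True,
        )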
@@ -384,10 +399,23 @@ class DatabricksSubmitRunOperator(BaseOperator):
  else:
  self.log.error("Error: Task: %s with invalid run_id was requested to be cancelled.", self.task_id)

+ def execute_complete(self, context: dict | None, event: dict):
+ _handle_deferrable_databricks_operator_completion(event, self.log)
+

  class DatabricksSubmitRunDeferrableOperator(DatabricksSubmitRunOperator):
  """Deferrable version of ``DatabricksSubmitRunOperator``"""

+ def __init__(self, *args, **kwargs):
+ warnings.warn(
+ "`DatabricksSubmitRunDeferrableOperator` has been deprecated. "
+ "Please use `airflow.providers.databricks.operators.DatabricksSubmitRunOperator` with "
+ "`deferrable=True` instead.",
+ AirflowProviderDeprecationWarning,
+ stacklevel=2,
+ )
+ super().__init__(deferrable=True, *args, **kwargs)
+
  def execute(self, context):
  hook = self._get_hook(caller="DatabricksSubmitRunDeferrableOperator")
  json_normalised = normalise_json_content(self.json)
@@ -549,6 +577,7 @@ class DatabricksRunNowOperator(BaseOperator):
  :param databricks_retry_args: An optional dictionary with arguments passed to ``tenacity.Retrying`` class.
  :param do_xcom_push: Whether we should push run_id and run_page_url to xcom.
  :param wait_for_termination: if we should wait for termination of the job run. ``True`` by default.
+ :param deferrable: Run operator in the deferrable mode.
  """

  # Used in airflow.models.BaseOperator
@@ -578,6 +607,7 @@ class DatabricksRunNowOperator(BaseOperator):
  databricks_retry_args: dict[Any, Any] | None = None,
  do_xcom_push: bool = True,
  wait_for_termination: bool = True,
+ deferrable: bool = False,
  **kwargs,
  ) -> None:
  """Creates a new ``DatabricksRunNowOperator``."""
@@ -589,6 +619,7 @@ class DatabricksRunNowOperator(BaseOperator):
  self.databricks_retry_delay = databricks_retry_delay
  self.databricks_retry_args = databricks_retry_args
  self.wait_for_termination = wait_for_termination
+ self.deferrable = deferrable

  if job_id is not None:
  self.json["job_id"] = job_id
@@ -636,7 +667,10 @@ class DatabricksRunNowOperator(BaseOperator):
  self.json["job_id"] = job_id
  del self.json["job_name"]
  self.run_id = hook.run_now(self.json)
- _handle_databricks_operator_execution(self, hook, self.log, context)
+ if self.deferrable:
+ _handle_deferrable_databricks_operator_execution(self, hook, self.log, context)
+ else:
+ _handle_databricks_operator_execution(self, hook, self.log, context)

  def on_kill(self):
  if self.run_id:
@@ -651,6 +685,16 @@ class DatabricksRunNowOperator(BaseOperator):
  class DatabricksRunNowDeferrableOperator(DatabricksRunNowOperator):
  """Deferrable version of ``DatabricksRunNowOperator``"""

+ def __init__(self, *args, **kwargs):
+ warnings.warn(
+ "`DatabricksRunNowDeferrableOperator` has been deprecated. "
+ "Please use `airflow.providers.databricks.operators.DatabricksRunNowOperator` with "
+ "`deferrable=True` instead.",
+ AirflowProviderDeprecationWarning,
+ stacklevel=2,
+ )
+ super().__init__(deferrable=True, *args, **kwargs)
+
  def execute(self, context):
  hook = self._get_hook(caller="DatabricksRunNowDeferrableOperator")
  self.run_id = hook.run_now(self.json)
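Equivalently for ``DatabricksRunNowOperator``, the plain operator can now be deferred directly instead of using the deprecated ``DatabricksRunNowDeferrableOperator``; a sketch with a placeholder job id and parameters:

    from airflow.providers.databricks.operators.databricks import DatabricksRunNowOperator

    run_job = DatabricksRunNowOperator(
        task_id="run_job",
        databricks_conn_id="databricks_default",
        job_id=42,                                 # placeholder id of an existing Databricks job
        notebook_params={"run_date": "{{ ds }}"},
        deferrable=True,
    )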
@@ -120,6 +120,9 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
  }
  return DatabricksSqlHook(self.databricks_conn_id, **hook_params)

+ def _should_run_output_processing(self) -> bool:
+ return self.do_xcom_push or bool(self._output_path)
+
  def _process_output(self, results: list[Any], descriptions: list[Sequence[Sequence] | None]) -> list[Any]:
  if not self._output_path:
  return list(zip(descriptions, results))
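The new ``_should_run_output_processing`` override lets ``DatabricksSqlOperator`` skip result handling when nothing would consume it. A sketch of a statement-only task that benefits from this; the warehouse name, schema and SQL are placeholders:

    from airflow.providers.databricks.operators.databricks_sql import DatabricksSqlOperator

    # With do_xcom_push=False and no output_path, _should_run_output_processing() is False,
    # so the query results are neither written to a file nor pushed to XCom.
    cleanup = DatabricksSqlOperator(
        task_id="cleanup_staging",
        databricks_conn_id="databricks_default",
        sql_endpoint_name="my_warehouse",
        sql="DELETE FROM my_schema.staging WHERE ds < '{{ ds }}'",
        do_xcom_push=False,
    )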
@@ -0,0 +1,228 @@
+ #
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
+ #
+ """This module contains Databricks sensors."""
+
+ from __future__ import annotations
+
+ from datetime import datetime
+ from typing import TYPE_CHECKING, Any, Callable, Sequence
+
+ from databricks.sql.utils import ParamEscaper
+
+ from airflow.compat.functools import cached_property
+ from airflow.exceptions import AirflowException
+ from airflow.providers.common.sql.hooks.sql import fetch_all_handler
+ from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
+ from airflow.sensors.base import BaseSensorOperator
+
+ if TYPE_CHECKING:
+ from airflow.utils.context import Context
+
+
+ class DatabricksPartitionSensor(BaseSensorOperator):
+ """
+ Sensor to detect the presence of table partitions in Databricks.
+
+ :param databricks_conn_id: Reference to :ref:`Databricks
+ connection id<howto/connection:databricks>` (templated), defaults to
+ DatabricksSqlHook.default_conn_name.
+ :param sql_warehouse_name: Optional name of Databricks SQL warehouse. If not specified, ``http_path``
+ must be provided as described below, defaults to None
+ :param http_path: Optional string specifying HTTP path of Databricks SQL warehouse or All Purpose cluster.
+ If not specified, it should be either specified in the Databricks connection's
+ extra parameters, or ``sql_warehouse_name`` must be specified.
+ :param session_configuration: An optional dictionary of Spark session parameters. If not specified,
+ it could be specified in the Databricks connection's extra parameters, defaults to None
+ :param http_headers: An optional list of (k, v) pairs
+ that will be set as HTTP headers on every request. (templated).
+ :param catalog: An optional initial catalog to use.
+ Requires Databricks Runtime version 9.0+ (templated), defaults to ""
+ :param schema: An optional initial schema to use.
+ Requires Databricks Runtime version 9.0+ (templated), defaults to "default"
+ :param table_name: Name of the table to check partitions.
+ :param partitions: Name of the partitions to check.
+ Example: {"date": "2023-01-03", "name": ["abc", "def"]}
+ :param partition_operator: Optional comparison operator for partitions, such as >=.
+ :param handler: Handler for DbApiHook.run() to return results, defaults to fetch_all_handler
+ :param client_parameters: Additional parameters internal to Databricks SQL connector parameters.
+ """
+
+ template_fields: Sequence[str] = (
+ "databricks_conn_id",
+ "catalog",
+ "schema",
+ "table_name",
+ "partitions",
+ "http_headers",
+ )
+
+ template_ext: Sequence[str] = (".sql",)
+ template_fields_renderers = {"sql": "sql"}
+
+ def __init__(
+ self,
+ *,
+ databricks_conn_id: str = DatabricksSqlHook.default_conn_name,
+ http_path: str | None = None,
+ sql_warehouse_name: str | None = None,
+ session_configuration=None,
+ http_headers: list[tuple[str, str]] | None = None,
+ catalog: str = "",
+ schema: str = "default",
+ table_name: str,
+ partitions: dict,
+ partition_operator: str = "=",
+ handler: Callable[[Any], Any] = fetch_all_handler,
+ client_parameters: dict[str, Any] | None = None,
+ **kwargs,
+ ) -> None:
+ self.databricks_conn_id = databricks_conn_id
+ self._http_path = http_path
+ self._sql_warehouse_name = sql_warehouse_name
+ self.session_config = session_configuration
+ self.http_headers = http_headers
+ self.catalog = catalog
+ self.schema = schema
+ self.caller = "DatabricksPartitionSensor"
+ self.partitions = partitions
+ self.partition_operator = partition_operator
+ self.table_name = table_name
+ self.client_parameters = client_parameters or {}
+ self.hook_params = kwargs.pop("hook_params", {})
+ self.handler = handler
+ self.escaper = ParamEscaper()
+ super().__init__(**kwargs)
+
+ def _sql_sensor(self, sql):
+ """Executes the supplied SQL statement using the hook object."""
+ hook = self._get_hook
+ sql_result = hook.run(
+ sql,
+ handler=self.handler if self.do_xcom_push else None,
+ )
+ self.log.debug("SQL result: %s", sql_result)
+ return sql_result
+
+ @cached_property
+ def _get_hook(self) -> DatabricksSqlHook:
+ """Creates and returns a DatabricksSqlHook object."""
+ return DatabricksSqlHook(
+ self.databricks_conn_id,
+ self._http_path,
+ self._sql_warehouse_name,
+ self.session_config,
+ self.http_headers,
+ self.catalog,
+ self.schema,
+ self.caller,
+ **self.client_parameters,
+ **self.hook_params,
+ )
+
+ def _check_table_partitions(self) -> list:
+ """
+ The method performs the following:
+ * Generates the fully qualified table name.
+ * Calls the generate partition query.
+ * Based on the result returned by the partition generation method,
+ the _sql_sensor method is called.
+ """
+ if self.table_name.split(".")[0] == "delta":
+ _fully_qualified_table_name = self.table_name
+ else:
+ _fully_qualified_table_name = str(self.catalog + "." + self.schema + "." + self.table_name)
+ self.log.debug("Table name generated from arguments: %s", _fully_qualified_table_name)
+ _joiner_val = " AND "
+ _prefix = f"SELECT 1 FROM {_fully_qualified_table_name} WHERE"
+ _suffix = " LIMIT 1"
+
+ partition_sql = self._generate_partition_query(
+ prefix=_prefix,
+ suffix=_suffix,
+ joiner_val=_joiner_val,
+ opts=self.partitions,
+ table_name=_fully_qualified_table_name,
+ escape_key=False,
+ )
+ return self._sql_sensor(partition_sql)
+
+ def _generate_partition_query(
+ self,
+ prefix: str,
+ suffix: str,
+ joiner_val: str,
+ table_name: str,
+ opts: dict[str, str] | None = None,
+ escape_key: bool = False,
+ ) -> str:
+ """
+ Queries the table for available partitions.
+ Generates the SQL query based on the partition data types.
+ * For a list, it prepares the SQL in the format:
+ column_name in (value1, value2,...)
+ * For a numeric type, it prepares the format:
+ column_name =(or other provided operator such as >=) value
+ * For a date type, it prepares the format:
+ column_name =(or other provided operator such as >=) value
+ Once the filter predicates have been generated like above, the query
+ is prepared to be executed using the prefix and suffix supplied, which are:
+ "SELECT 1 FROM {_fully_qualified_table_name} WHERE" and "LIMIT 1".
+ """
+ partition_columns = self._sql_sensor(f"DESCRIBE DETAIL {table_name}")[0][7]
+ self.log.debug("Partition columns: %s", partition_columns)
+ if len(partition_columns) < 1:
+ raise AirflowException(f"Table {table_name} does not have partitions")
+ formatted_opts = ""
+ if opts is not None and len(opts) > 0:
+ output_list = []
+ for partition_col, partition_value in opts.items():
+ if escape_key:
+ partition_col = self.escaper.escape_item(partition_col)
+ if partition_col in partition_columns:
+ if isinstance(partition_value, list):
+ output_list.append(f"""{partition_col} in {tuple(partition_value)}""")
+ self.log.debug("List formatting for partitions: %s", output_list)
+ if isinstance(partition_value, (int, float, complex)):
+ output_list.append(
+ f"""{partition_col}{self.partition_operator}{self.escaper.escape_item(partition_value)}"""
+ )
+ if isinstance(partition_value, (str, datetime)):
+ output_list.append(
+ f"""{partition_col}{self.partition_operator}{self.escaper.escape_item(partition_value)}"""
+ )
+ else:
+ raise AirflowException(
+ f"Column {partition_col} not part of table partitions: {partition_columns}"
+ )
+ else:
+ # Raises exception if the table does not have any partitions.
+ raise AirflowException("No partitions specified to check with the sensor.")
+ formatted_opts = f"{prefix} {joiner_val.join(output_list)} {suffix}"
+ self.log.debug("Formatted options: %s", formatted_opts)
+
+ return formatted_opts.strip()
+
+ def poke(self, context: Context) -> bool:
+ """Checks the table partitions and returns the results."""
+ partition_result = self._check_table_partitions()
+ self.log.debug("Partition sensor result: %s", partition_result)
+ if len(partition_result) >= 1:
+ return True
+ else:
+ raise AirflowException(f"Specified partition(s): {self.partitions} were not found.")
@@ -32,14 +32,36 @@ class DatabricksExecutionTrigger(BaseTrigger):
  :param databricks_conn_id: Reference to the :ref:`Databricks connection <howto/connection:databricks>`.
  :param polling_period_seconds: Controls the rate of the poll for the result of this run.
  By default, the trigger will poll every 30 seconds.
+ :param retry_limit: The number of times to retry the connection in case of service outages.
+ :param retry_delay: The number of seconds to wait between retries.
+ :param retry_args: An optional dictionary with arguments passed to ``tenacity.Retrying`` class.
+ :param run_page_url: The run page url.
  """

- def __init__(self, run_id: int, databricks_conn_id: str, polling_period_seconds: int = 30) -> None:
+ def __init__(
+ self,
+ run_id: int,
+ databricks_conn_id: str,
+ polling_period_seconds: int = 30,
+ retry_limit: int = 3,
+ retry_delay: int = 10,
+ retry_args: dict[Any, Any] | None = None,
+ run_page_url: str | None = None,
+ ) -> None:
  super().__init__()
  self.run_id = run_id
  self.databricks_conn_id = databricks_conn_id
  self.polling_period_seconds = polling_period_seconds
- self.hook = DatabricksHook(databricks_conn_id)
+ self.retry_limit = retry_limit
+ self.retry_delay = retry_delay
+ self.retry_args = retry_args
+ self.run_page_url = run_page_url
+ self.hook = DatabricksHook(
+ databricks_conn_id,
+ retry_limit=self.retry_limit,
+ retry_delay=self.retry_delay,
+ retry_args=retry_args,
+ )

  def serialize(self) -> tuple[str, dict[str, Any]]:
  return (
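A sketch of constructing and serializing the extended trigger; the run id, retry values and URL are illustrative:

    from airflow.providers.databricks.triggers.databricks import DatabricksExecutionTrigger

    trigger = DatabricksExecutionTrigger(
        run_id=123456,
        databricks_conn_id="databricks_default",
        polling_period_seconds=30,
        retry_limit=5,
        retry_delay=10,
        run_page_url="https://example.cloud.databricks.com/#job/1/run/123456",
    )
    # serialize() now round-trips the retry settings and run page URL, so the triggerer
    # process can rebuild the trigger (and its hook) with the same retry behaviour.
    classpath, kwargs = trigger.serialize()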
@@ -48,22 +70,31 @@ class DatabricksExecutionTrigger(BaseTrigger):
  "run_id": self.run_id,
  "databricks_conn_id": self.databricks_conn_id,
  "polling_period_seconds": self.polling_period_seconds,
+ "retry_limit": self.retry_limit,
+ "retry_delay": self.retry_delay,
+ "retry_args": self.retry_args,
+ "run_page_url": self.run_page_url,
  },
  )

  async def run(self):
  async with self.hook:
- run_page_url = await self.hook.a_get_run_page_url(self.run_id)
  while True:
  run_state = await self.hook.a_get_run_state(self.run_id)
  if run_state.is_terminal:
  yield TriggerEvent(
  {
  "run_id": self.run_id,
+ "run_page_url": self.run_page_url,
  "run_state": run_state.to_json(),
- "run_page_url": run_page_url,
  }
  )
- break
+ return
  else:
+ self.log.info(
+ "run-id %s in run state %s. sleeping for %s seconds",
+ self.run_id,
+ run_state,
+ self.polling_period_seconds,
+ )
  await asyncio.sleep(self.polling_period_seconds)
@@ -1,13 +1,13 @@
  Metadata-Version: 2.1
  Name: apache-airflow-providers-databricks
- Version: 4.1.0rc1
+ Version: 4.2.0
  Summary: Provider for Apache Airflow. Implements apache-airflow-providers-databricks package
  Home-page: https://airflow.apache.org/
  Download-URL: https://archive.apache.org/dist/airflow/providers
  Author: Apache Software Foundation
  Author-email: dev@airflow.apache.org
  License: Apache License 2.0
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.1.0/
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.2.0/
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
  Project-URL: Source Code, https://github.com/apache/airflow
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
@@ -31,8 +31,8 @@ Description-Content-Type: text/x-rst
  License-File: LICENSE
  License-File: NOTICE
  Requires-Dist: aiohttp (<4,>=3.6.3)
- Requires-Dist: apache-airflow-providers-common-sql (>=1.3.1.dev0)
- Requires-Dist: apache-airflow (>=2.3.0.dev0)
+ Requires-Dist: apache-airflow-providers-common-sql (>=1.5.0)
+ Requires-Dist: apache-airflow (>=2.4.0)
  Requires-Dist: databricks-sql-connector (<3.0.0,>=2.0.0)
  Requires-Dist: requests (<3,>=2.27)
  Provides-Extra: common.sql
@@ -59,7 +59,7 @@ Requires-Dist: apache-airflow-providers-common-sql ; extra == 'common.sql'

  Package ``apache-airflow-providers-databricks``

- Release: ``4.1.0rc1``
+ Release: ``4.2.0``


  `Databricks <https://databricks.com/>`__
@@ -72,7 +72,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
  are in ``airflow.providers.databricks`` python package.

  You can find package information and changelog for the provider
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.1.0/>`_.
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.2.0/>`_.


  Installation
@@ -90,8 +90,8 @@ Requirements
  ======================================= ===================
  PIP package Version required
  ======================================= ===================
- ``apache-airflow`` ``>=2.3.0``
- ``apache-airflow-providers-common-sql`` ``>=1.3.1``
+ ``apache-airflow`` ``>=2.4.0``
+ ``apache-airflow-providers-common-sql`` ``>=1.5.0``
  ``requests`` ``>=2.27,<3``
  ``databricks-sql-connector`` ``>=2.0.0, <3.0.0``
  ``aiohttp`` ``>=3.6.3, <4``
@@ -142,6 +142,38 @@ Dependent package
  Changelog
  ---------

+ 4.2.0
+ .....
+
+ .. note::
+ This release of provider is only available for Airflow 2.4+ as explained in the
+ `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_.
+
+ Features
+ ~~~~~~~~
+
+ * ``Add conditional output processing in SQL operators (#31136)``
+ * ``Add cancel all runs functionality to Databricks hook (#31038)``
+ * ``Add retry param in databrics async operator (#30744)``
+ * ``Add repair job functionality to databricks hook (#30786)``
+ * ``Add 'DatabricksPartitionSensor' (#30980)``
+
+ Misc
+ ~~~~
+
+ * ``Bump minimum Airflow version in providers (#30917)``
+ * ``Deprecate databricks async operator (#30761)``
+
+ .. Below changes are excluded from the changelog. Move them to
+ appropriate section above if needed. Do not delete the lines(!):
+ * ``Move TaskInstanceKey to a separate file (#31033)``
+ * ``Use 'AirflowProviderDeprecationWarning' in providers (#30975)``
+ * ``Add full automation for min Airflow version for providers (#30994)``
+ * ``Add cli cmd to list the provider trigger info (#30822)``
+ * ``Use '__version__' in providers not 'version' (#31393)``
+ * ``Fixing circular import error in providers caused by airflow version check (#31379)``
+ * ``Prepare docs for May 2023 wave of Providers (#31252)``
+
  4.1.0
  .....

@@ -209,8 +241,9 @@ Bug Fixes
  3.4.0
  .....

- This release of provider is only available for Airflow 2.3+ as explained in the
- `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/README.md#support-for-providers>`_.
+ .. note::
+ This release of provider is only available for Airflow 2.3+ as explained in the
+ `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_.

  Misc
  ~~~~
@@ -306,8 +339,9 @@ Bug Fixes
  Breaking changes
  ~~~~~~~~~~~~~~~~

- * This release of provider is only available for Airflow 2.2+ as explained in the Apache Airflow
- providers support policy https://github.com/apache/airflow/blob/main/README.md#support-for-providers
+ .. note::
+ This release of provider is only available for Airflow 2.2+ as explained in the
+ `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_.

  Features
  ~~~~~~~~
@@ -0,0 +1,24 @@
+ airflow/providers/databricks/__init__.py,sha256=eHAIbBTSe40lQHvcPqHX9FzgcyiWbUdfAUt2kVeUlZ0,1535
+ airflow/providers/databricks/get_provider_info.py,sha256=l9sKMRQa8jcpIKRGvs0QvSXZLZkeDjNpGkkkQa4jarE,5547
+ airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
+ airflow/providers/databricks/hooks/databricks.py,sha256=7xBIg-XYTR0X4B2LAr1LMCABYqKow-45aplJWt9KfzA,16761
+ airflow/providers/databricks/hooks/databricks_base.py,sha256=ly679clkcHbkJJnz9_SQIe2TNVVhGgKjy4ZNlSiQ1aI,26704
+ airflow/providers/databricks/hooks/databricks_sql.py,sha256=o4gBduSTrqiHGuZAO1pc6P7-O5sXd_L7-LVXb9qhATc,9278
+ airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
+ airflow/providers/databricks/operators/databricks.py,sha256=IihlOQyNTfv5UxNvb7hI5v0ZcZGVn8ctYbrXb3HYaT8,33394
+ airflow/providers/databricks/operators/databricks_repos.py,sha256=23wyOHSMcnQQdhcsESc9n-I5v9W868NezxxU6RZCRNg,13226
+ airflow/providers/databricks/operators/databricks_sql.py,sha256=Gkv2pVpFAG-N_af49YBnQz3ehj0yUIVl4l3Z-rW4DTs,16793
+ airflow/providers/databricks/sensors/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
+ airflow/providers/databricks/sensors/databricks_partition.py,sha256=ugLBefvwwchtvOkHbX2m-rEKH3r6ZhAIuV7G1Eh83TA,10015
+ airflow/providers/databricks/sensors/databricks_sql.py,sha256=NP3qmEyb0RYheuShqgz0z9yeOoqRll26poOQ4oFBHi4,5558
+ airflow/providers/databricks/triggers/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
+ airflow/providers/databricks/triggers/databricks.py,sha256=Qj9mB0bNYRY_toPEU17gxbxmPkkT3P789kCHu_T64BA,3997
+ airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
+ airflow/providers/databricks/utils/databricks.py,sha256=TxIyKfFsb9tm7pLK3a81TYa0icQMfn07KwcTMBI3W1E,2909
+ apache_airflow_providers_databricks-4.2.0.dist-info/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
+ apache_airflow_providers_databricks-4.2.0.dist-info/METADATA,sha256=jzX8b1_PiaHtavJNm-d3VYmSj0ED1DY0yzksJGx9yr0,20560
+ apache_airflow_providers_databricks-4.2.0.dist-info/NOTICE,sha256=m-6s2XynUxVSUIxO4rVablAZCvFq-wmLrqV91DotRBw,240
+ apache_airflow_providers_databricks-4.2.0.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+ apache_airflow_providers_databricks-4.2.0.dist-info/entry_points.txt,sha256=8r3YBg2Qr0qeOALxzlooH5pXM6QmoPQuyQ75cQrkY5A,107
+ apache_airflow_providers_databricks-4.2.0.dist-info/top_level.txt,sha256=OeMVH5md7fr2QQWpnZoOWWxWO-0WH1IP70lpTVwopPg,8
+ apache_airflow_providers_databricks-4.2.0.dist-info/RECORD,,
@@ -1,23 +0,0 @@
- airflow/providers/databricks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
- airflow/providers/databricks/get_provider_info.py,sha256=dB5PZbP_JZ8qOTcMGyuyRc-dNkF24-kZ9KOXQiD2Pp4,5344
- airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
- airflow/providers/databricks/hooks/databricks.py,sha256=C2QxAwZN9idHKQ7WnulPj8F5I-BPOqQbk9vENhDKiY0,16105
- airflow/providers/databricks/hooks/databricks_base.py,sha256=ly679clkcHbkJJnz9_SQIe2TNVVhGgKjy4ZNlSiQ1aI,26704
- airflow/providers/databricks/hooks/databricks_sql.py,sha256=o4gBduSTrqiHGuZAO1pc6P7-O5sXd_L7-LVXb9qhATc,9278
- airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
- airflow/providers/databricks/operators/databricks.py,sha256=rUChbUHukgNdS8RqrBP_xsfGa302LG2liX3SyhNv7Gk,31391
- airflow/providers/databricks/operators/databricks_repos.py,sha256=23wyOHSMcnQQdhcsESc9n-I5v9W868NezxxU6RZCRNg,13226
- airflow/providers/databricks/operators/databricks_sql.py,sha256=3muhLDKufPqqoSt_0V4Z7XGm0QJAZpu14nDj1EyBhb0,16679
- airflow/providers/databricks/sensors/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
- airflow/providers/databricks/sensors/databricks_sql.py,sha256=NP3qmEyb0RYheuShqgz0z9yeOoqRll26poOQ4oFBHi4,5558
- airflow/providers/databricks/triggers/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
- airflow/providers/databricks/triggers/databricks.py,sha256=SYH1tz3yU-xxnGqPkmjLo1XAVhriggrE0Z0RLb0MojI,2811
- airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
- airflow/providers/databricks/utils/databricks.py,sha256=TxIyKfFsb9tm7pLK3a81TYa0icQMfn07KwcTMBI3W1E,2909
- apache_airflow_providers_databricks-4.1.0rc1.dist-info/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
- apache_airflow_providers_databricks-4.1.0rc1.dist-info/METADATA,sha256=4XHcKjX9Tzi-4qwpJMKa1a6HDcIdM_nzJRZqDTNP2uU,19102
- apache_airflow_providers_databricks-4.1.0rc1.dist-info/NOTICE,sha256=m-6s2XynUxVSUIxO4rVablAZCvFq-wmLrqV91DotRBw,240
- apache_airflow_providers_databricks-4.1.0rc1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
- apache_airflow_providers_databricks-4.1.0rc1.dist-info/entry_points.txt,sha256=8r3YBg2Qr0qeOALxzlooH5pXM6QmoPQuyQ75cQrkY5A,107
- apache_airflow_providers_databricks-4.1.0rc1.dist-info/top_level.txt,sha256=OeMVH5md7fr2QQWpnZoOWWxWO-0WH1IP70lpTVwopPg,8
- apache_airflow_providers_databricks-4.1.0rc1.dist-info/RECORD,,