apache-airflow-providers-databricks 6.6.0__tar.gz → 6.7.0__tar.gz

This diff shows the changes between package versions as they were published to the supported public registries. It is provided for informational purposes only.

Potentially problematic release: this version of apache-airflow-providers-databricks might be problematic.

Files changed (22)
  1. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/PKG-INFO +8 -6
  2. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/README.rst +3 -3
  3. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/__init__.py +1 -1
  4. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/get_provider_info.py +8 -2
  5. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/hooks/databricks.py +6 -3
  6. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/hooks/databricks_base.py +34 -70
  7. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/hooks/databricks_sql.py +2 -1
  8. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/operators/databricks.py +95 -92
  9. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/utils/databricks.py +2 -2
  10. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/pyproject.toml +6 -3
  11. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/LICENSE +0 -0
  12. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/hooks/__init__.py +0 -0
  13. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/operators/__init__.py +0 -0
  14. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/operators/databricks_repos.py +0 -0
  15. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/operators/databricks_sql.py +0 -0
  16. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/operators/databricks_workflow.py +0 -0
  17. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/sensors/__init__.py +0 -0
  18. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/sensors/databricks_partition.py +0 -0
  19. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/sensors/databricks_sql.py +0 -0
  20. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/triggers/__init__.py +0 -0
  21. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/triggers/databricks.py +0 -0
  22. {apache_airflow_providers_databricks-6.6.0 → apache_airflow_providers_databricks-6.7.0}/airflow/providers/databricks/utils/__init__.py +0 -0
--- apache_airflow_providers_databricks-6.6.0/PKG-INFO
+++ apache_airflow_providers_databricks-6.7.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-databricks
-Version: 6.6.0
+Version: 6.7.0
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -30,15 +30,17 @@ Requires-Dist: pandas>=1.5.3,<2.2;python_version<"3.9"
 Requires-Dist: pandas>=2.1.2,<2.2;python_version>="3.9"
 Requires-Dist: pyarrow>=14.0.1
 Requires-Dist: requests>=2.27.0,<3
+Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
 Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.6.0/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.6.0
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.7.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.7.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
 Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
+Provides-Extra: azure-identity
 Provides-Extra: common.sql
 Provides-Extra: sdk
 
@@ -86,7 +88,7 @@ Provides-Extra: sdk
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``6.6.0``
+Release: ``6.7.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -99,7 +101,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.6.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.7.0/>`_.
 
 Installation
 ------------
@@ -147,4 +149,4 @@ Dependent package
 ============================================================================================================ ==============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.6.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.7.0/changelog.html>`_.
--- apache_airflow_providers_databricks-6.6.0/README.rst
+++ apache_airflow_providers_databricks-6.7.0/README.rst
@@ -42,7 +42,7 @@
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``6.6.0``
+Release: ``6.7.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.6.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.7.0/>`_.
 
 Installation
 ------------
@@ -103,4 +103,4 @@ Dependent package
 ============================================================================================================ ==============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.6.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.7.0/changelog.html>`_.
--- apache_airflow_providers_databricks-6.6.0/airflow/providers/databricks/__init__.py
+++ apache_airflow_providers_databricks-6.7.0/airflow/providers/databricks/__init__.py
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "6.6.0"
+__version__ = "6.7.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.7.0"
--- apache_airflow_providers_databricks-6.6.0/airflow/providers/databricks/get_provider_info.py
+++ apache_airflow_providers_databricks-6.7.0/airflow/providers/databricks/get_provider_info.py
@@ -28,8 +28,9 @@ def get_provider_info():
         "name": "Databricks",
         "description": "`Databricks <https://databricks.com/>`__\n",
         "state": "ready",
-        "source-date-epoch": 1718604145,
+        "source-date-epoch": 1720422668,
         "versions": [
+            "6.7.0",
             "6.6.0",
             "6.5.0",
             "6.4.0",
@@ -85,7 +86,12 @@ def get_provider_info():
                 "name": "sdk",
                 "description": "Install Databricks SDK",
                 "dependencies": ["databricks-sdk==0.10.0"],
-            }
+            },
+            {
+                "name": "azure-identity",
+                "description": "Install Azure Identity client library",
+                "dependencies": ["azure-identity>=1.3.1"],
+            },
         ],
         "devel-dependencies": ["deltalake>=0.12.0"],
         "integrations": [
--- apache_airflow_providers_databricks-6.6.0/airflow/providers/databricks/hooks/databricks.py
+++ apache_airflow_providers_databricks-6.7.0/airflow/providers/databricks/hooks/databricks.py
@@ -65,7 +65,8 @@ SPARK_VERSIONS_ENDPOINT = ("GET", "api/2.0/clusters/spark-versions")
 
 
 class RunLifeCycleState(Enum):
-    """Enum for the run life cycle state concept of Databricks runs.
+    """
+    Enum for the run life cycle state concept of Databricks runs.
 
     See more information at: https://docs.databricks.com/api/azure/workspace/jobs/listruns#runs-state-life_cycle_state
     """
@@ -215,7 +216,8 @@ class DatabricksHook(BaseDatabricksHook):
         super().__init__(databricks_conn_id, timeout_seconds, retry_limit, retry_delay, retry_args, caller)
 
     def create_job(self, json: dict) -> int:
-        """Call the ``api/2.1/jobs/create`` endpoint.
+        """
+        Call the ``api/2.1/jobs/create`` endpoint.
 
         :param json: The data used in the body of the request to the ``create`` endpoint.
         :return: the job_id as an int
@@ -224,7 +226,8 @@ class DatabricksHook(BaseDatabricksHook):
         return response["job_id"]
 
     def reset_job(self, job_id: str, json: dict) -> None:
-        """Call the ``api/2.1/jobs/reset`` endpoint.
+        """
+        Call the ``api/2.1/jobs/reset`` endpoint.
 
         :param json: The data used in the new_settings of the request to the ``reset`` endpoint.
         """
--- apache_airflow_providers_databricks-6.6.0/airflow/providers/databricks/hooks/databricks_base.py
+++ apache_airflow_providers_databricks-6.7.0/airflow/providers/databricks/hooks/databricks_base.py
@@ -47,17 +47,13 @@ from tenacity import (
 )
 
 from airflow import __version__
-from airflow.exceptions import AirflowException
+from airflow.exceptions import AirflowException, AirflowOptionalProviderFeatureException
 from airflow.hooks.base import BaseHook
 from airflow.providers_manager import ProvidersManager
 
 if TYPE_CHECKING:
     from airflow.models import Connection
 
-# https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/aad/service-prin-aad-token#--get-an-azure-active-directory-access-token
-# https://docs.microsoft.com/en-us/graph/deployments#app-registration-and-token-service-root-endpoints
-AZURE_DEFAULT_AD_ENDPOINT = "https://login.microsoftonline.com"
-AZURE_TOKEN_SERVICE_URL = "{}/{}/oauth2/token"
 # https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/how-to-use-vm-token
 AZURE_METADATA_SERVICE_TOKEN_URL = "http://169.254.169.254/metadata/identity/oauth2/token"
 AZURE_METADATA_SERVICE_INSTANCE_URL = "http://169.254.169.254/metadata/instance"
@@ -301,46 +297,29 @@ class BaseDatabricksHook(BaseHook):
 
         self.log.info("Existing AAD token is expired, or going to expire soon. Refreshing...")
         try:
+            from azure.identity import ClientSecretCredential, ManagedIdentityCredential
+
             for attempt in self._get_retry_object():
                 with attempt:
                     if self.databricks_conn.extra_dejson.get("use_azure_managed_identity", False):
-                        params = {
-                            "api-version": "2018-02-01",
-                            "resource": resource,
-                        }
-                        resp = requests.get(
-                            AZURE_METADATA_SERVICE_TOKEN_URL,
-                            params=params,
-                            headers={**self.user_agent_header, "Metadata": "true"},
-                            timeout=self.token_timeout_seconds,
-                        )
+                        token = ManagedIdentityCredential().get_token(f"{resource}/.default")
                     else:
-                        tenant_id = self.databricks_conn.extra_dejson["azure_tenant_id"]
-                        data = {
-                            "grant_type": "client_credentials",
-                            "client_id": self.databricks_conn.login,
-                            "resource": resource,
-                            "client_secret": self.databricks_conn.password,
-                        }
-                        azure_ad_endpoint = self.databricks_conn.extra_dejson.get(
-                            "azure_ad_endpoint", AZURE_DEFAULT_AD_ENDPOINT
-                        )
-                        resp = requests.post(
-                            AZURE_TOKEN_SERVICE_URL.format(azure_ad_endpoint, tenant_id),
-                            data=data,
-                            headers={
-                                **self.user_agent_header,
-                                "Content-Type": "application/x-www-form-urlencoded",
-                            },
-                            timeout=self.token_timeout_seconds,
+                        credential = ClientSecretCredential(
+                            client_id=self.databricks_conn.login,
+                            client_secret=self.databricks_conn.password,
+                            tenant_id=self.databricks_conn.extra_dejson["azure_tenant_id"],
                         )
-
-                    resp.raise_for_status()
-                    jsn = resp.json()
-
+                        token = credential.get_token(f"{resource}/.default")
+                    jsn = {
+                        "access_token": token.token,
+                        "token_type": "Bearer",
+                        "expires_on": token.expires_on,
+                    }
                     self._is_oauth_token_valid(jsn)
                     self.oauth_tokens[resource] = jsn
                     break
+        except ImportError as e:
+            raise AirflowOptionalProviderFeatureException(e)
         except RetryError:
            raise AirflowException(f"API requests to Azure failed {self.retry_limit} times. Giving up.")
         except requests_exceptions.HTTPError as e:
@@ -362,47 +341,32 @@ class BaseDatabricksHook(BaseHook):
 
         self.log.info("Existing AAD token is expired, or going to expire soon. Refreshing...")
         try:
+            from azure.identity.aio import (
+                ClientSecretCredential as AsyncClientSecretCredential,
+                ManagedIdentityCredential as AsyncManagedIdentityCredential,
+            )
+
             async for attempt in self._a_get_retry_object():
                 with attempt:
                     if self.databricks_conn.extra_dejson.get("use_azure_managed_identity", False):
-                        params = {
-                            "api-version": "2018-02-01",
-                            "resource": resource,
-                        }
-                        async with self._session.get(
-                            url=AZURE_METADATA_SERVICE_TOKEN_URL,
-                            params=params,
-                            headers={**self.user_agent_header, "Metadata": "true"},
-                            timeout=self.token_timeout_seconds,
-                        ) as resp:
-                            resp.raise_for_status()
-                            jsn = await resp.json()
+                        token = await AsyncManagedIdentityCredential().get_token(f"{resource}/.default")
                     else:
-                        tenant_id = self.databricks_conn.extra_dejson["azure_tenant_id"]
-                        data = {
-                            "grant_type": "client_credentials",
-                            "client_id": self.databricks_conn.login,
-                            "resource": resource,
-                            "client_secret": self.databricks_conn.password,
-                        }
-                        azure_ad_endpoint = self.databricks_conn.extra_dejson.get(
-                            "azure_ad_endpoint", AZURE_DEFAULT_AD_ENDPOINT
+                        credential = AsyncClientSecretCredential(
+                            client_id=self.databricks_conn.login,
+                            client_secret=self.databricks_conn.password,
+                            tenant_id=self.databricks_conn.extra_dejson["azure_tenant_id"],
                         )
-                        async with self._session.post(
-                            url=AZURE_TOKEN_SERVICE_URL.format(azure_ad_endpoint, tenant_id),
-                            data=data,
-                            headers={
-                                **self.user_agent_header,
-                                "Content-Type": "application/x-www-form-urlencoded",
-                            },
-                            timeout=self.token_timeout_seconds,
-                        ) as resp:
-                            resp.raise_for_status()
-                            jsn = await resp.json()
-
+                        token = await credential.get_token(f"{resource}/.default")
+                    jsn = {
+                        "access_token": token.token,
+                        "token_type": "Bearer",
+                        "expires_on": token.expires_on,
+                    }
                     self._is_oauth_token_valid(jsn)
                     self.oauth_tokens[resource] = jsn
                     break
+        except ImportError as e:
+            raise AirflowOptionalProviderFeatureException(e)
         except RetryError:
             raise AirflowException(f"API requests to Azure failed {self.retry_limit} times. Giving up.")
         except aiohttp.ClientResponseError as err:
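
The databricks_base.py hunks above drop the hand-rolled requests/aiohttp calls to the Azure AD token endpoints and delegate token acquisition to the azure-identity credential classes, imported lazily inside the try block so the dependency stays optional; if the import fails, the hook now raises AirflowOptionalProviderFeatureException. Below is a rough standalone sketch of that credential flow, not the hook itself: it assumes the new extra is installed (pip install "apache-airflow-providers-databricks[azure-identity]"), the tenant/client/secret values are placeholders, and the resource ID constant is the well-known Azure Databricks application ID, which is an assumption rather than something taken from this diff.

```python
# Hedged sketch of the credential flow used by the new hook code; not the hook itself.
# Requires: pip install "apache-airflow-providers-databricks[azure-identity]"
from azure.identity import ClientSecretCredential, ManagedIdentityCredential

# Well-known Azure Databricks application (resource) ID -- assumption, not part of this diff.
AZURE_DATABRICKS_RESOURCE_ID = "2ff814a6-3304-4ab8-85cb-cd0e6f879c1d"


def get_aad_token(resource: str, use_managed_identity: bool = False) -> dict:
    """Return a token dict shaped like the one the hook stores in oauth_tokens[resource]."""
    if use_managed_identity:
        # On an Azure VM / AKS pod with a managed identity attached.
        token = ManagedIdentityCredential().get_token(f"{resource}/.default")
    else:
        # Service-principal flow; the placeholders map to the Databricks connection's
        # login (client_id), password (client_secret) and azure_tenant_id extra.
        credential = ClientSecretCredential(
            tenant_id="<azure_tenant_id>",
            client_id="<service_principal_client_id>",
            client_secret="<service_principal_secret>",
        )
        token = credential.get_token(f"{resource}/.default")
    return {"access_token": token.token, "token_type": "Bearer", "expires_on": token.expires_on}


if __name__ == "__main__":
    # Needs real credentials to succeed; shown only to illustrate the call shape.
    print(get_aad_token(AZURE_DATABRICKS_RESOURCE_ID))
```

One consequence of this change visible in the hunks is that the azure_ad_endpoint connection extra and the AZURE_DEFAULT_AD_ENDPOINT/AZURE_TOKEN_SERVICE_URL constants are gone; endpoint selection is now left to azure-identity's defaults.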
--- apache_airflow_providers_databricks-6.6.0/airflow/providers/databricks/hooks/databricks_sql.py
+++ apache_airflow_providers_databricks-6.7.0/airflow/providers/databricks/hooks/databricks_sql.py
@@ -50,7 +50,8 @@ T = TypeVar("T")
 
 
 class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
-    """Hook to interact with Databricks SQL.
+    """
+    Hook to interact with Databricks SQL.
 
     :param databricks_conn_id: Reference to the
         :ref:`Databricks connection <howto/connection:databricks>`.
--- apache_airflow_providers_databricks-6.6.0/airflow/providers/databricks/operators/databricks.py
+++ apache_airflow_providers_databricks-6.7.0/airflow/providers/databricks/operators/databricks.py
@@ -36,7 +36,7 @@ from airflow.providers.databricks.operators.databricks_workflow import (
     WorkflowRunMetadata,
 )
 from airflow.providers.databricks.triggers.databricks import DatabricksExecutionTrigger
-from airflow.providers.databricks.utils.databricks import normalise_json_content, validate_trigger_event
+from airflow.providers.databricks.utils.databricks import _normalise_json_content, validate_trigger_event
 
 if TYPE_CHECKING:
     from airflow.models.taskinstancekey import TaskInstanceKey
@@ -182,6 +182,17 @@ def _handle_deferrable_databricks_operator_completion(event: dict, log: Logger)
     raise AirflowException(error_message)
 
 
+def _handle_overridden_json_params(operator):
+    for key, value in operator.overridden_json_params.items():
+        if value is not None:
+            operator.json[key] = value
+
+
+def normalise_json_content(operator):
+    if operator.json:
+        operator.json = _normalise_json_content(operator.json)
+
+
 class DatabricksJobRunLink(BaseOperatorLink):
     """Constructs a link to monitor a Databricks Job Run."""
 
@@ -197,7 +208,8 @@ class DatabricksJobRunLink(BaseOperatorLink):
 
 
 class DatabricksCreateJobsOperator(BaseOperator):
-    """Creates (or resets) a Databricks job using the API endpoint.
+    """
+    Creates (or resets) a Databricks job using the API endpoint.
 
     .. seealso::
         https://docs.databricks.com/api/workspace/jobs/create
@@ -284,34 +296,21 @@ class DatabricksCreateJobsOperator(BaseOperator):
         self.databricks_retry_limit = databricks_retry_limit
         self.databricks_retry_delay = databricks_retry_delay
         self.databricks_retry_args = databricks_retry_args
-        if name is not None:
-            self.json["name"] = name
-        if description is not None:
-            self.json["description"] = description
-        if tags is not None:
-            self.json["tags"] = tags
-        if tasks is not None:
-            self.json["tasks"] = tasks
-        if job_clusters is not None:
-            self.json["job_clusters"] = job_clusters
-        if email_notifications is not None:
-            self.json["email_notifications"] = email_notifications
-        if webhook_notifications is not None:
-            self.json["webhook_notifications"] = webhook_notifications
-        if notification_settings is not None:
-            self.json["notification_settings"] = notification_settings
-        if timeout_seconds is not None:
-            self.json["timeout_seconds"] = timeout_seconds
-        if schedule is not None:
-            self.json["schedule"] = schedule
-        if max_concurrent_runs is not None:
-            self.json["max_concurrent_runs"] = max_concurrent_runs
-        if git_source is not None:
-            self.json["git_source"] = git_source
-        if access_control_list is not None:
-            self.json["access_control_list"] = access_control_list
-        if self.json:
-            self.json = normalise_json_content(self.json)
+        self.overridden_json_params = {
+            "name": name,
+            "description": description,
+            "tags": tags,
+            "tasks": tasks,
+            "job_clusters": job_clusters,
+            "email_notifications": email_notifications,
+            "webhook_notifications": webhook_notifications,
+            "notification_settings": notification_settings,
+            "timeout_seconds": timeout_seconds,
+            "schedule": schedule,
+            "max_concurrent_runs": max_concurrent_runs,
+            "git_source": git_source,
+            "access_control_list": access_control_list,
+        }
 
     @cached_property
     def _hook(self):
@@ -323,16 +322,24 @@ class DatabricksCreateJobsOperator(BaseOperator):
             caller="DatabricksCreateJobsOperator",
         )
 
-    def execute(self, context: Context) -> int:
+    def _setup_and_validate_json(self):
+        _handle_overridden_json_params(self)
+
         if "name" not in self.json:
             raise AirflowException("Missing required parameter: name")
+
+        normalise_json_content(self)
+
+    def execute(self, context: Context) -> int:
+        self._setup_and_validate_json()
+
         job_id = self._hook.find_job_id_by_name(self.json["name"])
         if job_id is None:
             return self._hook.create_job(self.json)
         self._hook.reset_job(str(job_id), self.json)
         if (access_control_list := self.json.get("access_control_list")) is not None:
             acl_json = {"access_control_list": access_control_list}
-            self._hook.update_job_permission(job_id, normalise_json_content(acl_json))
+            self._hook.update_job_permission(job_id, _normalise_json_content(acl_json))
 
         return job_id
 
@@ -505,43 +512,23 @@ class DatabricksSubmitRunOperator(BaseOperator):
         self.databricks_retry_args = databricks_retry_args
         self.wait_for_termination = wait_for_termination
         self.deferrable = deferrable
-        if tasks is not None:
-            self.json["tasks"] = tasks
-        if spark_jar_task is not None:
-            self.json["spark_jar_task"] = spark_jar_task
-        if notebook_task is not None:
-            self.json["notebook_task"] = notebook_task
-        if spark_python_task is not None:
-            self.json["spark_python_task"] = spark_python_task
-        if spark_submit_task is not None:
-            self.json["spark_submit_task"] = spark_submit_task
-        if pipeline_task is not None:
-            self.json["pipeline_task"] = pipeline_task
-        if dbt_task is not None:
-            self.json["dbt_task"] = dbt_task
-        if new_cluster is not None:
-            self.json["new_cluster"] = new_cluster
-        if existing_cluster_id is not None:
-            self.json["existing_cluster_id"] = existing_cluster_id
-        if libraries is not None:
-            self.json["libraries"] = libraries
-        if run_name is not None:
-            self.json["run_name"] = run_name
-        if timeout_seconds is not None:
-            self.json["timeout_seconds"] = timeout_seconds
-        if "run_name" not in self.json:
-            self.json["run_name"] = run_name or kwargs["task_id"]
-        if idempotency_token is not None:
-            self.json["idempotency_token"] = idempotency_token
-        if access_control_list is not None:
-            self.json["access_control_list"] = access_control_list
-        if git_source is not None:
-            self.json["git_source"] = git_source
-
-        if "dbt_task" in self.json and "git_source" not in self.json:
-            raise AirflowException("git_source is required for dbt_task")
-        if pipeline_task is not None and "pipeline_id" in pipeline_task and "pipeline_name" in pipeline_task:
-            raise AirflowException("'pipeline_name' is not allowed in conjunction with 'pipeline_id'")
+        self.overridden_json_params = {
+            "tasks": tasks,
+            "spark_jar_task": spark_jar_task,
+            "notebook_task": notebook_task,
+            "spark_python_task": spark_python_task,
+            "spark_submit_task": spark_submit_task,
+            "pipeline_task": pipeline_task,
+            "dbt_task": dbt_task,
+            "new_cluster": new_cluster,
+            "existing_cluster_id": existing_cluster_id,
+            "libraries": libraries,
+            "run_name": run_name,
+            "timeout_seconds": timeout_seconds,
+            "idempotency_token": idempotency_token,
+            "access_control_list": access_control_list,
+            "git_source": git_source,
+        }
 
         # This variable will be used in case our task gets killed.
         self.run_id: int | None = None
@@ -560,7 +547,25 @@ class DatabricksSubmitRunOperator(BaseOperator):
             caller=caller,
         )
 
+    def _setup_and_validate_json(self):
+        _handle_overridden_json_params(self)
+
+        if "run_name" not in self.json or self.json["run_name"] is None:
+            self.json["run_name"] = self.task_id
+
+        if "dbt_task" in self.json and "git_source" not in self.json:
+            raise AirflowException("git_source is required for dbt_task")
+        if (
+            "pipeline_task" in self.json
+            and "pipeline_id" in self.json["pipeline_task"]
+            and "pipeline_name" in self.json["pipeline_task"]
+        ):
+            raise AirflowException("'pipeline_name' is not allowed in conjunction with 'pipeline_id'")
+
+        normalise_json_content(self)
+
     def execute(self, context: Context):
+        self._setup_and_validate_json()
         if (
             "pipeline_task" in self.json
             and self.json["pipeline_task"].get("pipeline_id") is None
@@ -570,7 +575,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
             pipeline_name = self.json["pipeline_task"]["pipeline_name"]
             self.json["pipeline_task"]["pipeline_id"] = self._hook.find_pipeline_id_by_name(pipeline_name)
             del self.json["pipeline_task"]["pipeline_name"]
-        json_normalised = normalise_json_content(self.json)
+        json_normalised = _normalise_json_content(self.json)
         self.run_id = self._hook.submit_run(json_normalised)
         if self.deferrable:
             _handle_deferrable_databricks_operator_execution(self, self._hook, self.log, context)
@@ -606,7 +611,7 @@ class DatabricksSubmitRunDeferrableOperator(DatabricksSubmitRunOperator):
 
     def execute(self, context):
         hook = self._get_hook(caller="DatabricksSubmitRunDeferrableOperator")
-        json_normalised = normalise_json_content(self.json)
+        json_normalised = _normalise_json_content(self.json)
         self.run_id = hook.submit_run(json_normalised)
         _handle_deferrable_databricks_operator_execution(self, hook, self.log, context)
 
@@ -806,27 +811,16 @@ class DatabricksRunNowOperator(BaseOperator):
         self.deferrable = deferrable
         self.repair_run = repair_run
         self.cancel_previous_runs = cancel_previous_runs
-
-        if job_id is not None:
-            self.json["job_id"] = job_id
-        if job_name is not None:
-            self.json["job_name"] = job_name
-        if "job_id" in self.json and "job_name" in self.json:
-            raise AirflowException("Argument 'job_name' is not allowed with argument 'job_id'")
-        if notebook_params is not None:
-            self.json["notebook_params"] = notebook_params
-        if python_params is not None:
-            self.json["python_params"] = python_params
-        if python_named_params is not None:
-            self.json["python_named_params"] = python_named_params
-        if jar_params is not None:
-            self.json["jar_params"] = jar_params
-        if spark_submit_params is not None:
-            self.json["spark_submit_params"] = spark_submit_params
-        if idempotency_token is not None:
-            self.json["idempotency_token"] = idempotency_token
-        if self.json:
-            self.json = normalise_json_content(self.json)
+        self.overridden_json_params = {
+            "job_id": job_id,
+            "job_name": job_name,
+            "notebook_params": notebook_params,
+            "python_params": python_params,
+            "python_named_params": python_named_params,
+            "jar_params": jar_params,
+            "spark_submit_params": spark_submit_params,
+            "idempotency_token": idempotency_token,
+        }
         # This variable will be used in case our task gets killed.
         self.run_id: int | None = None
         self.do_xcom_push = do_xcom_push
@@ -844,7 +838,16 @@ class DatabricksRunNowOperator(BaseOperator):
             caller=caller,
         )
 
+    def _setup_and_validate_json(self):
+        _handle_overridden_json_params(self)
+
+        if "job_id" in self.json and "job_name" in self.json:
+            raise AirflowException("Argument 'job_name' is not allowed with argument 'job_id'")
+
+        normalise_json_content(self)
+
     def execute(self, context: Context):
+        self._setup_and_validate_json()
         hook = self._hook
         if "job_name" in self.json:
             job_id = hook.find_job_id_by_name(self.json["job_name"])
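
The operators/databricks.py hunks above change how the request body is built: instead of copying each constructor argument into self.json inside __init__, the operators now record the arguments in overridden_json_params and merge, validate, and normalise them in _setup_and_validate_json(), which execute() calls first. The utils helper normalise_json_content becomes the private _normalise_json_content, with a small operator-level wrapper of the old name added in this module. A minimal, hypothetical sketch of the pattern follows; the class and argument names are invented for illustration and only the control flow mirrors the diff.

```python
# Hypothetical illustration of the deferred-JSON pattern introduced above;
# not the provider's operator classes.
from __future__ import annotations

from typing import Any


class FakeRunNowOperator:
    def __init__(self, json: dict | None = None, job_id: int | None = None, job_name: str | None = None):
        self.json: dict[str, Any] = json or {}
        # Nothing is merged or validated yet; constructor values are only remembered.
        self.overridden_json_params = {"job_id": job_id, "job_name": job_name}

    def _setup_and_validate_json(self) -> None:
        # Merge only the explicitly provided arguments, then run the checks.
        for key, value in self.overridden_json_params.items():
            if value is not None:
                self.json[key] = value
        if "job_id" in self.json and "job_name" in self.json:
            raise ValueError("Argument 'job_name' is not allowed with argument 'job_id'")

    def execute(self) -> dict[str, Any]:
        self._setup_and_validate_json()
        return self.json


print(FakeRunNowOperator(job_id=42).execute())  # {'job_id': 42}
```

Since the merge now happens at execute() time, it runs after Airflow has rendered any templated fields, which is presumably the point of deferring the JSON assembly and validation.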
--- apache_airflow_providers_databricks-6.6.0/airflow/providers/databricks/utils/databricks.py
+++ apache_airflow_providers_databricks-6.7.0/airflow/providers/databricks/utils/databricks.py
@@ -21,7 +21,7 @@ from airflow.exceptions import AirflowException
 from airflow.providers.databricks.hooks.databricks import RunState
 
 
-def normalise_json_content(content, json_path: str = "json") -> str | bool | list | dict:
+def _normalise_json_content(content, json_path: str = "json") -> str | bool | list | dict:
     """
     Normalize content or all values of content if it is a dict to a string.
 
@@ -33,7 +33,7 @@ def normalise_json_content(content, json_path: str = "json") -> str | bool | lis
     The only one exception is when we have boolean values, they can not be converted
     to string type because databricks does not understand 'True' or 'False' values.
     """
-    normalise = normalise_json_content
+    normalise = _normalise_json_content
     if isinstance(content, (str, bool)):
         return content
     elif isinstance(content, (int, float)):
--- apache_airflow_providers_databricks-6.6.0/pyproject.toml
+++ apache_airflow_providers_databricks-6.7.0/pyproject.toml
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"
 
 [project]
 name = "apache-airflow-providers-databricks"
-version = "6.6.0"
+version = "6.7.0"
 description = "Provider package apache-airflow-providers-databricks for Apache Airflow"
 readme = "README.rst"
 authors = [
@@ -68,8 +68,8 @@ dependencies = [
 ]
 
 [project.urls]
-"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.6.0"
-"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.6.0/changelog.html"
+"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.7.0"
+"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-databricks/6.7.0/changelog.html"
 "Bug Tracker" = "https://github.com/apache/airflow/issues"
 "Source Code" = "https://github.com/apache/airflow"
 "Slack Chat" = "https://s.apache.org/airflow-slack"
@@ -85,6 +85,9 @@ provider_info = "airflow.providers.databricks.get_provider_info:get_provider_inf
 "sdk" = [
     "databricks-sdk==0.10.0",
 ]
+"azure-identity" = [
+    "azure-identity>=1.3.1",
+]
 
 [tool.flit.module]
 name = "airflow.providers.databricks"