apache-airflow-providers-google 17.1.0rc1__py3-none-any.whl → 17.2.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/cloud/hooks/bigquery.py +0 -8
- airflow/providers/google/cloud/hooks/cloud_composer.py +6 -1
- airflow/providers/google/cloud/hooks/cloud_sql.py +3 -3
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +10 -5
- airflow/providers/google/cloud/hooks/dataflow.py +1 -1
- airflow/providers/google/cloud/hooks/spanner.py +26 -6
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +3 -0
- airflow/providers/google/cloud/openlineage/utils.py +14 -0
- airflow/providers/google/cloud/operators/bigquery.py +9 -1
- airflow/providers/google/cloud/operators/cloud_composer.py +9 -3
- airflow/providers/google/cloud/operators/dataplex.py +12 -12
- airflow/providers/google/cloud/operators/dataproc.py +15 -8
- airflow/providers/google/cloud/operators/pubsub.py +55 -8
- airflow/providers/google/cloud/operators/spanner.py +3 -2
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +9 -2
- airflow/providers/google/cloud/sensors/cloud_composer.py +30 -0
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +72 -1
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +7 -3
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +0 -6
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +8 -2
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +27 -2
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
- airflow/providers/google/cloud/triggers/bigquery.py +23 -19
- airflow/providers/google/cloud/triggers/cloud_composer.py +48 -10
- airflow/providers/google/cloud/triggers/dataplex.py +14 -2
- airflow/providers/google/cloud/triggers/dataproc.py +63 -46
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/suite/hooks/sheets.py +15 -1
- airflow/providers/google/suite/operators/sheets.py +5 -0
- airflow/providers/google/version_compat.py +6 -0
- {apache_airflow_providers_google-17.1.0rc1.dist-info → apache_airflow_providers_google-17.2.0rc1.dist-info}/METADATA +9 -10
- {apache_airflow_providers_google-17.1.0rc1.dist-info → apache_airflow_providers_google-17.2.0rc1.dist-info}/RECORD +35 -34
- {apache_airflow_providers_google-17.1.0rc1.dist-info → apache_airflow_providers_google-17.2.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-17.1.0rc1.dist-info → apache_airflow_providers_google-17.2.0rc1.dist-info}/entry_points.txt +0 -0
--- a/airflow/providers/google/__init__.py
+++ b/airflow/providers/google/__init__.py
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version

 __all__ = ["__version__"]

-__version__ = "17.1.0"
+__version__ = "17.2.0"

 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.10.0"
--- a/airflow/providers/google/cloud/hooks/bigquery.py
+++ b/airflow/providers/google/cloud/hooks/bigquery.py
@@ -1743,14 +1743,6 @@ class BigQueryCursor(BigQueryBaseCursor):
                 f" Please only use one or more of the following options: {allowed_schema_update_options}"
             )

-        if schema_update_options:
-            if write_disposition not in ["WRITE_APPEND", "WRITE_TRUNCATE"]:
-                raise ValueError(
-                    "schema_update_options is only "
-                    "allowed if write_disposition is "
-                    "'WRITE_APPEND' or 'WRITE_TRUNCATE'."
-                )
-
         if destination_dataset_table:
             destination_project, destination_dataset, destination_table = self.hook.split_tablename(
                 table_input=destination_dataset_table, default_project_id=self.project_id
--- a/airflow/providers/google/cloud/hooks/cloud_composer.py
+++ b/airflow/providers/google/cloud/hooks/cloud_composer.py
@@ -642,7 +642,12 @@ class CloudComposerAsyncHook(GoogleBaseHook):
                 self.log.exception("Exception occurred while polling CMD result")
                 raise AirflowException(ex)

-
+            try:
+                result_dict = PollAirflowCommandResponse.to_dict(result)
+            except Exception as ex:
+                self.log.exception("Exception occurred while transforming PollAirflowCommandResponse")
+                raise AirflowException(ex)
+
             if result_dict["output_end"]:
                 return result_dict

--- a/airflow/providers/google/cloud/hooks/cloud_sql.py
+++ b/airflow/providers/google/cloud/hooks/cloud_sql.py
@@ -1175,9 +1175,9 @@ class CloudSQLDatabaseHook(BaseHook):
             raise ValueError("The db_hook should be set")
         if not isinstance(self.db_hook, PostgresHook):
             raise ValueError(f"The db_hook should be PostgresHook and is {type(self.db_hook)}")
-        conn = getattr(self.db_hook, "conn")
-        if conn and conn.notices:
-            for output in conn.notices:
+        conn = getattr(self.db_hook, "conn", None)
+        if conn and hasattr(conn, "notices") and conn.notices:
+            for output in conn.notices:
                 self.log.info(output)

     def reserve_free_tcp_port(self) -> None:
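A minimal sketch (not part of the diff) of why the guarded lookup above is safer: `conn` may be unset on a hook that never connected, and `notices` is psycopg2-specific, so other connection objects may not have it. `_StubHook` below is a hypothetical stand-in.

    # Hypothetical stand-in for a PostgresHook that was created but never connected.
    class _StubHook:
        conn = None

    hook = _StubHook()
    conn = getattr(hook, "conn", None)  # default avoids AttributeError on unusual hooks
    if conn and hasattr(conn, "notices") and conn.notices:  # notices is psycopg2-specific
        for output in conn.notices:
            print(output)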
--- a/airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py
+++ b/airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py
@@ -36,13 +36,13 @@ from copy import deepcopy
 from datetime import timedelta
 from typing import TYPE_CHECKING, Any

-from google.api_core import protobuf_helpers
 from google.cloud.storage_transfer_v1 import (
     ListTransferJobsRequest,
     StorageTransferServiceAsyncClient,
     TransferJob,
     TransferOperation,
 )
+from google.protobuf.json_format import MessageToDict
 from googleapiclient.discovery import Resource, build
 from googleapiclient.errors import HttpError

@@ -603,7 +603,7 @@ class CloudDataTransferServiceAsyncHook(GoogleBaseAsyncHook):
         self,
         request_filter: dict | None = None,
         **kwargs,
-    ) -> list[TransferOperation]:
+    ) -> list[dict[str, Any]]:
         """
         Get a transfer operation in Google Storage Transfer Service.

@@ -660,7 +660,12 @@ class CloudDataTransferServiceAsyncHook(GoogleBaseAsyncHook):
         )

         transfer_operations = [
-            protobuf_helpers.from_any_pb(TransferOperation, op.metadata) for op in operations
+            MessageToDict(
+                getattr(op, "_pb", op),
+                preserving_proto_field_name=True,
+                use_integers_for_enums=True,
+            )
+            for op in operations
         ]

         return transfer_operations
@@ -677,7 +682,7 @@ class CloudDataTransferServiceAsyncHook(GoogleBaseAsyncHook):

     @staticmethod
     async def operations_contain_expected_statuses(
-        operations: list[TransferOperation], expected_statuses: set[str] | str
+        operations: list[dict[str, Any]], expected_statuses: set[str] | str
     ) -> bool:
         """
         Check whether an operation exists with the expected status.
@@ -696,7 +701,7 @@ class CloudDataTransferServiceAsyncHook(GoogleBaseAsyncHook):
         if not operations:
             return False

-        current_statuses = {operation.status.name for operation in operations}
+        current_statuses = {TransferOperation.Status(op["metadata"]["status"]).name for op in operations}

         if len(current_statuses - expected_statuses_set) != len(current_statuses):
             return True
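A hedged sketch of the conversion pattern these hunks introduce, shown with a stock protobuf message so it runs without a GCP client: proto-plus wrappers (such as `TransferOperation` from google.cloud.storage_transfer_v1) keep the raw protobuf under `._pb`, plain messages fall through `getattr` unchanged, and the two keyword flags keep snake_case keys and integer enums — which is why the status check above re-parses the integer with `TransferOperation.Status(...)`.

    from google.protobuf.descriptor_pb2 import FieldDescriptorProto
    from google.protobuf.json_format import MessageToDict

    # Stand-in message for the demo; a proto-plus wrapper would take the ._pb branch.
    msg = FieldDescriptorProto(name="status", number=1, type=FieldDescriptorProto.TYPE_ENUM)
    as_dict = MessageToDict(
        getattr(msg, "_pb", msg),          # raw protobuf for wrapped or plain messages
        preserving_proto_field_name=True,  # keep snake_case keys, e.g. "type_name"
        use_integers_for_enums=True,       # enums as ints: {"type": 14}, not "TYPE_ENUM"
    )
    print(as_dict)  # {'name': 'status', 'number': 1, 'type': 14}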
--- a/airflow/providers/google/cloud/hooks/dataflow.py
+++ b/airflow/providers/google/cloud/hooks/dataflow.py
@@ -56,8 +56,8 @@ from airflow.providers.google.common.hooks.base_google import (
     GoogleBaseAsyncHook,
     GoogleBaseHook,
 )
+from airflow.providers.google.version_compat import timeout
 from airflow.utils.log.logging_mixin import LoggingMixin
-from airflow.utils.timeout import timeout

 if TYPE_CHECKING:
     from google.cloud.dataflow_v1beta3.services.jobs_v1_beta3.pagers import ListJobsAsyncPager
--- a/airflow/providers/google/cloud/hooks/spanner.py
+++ b/airflow/providers/google/cloud/hooks/spanner.py
@@ -19,6 +19,7 @@

 from __future__ import annotations

+from collections import OrderedDict
 from collections.abc import Callable, Sequence
 from typing import TYPE_CHECKING, NamedTuple

@@ -388,7 +389,7 @@ class SpannerHook(GoogleBaseHook, DbApiHook):
         database_id: str,
         queries: list[str],
         project_id: str,
-    ) -> None:
+    ) -> list[int]:
         """
         Execute an arbitrary DML query (INSERT, UPDATE, DELETE).

@@ -398,12 +399,31 @@ class SpannerHook(GoogleBaseHook, DbApiHook):
         :param project_id: Optional, the ID of the Google Cloud project that owns the Cloud Spanner
             database. If set to None or missing, the default project_id from the Google Cloud connection
             is used.
+        :return: list of numbers of affected rows by DML query
         """
-        self._get_client(project_id=project_id).instance(instance_id=instance_id).database(
-            database_id=database_id
-        ).run_in_transaction(partial(self._execute_sql_in_transaction, queries=queries))
+        db = (
+            self._get_client(project_id=project_id)
+            .instance(instance_id=instance_id)
+            .database(database_id=database_id)
+        )
+
+        def _tx_runner(tx: Transaction) -> dict[str, int]:
+            return self._execute_sql_in_transaction(tx, queries)
+
+        result = db.run_in_transaction(_tx_runner)
+
+        result_rows_count_per_query = []
+        for i, (sql, rc) in enumerate(result.items(), start=1):
+            if not sql.startswith("SELECT"):
+                preview = sql if len(sql) <= 300 else sql[:300] + "…"
+                self.log.info("[DML %d/%d] affected rows=%d | %s", i, len(result), rc, preview)
+            result_rows_count_per_query.append(rc)
+        return result_rows_count_per_query

     @staticmethod
-    def _execute_sql_in_transaction(transaction: Transaction, queries: list[str]):
+    def _execute_sql_in_transaction(transaction: Transaction, queries: list[str]) -> dict[str, int]:
+        counts: OrderedDict[str, int] = OrderedDict()
         for sql in queries:
-            transaction.execute_update(sql)
+            rc = transaction.execute_update(sql)
+            counts[sql] = rc
+        return counts
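A hypothetical usage sketch of the changed hook API: `execute_dml` now returns one affected-row count per statement instead of `None`. Instance, database, and project names below are placeholders.

    from airflow.providers.google.cloud.hooks.spanner import SpannerHook

    hook = SpannerHook(gcp_conn_id="google_cloud_default")
    row_counts = hook.execute_dml(
        instance_id="my-instance",
        database_id="my-database",
        queries=[
            "UPDATE users SET active = TRUE WHERE signup_ts >= '2024-01-01'",
            "DELETE FROM sessions WHERE expired",
        ],
        project_id="my-project",
    )
    # One count per statement, in execution order, e.g. [42, 7]
    print(row_counts)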
--- a/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py
+++ b/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py
@@ -350,6 +350,9 @@ class GenerativeModelHook(GoogleBaseHook):
         :param generation_config: Optional. Generation configuration settings.
         :param safety_settings: Optional. Per request settings for blocking unsafe content.
         """
+        # During run of the system test it was found out that names from xcom, e.g. 3402922389 can be
+        # treated as int and throw an error TypeError: expected string or bytes-like object, got 'int'
+        cached_content_name = str(cached_content_name)
         vertexai.init(project=project_id, location=location, credentials=self.get_credentials())

         cached_context_model = self.get_cached_context_model(cached_content_name=cached_content_name)
--- a/airflow/providers/google/cloud/openlineage/utils.py
+++ b/airflow/providers/google/cloud/openlineage/utils.py
@@ -214,7 +214,20 @@ def extract_ds_name_from_gcs_path(path: str) -> str:

 def get_facets_from_bq_table(table: Table) -> dict[str, DatasetFacet]:
     """Get facets from BigQuery table object."""
+    return get_facets_from_bq_table_for_given_fields(table, selected_fields=None)
+
+
+def get_facets_from_bq_table_for_given_fields(
+    table: Table, selected_fields: list[str] | None
+) -> dict[str, DatasetFacet]:
+    """
+    Get facets from BigQuery table object for selected fields only.
+
+    If selected_fields is None, include all fields.
+    """
     facets: dict[str, DatasetFacet] = {}
+    selected_fields_set = set(selected_fields) if selected_fields else None
+
     if table.schema:
         facets["schema"] = SchemaDatasetFacet(
             fields=[
@@ -222,6 +235,7 @@ def get_facets_from_bq_table(table: Table) -> dict[str, DatasetFacet]:
                     name=schema_field.name, type=schema_field.field_type, description=schema_field.description
                 )
                 for schema_field in table.schema
+                if selected_fields_set is None or schema_field.name in selected_fields_set
             ]
         )
     if table.description:
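A sketch of the new helper under assumed inputs (the table is built locally, so no API call is made; it assumes the common.compat OpenLineage facets are installed): the schema facet is filtered down to the columns a transfer actually selects.

    from google.cloud.bigquery import Table

    from airflow.providers.google.cloud.openlineage.utils import (
        get_facets_from_bq_table_for_given_fields,
    )

    table = Table.from_api_repr(
        {
            "tableReference": {"projectId": "p", "datasetId": "d", "tableId": "t"},
            "schema": {
                "fields": [
                    {"name": "id", "type": "INTEGER"},
                    {"name": "email", "type": "STRING"},
                    {"name": "created_at", "type": "TIMESTAMP"},
                ]
            },
        }
    )
    facets = get_facets_from_bq_table_for_given_fields(table, selected_fields=["id", "email"])
    # The schema facet now lists only "id" and "email"; created_at is dropped.
    print([f.name for f in facets["schema"].fields])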
--- a/airflow/providers/google/cloud/operators/bigquery.py
+++ b/airflow/providers/google/cloud/operators/bigquery.py
@@ -2370,11 +2370,19 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOperatorOpenLineageMixin):
         if self.project_id is None:
             self.project_id = hook.project_id

+        # Handle missing logical_date. Example: asset-triggered DAGs (Airflow 3)
+        logical_date = context.get("logical_date")
+        if logical_date is None:
+            # Use dag_run.run_after as fallback when logical_date is not available
+            dag_run = context.get("dag_run")
+            if dag_run and hasattr(dag_run, "run_after"):
+                logical_date = dag_run.run_after
+
         self.job_id = hook.generate_job_id(
             job_id=self.job_id,
             dag_id=self.dag_id,
             task_id=self.task_id,
-            logical_date=context["logical_date"],
+            logical_date=logical_date,
             configuration=self.configuration,
             force_rerun=self.force_rerun,
         )
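The fallback semantics, restated as a standalone sketch with stand-in context objects (this is illustrative, not operator code):

    from datetime import datetime, timezone
    from types import SimpleNamespace

    def resolve_logical_date(context: dict):
        """Prefer logical_date; fall back to dag_run.run_after when it is absent."""
        logical_date = context.get("logical_date")
        if logical_date is None:
            dag_run = context.get("dag_run")
            if dag_run and hasattr(dag_run, "run_after"):
                logical_date = dag_run.run_after
        return logical_date

    # Scheduled run: logical_date is present and wins.
    scheduled = {"logical_date": datetime(2025, 1, 1, tzinfo=timezone.utc)}
    assert resolve_logical_date(scheduled) == scheduled["logical_date"]

    # Asset-triggered run (Airflow 3): only dag_run.run_after is available.
    triggered = {"logical_date": None, "dag_run": SimpleNamespace(run_after=datetime.now(timezone.utc))}
    assert resolve_logical_date(triggered) == triggered["dag_run"].run_after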
--- a/airflow/providers/google/cloud/operators/cloud_composer.py
+++ b/airflow/providers/google/cloud/operators/cloud_composer.py
@@ -764,9 +764,15 @@ class CloudComposerRunAirflowCLICommandOperator(GoogleCloudBaseOperator):
             metadata=self.metadata,
             poll_interval=self.poll_interval,
         )
-        result_str = self._merge_cmd_output_result(result)
-        self.log.info("Command execution result:\n%s", result_str)
-        return result
+        exit_code = result.get("exit_info", {}).get("exit_code")
+        if exit_code == 0:
+            result_str = self._merge_cmd_output_result(result)
+            self.log.info("Command execution result:\n%s", result_str)
+            return result
+
+        error_output = "".join(line["content"] for line in result.get("error", []))
+        message = f"Airflow CLI command failed with exit code {exit_code}.\nError output:\n{error_output}"
+        raise AirflowException(message)

     def execute_complete(self, context: Context, event: dict) -> dict:
         if event and event["status"] == "error":
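A hedged sketch of the poll-result shape the operator now inspects, as implied by the diff (line-structured `output`/`error` plus `exit_info`); the literal dict below is illustrative, not a captured API response.

    result = {
        "output": [{"line_number": 1, "content": "dag_id | is_paused"}],
        "error": [{"line_number": 1, "content": "DAG 'missing_dag' not found"}],
        "exit_info": {"exit_code": 1, "error": ""},
        "output_end": True,
    }

    exit_code = result.get("exit_info", {}).get("exit_code")
    if exit_code != 0:
        error_output = "".join(line["content"] for line in result.get("error", []))
        print(f"Airflow CLI command failed with exit code {exit_code}:\n{error_output}")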
--- a/airflow/providers/google/cloud/operators/dataplex.py
+++ b/airflow/providers/google/cloud/operators/dataplex.py
@@ -1082,11 +1082,11 @@ class DataplexRunDataQualityScanOperator(GoogleCloudBaseOperator):
         """
         job_state = event["job_state"]
         job_id = event["job_id"]
-        if job_state == DataScanJob.State.FAILED:
+        if job_state == DataScanJob.State.FAILED.name:  # type: ignore
             raise AirflowException(f"Job failed:\n{job_id}")
-        if job_state == DataScanJob.State.CANCELLED:
+        if job_state == DataScanJob.State.CANCELLED.name:  # type: ignore
             raise AirflowException(f"Job was cancelled:\n{job_id}")
-        if job_state == DataScanJob.State.SUCCEEDED:
+        if job_state == DataScanJob.State.SUCCEEDED.name:  # type: ignore
             job = event["job"]
             if not job["data_quality_result"]["passed"]:
                 if self.fail_on_dq_failure:
@@ -1260,11 +1260,11 @@ class DataplexGetDataQualityScanResultOperator(GoogleCloudBaseOperator):
         job_state = event["job_state"]
         job_id = event["job_id"]
         job = event["job"]
-        if job_state == DataScanJob.State.FAILED:
+        if job_state == DataScanJob.State.FAILED.name:  # type: ignore
             raise AirflowException(f"Job failed:\n{job_id}")
-        if job_state == DataScanJob.State.CANCELLED:
+        if job_state == DataScanJob.State.CANCELLED.name:  # type: ignore
             raise AirflowException(f"Job was cancelled:\n{job_id}")
-        if job_state == DataScanJob.State.SUCCEEDED:
+        if job_state == DataScanJob.State.SUCCEEDED.name:  # type: ignore
             if not job["data_quality_result"]["passed"]:
                 if self.fail_on_dq_failure:
                     raise AirflowDataQualityScanException(
@@ -1639,12 +1639,12 @@ class DataplexRunDataProfileScanOperator(GoogleCloudBaseOperator):
             result_timeout=self.result_timeout,
         )

-        if job.state == DataScanJob.State.FAILED:
+        if job.state == DataScanJob.State.FAILED.name:  # type: ignore
             raise AirflowException(f"Data Profile job failed: {job_id}")
-        if job.state == DataScanJob.State.SUCCEEDED:
+        if job.state == DataScanJob.State.SUCCEEDED.name:  # type: ignore
             self.log.info("Data Profile job executed successfully.")
         else:
-            self.log.info("Data Profile job execution returned status: %s", job.status)
+            self.log.info("Data Profile job execution returned status: %s", job.state)

         return job_id

@@ -1657,11 +1657,11 @@ class DataplexRunDataProfileScanOperator(GoogleCloudBaseOperator):
         """
         job_state = event["job_state"]
         job_id = event["job_id"]
-        if job_state == DataScanJob.State.FAILED:
+        if job_state == DataScanJob.State.FAILED.name:  # type: ignore
             raise AirflowException(f"Job failed:\n{job_id}")
-        if job_state == DataScanJob.State.CANCELLED:
+        if job_state == DataScanJob.State.CANCELLED.name:  # type: ignore
             raise AirflowException(f"Job was cancelled:\n{job_id}")
-        if job_state == DataScanJob.State.SUCCEEDED:
+        if job_state == DataScanJob.State.SUCCEEDED.name:  # type: ignore
             self.log.info("Data Profile job executed successfully.")
         return job_id

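Why `.name`, in a self-contained sketch: the deferred path hands `job_state` back through a JSON-serialized trigger event, so the proto enum arrives as its string name and can never equal the enum member itself. (`State` here is a plain-enum stand-in for `DataScanJob.State`.)

    import enum

    class State(enum.Enum):  # stand-in for the proto enum DataScanJob.State
        FAILED = 3
        CANCELLED = 4
        SUCCEEDED = 5

    event = {"job_state": "SUCCEEDED"}                 # what survives the JSON round-trip
    assert event["job_state"] != State.SUCCEEDED       # enum comparison: always False
    assert event["job_state"] == State.SUCCEEDED.name  # string comparison: correct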
--- a/airflow/providers/google/cloud/operators/dataproc.py
+++ b/airflow/providers/google/cloud/operators/dataproc.py
@@ -213,6 +213,7 @@ class ClusterGenerator:
     :param secondary_worker_accelerator_type: Type of the accelerator card (GPU) to attach to the secondary workers,
         see https://cloud.google.com/dataproc/docs/reference/rest/v1/InstanceGroupConfig#acceleratorconfig
     :param secondary_worker_accelerator_count: Number of accelerator cards (GPUs) to attach to the secondary workers
+    :param cluster_tier: The tier of the cluster (e.g. "CLUSTER_TIER_STANDARD" / "CLUSTER_TIER_PREMIUM").
     """

     def __init__(
@@ -261,6 +262,8 @@ class ClusterGenerator:
         secondary_worker_instance_flexibility_policy: InstanceFlexibilityPolicy | None = None,
         secondary_worker_accelerator_type: str | None = None,
         secondary_worker_accelerator_count: int | None = None,
+        *,
+        cluster_tier: str | None = None,
         **kwargs,
     ) -> None:
         self.project_id = project_id
@@ -308,6 +311,7 @@ class ClusterGenerator:
         self.secondary_worker_instance_flexibility_policy = secondary_worker_instance_flexibility_policy
         self.secondary_worker_accelerator_type = secondary_worker_accelerator_type
         self.secondary_worker_accelerator_count = secondary_worker_accelerator_count
+        self.cluster_tier = cluster_tier

         if self.custom_image and self.image_version:
             raise ValueError("The custom_image and image_version can't be both set")
@@ -513,6 +517,9 @@ class ClusterGenerator:
         if self.driver_pool_size > 0:
             cluster_data["auxiliary_node_groups"] = [self._build_driver_pool()]

+        if self.cluster_tier:
+            cluster_data["cluster_tier"] = self.cluster_tier
+
         cluster_data = self._build_gce_cluster_config(cluster_data)

         if self.single_node:
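A hypothetical usage sketch of the new keyword-only parameter (all field values are placeholders; required fields depend on your cluster setup):

    from airflow.providers.google.cloud.operators.dataproc import ClusterGenerator

    cluster_config = ClusterGenerator(
        project_id="my-project",
        num_workers=2,
        cluster_tier="CLUSTER_TIER_PREMIUM",  # new in 17.2.0; must be passed by keyword
    ).make()

    assert cluster_config["cluster_tier"] == "CLUSTER_TIER_PREMIUM"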
@@ -1945,9 +1952,9 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         job_state = event["job_state"]
         job_id = event["job_id"]
         job = event["job"]
-        if job_state == JobStatus.State.ERROR:
+        if job_state == JobStatus.State.ERROR.name:  # type: ignore
             raise AirflowException(f"Job {job_id} failed:\n{job}")
-        if job_state == JobStatus.State.CANCELLED:
+        if job_state == JobStatus.State.CANCELLED.name:  # type: ignore
             raise AirflowException(f"Job {job_id} was cancelled:\n{job}")
         self.log.info("%s completed successfully.", self.task_id)
         return job_id
@@ -2455,7 +2462,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
             if not self.hook.check_error_for_resource_is_not_ready_msg(batch.state_message):
                 break

-        self.handle_batch_status(context, batch.state, batch_id, batch.state_message)
+        self.handle_batch_status(context, batch.state.name, batch_id, batch.state_message)
         return Batch.to_dict(batch)

     @cached_property
@@ -2480,19 +2487,19 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
             self.operation.cancel()

     def handle_batch_status(
-        self, context: Context, state: Batch.State, batch_id: str, state_message: str | None = None
+        self, context: Context, state: str, batch_id: str, state_message: str | None = None
     ) -> None:
         # The existing batch may be a number of states other than 'SUCCEEDED'\
         # wait_for_operation doesn't fail if the job is cancelled, so we will check for it here which also
         # finds a cancelling|canceled|unspecified job from wait_for_batch or the deferred trigger
         link = DATAPROC_BATCH_LINK.format(region=self.region, project_id=self.project_id, batch_id=batch_id)
-        if state == Batch.State.FAILED:
+        if state == Batch.State.FAILED.name:  # type: ignore
             raise AirflowException(
                 f"Batch job {batch_id} failed with error: {state_message}.\nDriver logs: {link}"
             )
-        if state in (Batch.State.CANCELLED, Batch.State.CANCELLING):
+        if state in (Batch.State.CANCELLED.name, Batch.State.CANCELLING.name):  # type: ignore
             raise AirflowException(f"Batch job {batch_id} was cancelled.\nDriver logs: {link}")
-        if state == Batch.State.STATE_UNSPECIFIED:
+        if state == Batch.State.STATE_UNSPECIFIED.name:  # type: ignore
             raise AirflowException(f"Batch job {batch_id} unspecified.\nDriver logs: {link}")
         self.log.info("Batch job %s completed.\nDriver logs: %s", batch_id, link)

@@ -2566,7 +2573,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
         dag_id = re.sub(r"[.\s]", "_", self.dag_id.lower())
         task_id = re.sub(r"[.\s]", "_", self.task_id.lower())

-        labels_regex = re.compile(r"^[a-z][\w-]{0,63}$")
+        labels_regex = re.compile(r"^[a-z][\w-]{0,62}$")
         if not labels_regex.match(dag_id) or not labels_regex.match(task_id):
             return

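The off-by-one being fixed, checked standalone: GCP label values are capped at 63 characters, and `^[a-z][\w-]{0,63}$` admits 64 (one leading character plus up to 63 more), while `{0,62}` caps the total at 63.

    import re

    labels_regex = re.compile(r"^[a-z][\w-]{0,62}$")
    assert labels_regex.match("a" * 63)      # exactly 63 characters: accepted
    assert not labels_regex.match("a" * 64)  # 64 characters: rejected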
--- a/airflow/providers/google/cloud/operators/pubsub.py
+++ b/airflow/providers/google/cloud/operators/pubsub.py
@@ -26,6 +26,7 @@ This module contains Google PubSub operators.
 from __future__ import annotations

 from collections.abc import Callable, Sequence
+from functools import cached_property
 from typing import TYPE_CHECKING, Any

 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
@@ -52,6 +53,7 @@ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 if TYPE_CHECKING:
     from google.api_core.retry import Retry

+    from airflow.providers.openlineage.extractors import OperatorLineage
     from airflow.utils.context import Context

@@ -359,15 +361,18 @@ class PubSubCreateSubscriptionOperator(GoogleCloudBaseOperator):
         self.timeout = timeout
         self.metadata = metadata
         self.impersonation_chain = impersonation_chain
+        self._resolved_subscription_name: str | None = None

-    def execute(self, context: Context) -> str:
-        hook = PubSubHook(
+    @cached_property
+    def pubsub_hook(self):
+        return PubSubHook(
             gcp_conn_id=self.gcp_conn_id,
             impersonation_chain=self.impersonation_chain,
         )

+    def execute(self, context: Context) -> str:
         self.log.info("Creating subscription for topic %s", self.topic)
-        result = hook.create_subscription(
+        result = self.pubsub_hook.create_subscription(
             project_id=self.project_id,
             topic=self.topic,
             subscription=self.subscription,
@@ -389,13 +394,34 @@ class PubSubCreateSubscriptionOperator(GoogleCloudBaseOperator):
         )

         self.log.info("Created subscription for topic %s", self.topic)
+
+        # Store resolved subscription for Open Lineage
+        self._resolved_subscription_name = self.subscription or result
+
         PubSubSubscriptionLink.persist(
             context=context,
-            subscription_id=self.subscription,
-            project_id=self.project_id or hook.project_id,
+            subscription_id=self._resolved_subscription_name,  # result returns subscription name
+            project_id=self.project_id or self.pubsub_hook.project_id,
         )
         return result

+    def get_openlineage_facets_on_complete(self, _) -> OperatorLineage:
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        topic_project_id = self.project_id or self.pubsub_hook.project_id
+        subscription_project_id = self.subscription_project_id or topic_project_id
+
+        return OperatorLineage(
+            inputs=[Dataset(namespace="pubsub", name=f"topic:{topic_project_id}:{self.topic}")],
+            outputs=[
+                Dataset(
+                    namespace="pubsub",
+                    name=f"subscription:{subscription_project_id}:{self._resolved_subscription_name}",
+                )
+            ],
+        )
+

 class PubSubDeleteTopicOperator(GoogleCloudBaseOperator):
     """
@@ -692,17 +718,28 @@ class PubSubPublishMessageOperator(GoogleCloudBaseOperator):
         self.enable_message_ordering = enable_message_ordering
         self.impersonation_chain = impersonation_chain

-    def execute(self, context: Context) -> None:
-        hook = PubSubHook(
+    @cached_property
+    def pubsub_hook(self):
+        return PubSubHook(
             gcp_conn_id=self.gcp_conn_id,
             impersonation_chain=self.impersonation_chain,
             enable_message_ordering=self.enable_message_ordering,
         )

+    def execute(self, context: Context) -> None:
         self.log.info("Publishing to topic %s", self.topic)
-        hook.publish(project_id=self.project_id, topic=self.topic, messages=self.messages)
+        self.pubsub_hook.publish(project_id=self.project_id, topic=self.topic, messages=self.messages)
         self.log.info("Published to topic %s", self.topic)

+    def get_openlineage_facets_on_complete(self, _) -> OperatorLineage:
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        project_id = self.project_id or self.pubsub_hook.project_id
+        output_dataset = [Dataset(namespace="pubsub", name=f"topic:{project_id}:{self.topic}")]
+
+        return OperatorLineage(outputs=output_dataset)
+

 class PubSubPullOperator(GoogleCloudBaseOperator):
     """
@@ -853,3 +890,13 @@ class PubSubPullOperator(GoogleCloudBaseOperator):
         messages_json = [ReceivedMessage.to_dict(m) for m in pulled_messages]

         return messages_json
+
+    def get_openlineage_facets_on_complete(self, _) -> OperatorLineage:
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        output_dataset = [
+            Dataset(namespace="pubsub", name=f"subscription:{self.project_id}:{self.subscription}")
+        ]
+
+        return OperatorLineage(outputs=output_dataset)
--- a/airflow/providers/google/cloud/operators/spanner.py
+++ b/airflow/providers/google/cloud/operators/spanner.py
@@ -280,8 +280,8 @@ class SpannerQueryDatabaseInstanceOperator(GoogleCloudBaseOperator):
             self.instance_id,
             self.database_id,
         )
-        self.log.info(queries)
-        hook.execute_dml(
+        self.log.info("Executing queries: %s", queries)
+        result_rows_count_per_query = hook.execute_dml(
             project_id=self.project_id,
             instance_id=self.instance_id,
             database_id=self.database_id,
@@ -293,6 +293,7 @@ class SpannerQueryDatabaseInstanceOperator(GoogleCloudBaseOperator):
             database_id=self.database_id,
             project_id=self.project_id or hook.project_id,
         )
+        return result_rows_count_per_query

     @staticmethod
     def sanitize_queries(queries: list[str]) -> None:
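A hypothetical DAG snippet showing what returning the counts enables: the operator's return value is pushed to XCom, so a downstream task can read the per-statement counts. Identifiers are placeholders.

    from airflow.providers.google.cloud.operators.spanner import (
        SpannerQueryDatabaseInstanceOperator,
    )

    spanner_update = SpannerQueryDatabaseInstanceOperator(
        task_id="spanner_update",
        instance_id="my-instance",
        database_id="my-database",
        query=["UPDATE users SET active = TRUE WHERE id = 1"],
        project_id="my-project",
    )
    # Downstream: "{{ ti.xcom_pull(task_ids='spanner_update') }}" renders e.g. [1]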
--- a/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py
+++ b/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py
@@ -58,7 +58,7 @@ class TextEmbeddingModelGetEmbeddingsOperator(GoogleCloudBaseOperator):
         account from the list granting this role to the originating account (templated).
     """

-    template_fields = ("location", "project_id", "impersonation_chain", "prompt")
+    template_fields = ("location", "project_id", "impersonation_chain", "prompt", "pretrained_model")

     def __init__(
         self,
@@ -211,7 +211,14 @@ class SupervisedFineTuningTrainOperator(GoogleCloudBaseOperator):
         account from the list granting this role to the originating account (templated).
     """

-    template_fields = ("location", "project_id", "impersonation_chain", "train_dataset", "validation_dataset")
+    template_fields = (
+        "location",
+        "project_id",
+        "impersonation_chain",
+        "train_dataset",
+        "validation_dataset",
+        "source_model",
+    )

     def __init__(
         self,
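A hypothetical sketch of what the widened `template_fields` buys: `source_model` (and `pretrained_model` on the embeddings operator) can now be Jinja-rendered at runtime. All values are placeholders.

    from airflow.providers.google.cloud.operators.vertex_ai.generative_model import (
        SupervisedFineTuningTrainOperator,
    )

    sft_train = SupervisedFineTuningTrainOperator(
        task_id="sft_train",
        project_id="my-project",
        location="us-central1",
        source_model="{{ params.source_model }}",  # rendered now that it is templated
        train_dataset="gs://my-bucket/train.jsonl",
    )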
--- a/airflow/providers/google/cloud/sensors/cloud_composer.py
+++ b/airflow/providers/google/cloud/sensors/cloud_composer.py
@@ -61,6 +61,7 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
         Or [datetime(2024,3,22,0,0,0)] in this case sensor will check for states from specific time in the
         past till current time execution.
         Default value datetime.timedelta(days=1).
+    :param composer_dag_run_id: The Run ID of the DAG run to wait for; if both are specified, the 'execution_range' param is ignored.
     :param gcp_conn_id: The connection ID to use when fetching connection info.
     :param impersonation_chain: Optional service account to impersonate using short-term
         credentials, or chained list of accounts required to get the access_token
@@ -91,6 +92,7 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
         composer_dag_id: str,
         allowed_states: Iterable[str] | None = None,
         execution_range: timedelta | list[datetime] | None = None,
+        composer_dag_run_id: str | None = None,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
@@ -104,11 +106,17 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
         self.composer_dag_id = composer_dag_id
         self.allowed_states = list(allowed_states) if allowed_states else [TaskInstanceState.SUCCESS.value]
         self.execution_range = execution_range
+        self.composer_dag_run_id = composer_dag_run_id
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
         self.deferrable = deferrable
         self.poll_interval = poll_interval

+        if self.composer_dag_run_id and self.execution_range:
+            self.log.warning(
+                "The composer_dag_run_id and execution_range parameters do not work together; "
+                "execution_range will be ignored and only the specified composer_dag_run_id will be checked."
+            )
+
     def _get_logical_dates(self, context) -> tuple[datetime, datetime]:
         if isinstance(self.execution_range, timedelta):
             if self.execution_range < timedelta(0):
@@ -128,6 +136,20 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):

         dag_runs = self._pull_dag_runs()

+        if len(dag_runs) == 0:
+            self.log.info("Dag runs are empty. Sensor waits for dag runs...")
+            return False
+
+        if self.composer_dag_run_id:
+            self.log.info(
+                "Sensor waits for allowed states %s for specified RunID: %s",
+                self.allowed_states,
+                self.composer_dag_run_id,
+            )
+            composer_dag_run_id_status = self._check_composer_dag_run_id_states(
+                dag_runs=dag_runs,
+            )
+            return composer_dag_run_id_status
         self.log.info("Sensor waits for allowed states: %s", self.allowed_states)
         allowed_states_status = self._check_dag_runs_states(
             dag_runs=dag_runs,
|
@@ -189,16 +211,24 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
|
|
189
211
|
image_version = environment_config["config"]["software_config"]["image_version"]
|
190
212
|
return int(image_version.split("airflow-")[1].split(".")[0])
|
191
213
|
|
214
|
+
def _check_composer_dag_run_id_states(self, dag_runs: list[dict]) -> bool:
|
215
|
+
for dag_run in dag_runs:
|
216
|
+
if dag_run["run_id"] == self.composer_dag_run_id and dag_run["state"] in self.allowed_states:
|
217
|
+
return True
|
218
|
+
return False
|
219
|
+
|
192
220
|
def execute(self, context: Context) -> None:
|
193
221
|
self._composer_airflow_version = self._get_composer_airflow_version()
|
194
222
|
if self.deferrable:
|
195
223
|
start_date, end_date = self._get_logical_dates(context)
|
196
224
|
self.defer(
|
225
|
+
timeout=self.timeout,
|
197
226
|
trigger=CloudComposerDAGRunTrigger(
|
198
227
|
project_id=self.project_id,
|
199
228
|
region=self.region,
|
200
229
|
environment_id=self.environment_id,
|
201
230
|
composer_dag_id=self.composer_dag_id,
|
231
|
+
composer_dag_run_id=self.composer_dag_run_id,
|
202
232
|
start_date=start_date,
|
203
233
|
end_date=end_date,
|
204
234
|
allowed_states=self.allowed_states,
|