apache-airflow-providers-google 10.12.0rc1__py3-none-any.whl → 10.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/cloud/fs/gcs.py +16 -13
- airflow/providers/google/cloud/hooks/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/hooks/cloud_build.py +2 -1
- airflow/providers/google/cloud/hooks/cloud_composer.py +4 -3
- airflow/providers/google/cloud/hooks/compute_ssh.py +18 -6
- airflow/providers/google/cloud/hooks/dataflow.py +61 -3
- airflow/providers/google/cloud/hooks/dataplex.py +2 -1
- airflow/providers/google/cloud/hooks/dataproc.py +19 -18
- airflow/providers/google/cloud/hooks/gcs.py +10 -6
- airflow/providers/google/cloud/hooks/pubsub.py +3 -2
- airflow/providers/google/cloud/log/gcs_task_handler.py +2 -39
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +2 -11
- airflow/providers/google/cloud/operators/bigquery.py +47 -47
- airflow/providers/google/cloud/operators/cloud_composer.py +1 -1
- airflow/providers/google/cloud/operators/cloud_run.py +3 -3
- airflow/providers/google/cloud/operators/dataflow.py +6 -0
- airflow/providers/google/cloud/operators/dataplex.py +530 -1
- airflow/providers/google/cloud/operators/dataproc.py +11 -11
- airflow/providers/google/cloud/operators/gcs.py +90 -15
- airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -3
- airflow/providers/google/cloud/operators/pubsub.py +47 -55
- airflow/providers/google/cloud/secrets/secret_manager.py +22 -1
- airflow/providers/google/cloud/sensors/cloud_composer.py +14 -1
- airflow/providers/google/cloud/sensors/dataplex.py +118 -0
- airflow/providers/google/cloud/sensors/gcs.py +10 -1
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +5 -5
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +42 -42
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +9 -9
- airflow/providers/google/cloud/triggers/cloud_run.py +7 -7
- airflow/providers/google/cloud/triggers/dataplex.py +82 -0
- airflow/providers/google/cloud/triggers/dataproc.py +2 -5
- airflow/providers/google/cloud/triggers/gcs.py +13 -3
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +3 -1
- airflow/providers/google/common/hooks/base_google.py +6 -4
- airflow/providers/google/get_provider_info.py +14 -13
- {apache_airflow_providers_google-10.12.0rc1.dist-info → apache_airflow_providers_google-10.13.0.dist-info}/METADATA +31 -31
- {apache_airflow_providers_google-10.12.0rc1.dist-info → apache_airflow_providers_google-10.13.0.dist-info}/RECORD +40 -40
- {apache_airflow_providers_google-10.12.0rc1.dist-info → apache_airflow_providers_google-10.13.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.12.0rc1.dist-info → apache_airflow_providers_google-10.13.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/gcs.py

@@ -188,11 +188,11 @@ class GCSListObjectsOperator(GoogleCloudBaseOperator):
     folder in ``data`` bucket. ::

         GCS_Files = GCSListOperator(
-            task_id=
-            bucket=
-            prefix=
-            match_glob=
-            gcp_conn_id=google_cloud_conn_id
+            task_id="GCS_Files",
+            bucket="data",
+            prefix="sales/sales-2017/",
+            match_glob="**/*/.avro",
+            gcp_conn_id=google_cloud_conn_id,
         )
     """

@@ -313,6 +313,7 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
             )
             raise ValueError(err_message)

+        self._objects: list[str] = []
         super().__init__(**kwargs)

     def execute(self, context: Context) -> None:
@@ -322,13 +323,47 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
         )

         if self.objects is not None:
-
+            self._objects = self.objects
         else:
-
-            self.log.info("Deleting %s objects from %s", len(
-            for object_name in
+            self._objects = hook.list(bucket_name=self.bucket_name, prefix=self.prefix)
+        self.log.info("Deleting %s objects from %s", len(self._objects), self.bucket_name)
+        for object_name in self._objects:
             hook.delete(bucket_name=self.bucket_name, object_name=object_name)

+    def get_openlineage_facets_on_complete(self, task_instance):
+        """Implementing on_complete as execute() resolves object names."""
+        from openlineage.client.facet import (
+            LifecycleStateChange,
+            LifecycleStateChangeDatasetFacet,
+            LifecycleStateChangeDatasetFacetPreviousIdentifier,
+        )
+        from openlineage.client.run import Dataset
+
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        if not self._objects:
+            return OperatorLineage()
+
+        bucket_url = f"gs://{self.bucket_name}"
+        input_datasets = [
+            Dataset(
+                namespace=bucket_url,
+                name=object_name,
+                facets={
+                    "lifecycleStateChange": LifecycleStateChangeDatasetFacet(
+                        lifecycleStateChange=LifecycleStateChange.DROP.value,
+                        previousIdentifier=LifecycleStateChangeDatasetFacetPreviousIdentifier(
+                            namespace=bucket_url,
+                            name=object_name,
+                        ),
+                    )
+                },
+            )
+            for object_name in self._objects
+        ]
+
+        return OperatorLineage(inputs=input_datasets)
+

 class GCSBucketCreateAclEntryOperator(GoogleCloudBaseOperator):
     """
@@ -596,6 +631,22 @@ class GCSFileTransformOperator(GoogleCloudBaseOperator):
                 filename=destination_file.name,
             )

+    def get_openlineage_facets_on_start(self):
+        from openlineage.client.run import Dataset
+
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        input_dataset = Dataset(
+            namespace=f"gs://{self.source_bucket}",
+            name=self.source_object,
+        )
+        output_dataset = Dataset(
+            namespace=f"gs://{self.destination_bucket}",
+            name=self.destination_object,
+        )
+
+        return OperatorLineage(inputs=[input_dataset], outputs=[output_dataset])
+

 class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
     """
@@ -722,6 +773,9 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
         self.upload_continue_on_fail = upload_continue_on_fail
         self.upload_num_attempts = upload_num_attempts

+        self._source_object_names: list[str] = []
+        self._destination_object_names: list[str] = []
+
     def execute(self, context: Context) -> list[str]:
         # Define intervals and prefixes.
         try:
@@ -773,7 +827,7 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
         )

         # Fetch list of files.
-
+        self._source_object_names = source_hook.list_by_timespan(
             bucket_name=self.source_bucket,
             prefix=source_prefix_interp,
             timespan_start=timespan_start,
@@ -785,7 +839,7 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
             temp_output_dir_path = Path(temp_output_dir)

             # TODO: download in parallel.
-            for blob_to_transform in
+            for blob_to_transform in self._source_object_names:
                 destination_file = temp_input_dir_path / blob_to_transform
                 destination_file.parent.mkdir(parents=True, exist_ok=True)
                 try:
@@ -822,8 +876,6 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):

             self.log.info("Transformation succeeded. Output temporarily located at %s", temp_output_dir_path)

-            files_uploaded = []
-
             # TODO: upload in parallel.
             for upload_file in temp_output_dir_path.glob("**/*"):
                 if upload_file.is_dir():
@@ -844,12 +896,35 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
                         chunk_size=self.chunk_size,
                         num_max_attempts=self.upload_num_attempts,
                     )
-
+                    self._destination_object_names.append(str(upload_file_name))
                 except GoogleCloudError:
                     if not self.upload_continue_on_fail:
                         raise

-        return
+        return self._destination_object_names
+
+    def get_openlineage_facets_on_complete(self, task_instance):
+        """Implementing on_complete as execute() resolves object names."""
+        from openlineage.client.run import Dataset
+
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        input_datasets = [
+            Dataset(
+                namespace=f"gs://{self.source_bucket}",
+                name=object_name,
+            )
+            for object_name in self._source_object_names
+        ]
+        output_datasets = [
+            Dataset(
+                namespace=f"gs://{self.destination_bucket}",
+                name=object_name,
+            )
+            for object_name in self._destination_object_names
+        ]
+
+        return OperatorLineage(inputs=input_datasets, outputs=output_datasets)


 class GCSDeleteBucketOperator(GoogleCloudBaseOperator):
airflow/providers/google/cloud/operators/kubernetes_engine.py

@@ -192,15 +192,14 @@ class GKECreateClusterOperator(GoogleCloudBaseOperator):
     The **minimum** required to define a cluster to create is:

     ``dict()`` ::
-        cluster_def = {
-            'initial_node_count': 1}
+        cluster_def = {"name": "my-cluster-name", "initial_node_count": 1}

     or

     ``Cluster`` proto ::
         from google.cloud.container_v1.types import Cluster

-        cluster_def = Cluster(name=
+        cluster_def = Cluster(name="my-cluster-name", initial_node_count=1)

     **Operator Creation**: ::

airflow/providers/google/cloud/operators/pubsub.py

@@ -58,24 +58,22 @@ class PubSubCreateTopicOperator(GoogleCloudBaseOperator):
     By default, if the topic already exists, this operator will
     not cause the DAG to fail. ::

-        with DAG(
-            (
-
-
-
+        with DAG("successful DAG") as dag:
+            create_topic = PubSubCreateTopicOperator(project_id="my-project", topic="my_new_topic")
+            create_topic_again = PubSubCreateTopicOperator(project_id="my-project", topic="my_new_topic")
+
+            create_topic >> create_topic_again

     The operator can be configured to fail if the topic already exists. ::

-        with DAG(
-            (
-
-
-            project_id='my-project',
-            topic='my_new_topic',
-            fail_if_exists=True,
-        )
+        with DAG("failing DAG") as dag:
+            create_topic = PubSubCreateTopicOperator(project_id="my-project", topic="my_new_topic")
+            create_topic_again = PubSubCreateTopicOperator(
+                project_id="my-project", topic="my_new_topic", fail_if_exists=True
             )

+            create_topic >> create_topic_again
+
     Both ``project_id`` and ``topic`` are templated so you can use Jinja templating in their values.

     :param project_id: Optional, the Google Cloud project ID where the topic will be created.
@@ -197,43 +195,35 @@ class PubSubCreateSubscriptionOperator(GoogleCloudBaseOperator):
     By default, if the subscription already exists, this operator will
     not cause the DAG to fail. However, the topic must exist in the project. ::

-        with DAG(
-            (
-
-
-
-
-            )
-            >> PubSubCreateSubscriptionOperator(
-                project_id='my-project',
-                topic='my-topic',
-                subscription='my-subscription',
-            )
+        with DAG("successful DAG") as dag:
+            create_subscription = PubSubCreateSubscriptionOperator(
+                project_id="my-project", topic="my-topic", subscription="my-subscription"
+            )
+            create_subscription_again = PubSubCreateSubscriptionOperator(
+                project_id="my-project", topic="my-topic", subscription="my-subscription"
             )

+            create_subscription >> create_subscription_again
+
+
     The operator can be configured to fail if the subscription already exists.
     ::

-        with DAG(
-            (
-
-            project_id='my-project',
-            topic='my-topic',
-            subscription='my-subscription',
-            )
-            >> PubSubCreateSubscriptionOperator(
-                project_id='my-project',
-                topic='my-topic',
-                subscription='my-subscription',
-                fail_if_exists=True,
-            )
+        with DAG("failing DAG") as dag:
+            create_subscription = PubSubCreateSubscriptionOperator(
+                project_id="my-project", topic="my-topic", subscription="my-subscription"
             )
+            create_subscription_again = PubSubCreateSubscriptionOperator(
+                project_id="my-project", topic="my-topic", subscription="my-subscription", fail_if_exists=True
+            )
+
+            create_subscription >> create_subscription_again

     Finally, subscription is not required. If not passed, the operator will
     generated a universally unique identifier for the subscription's name. ::

-        with DAG(
-            PubSubCreateSubscriptionOperator(project_id=
+        with DAG("DAG") as dag:
+            PubSubCreateSubscriptionOperator(project_id="my-project", topic="my-topic")

     ``project_id``, ``topic``, ``subscription``, ``subscription_project_id`` and
     ``impersonation_chain`` are templated so you can use Jinja templating in their values.
@@ -410,14 +400,16 @@ class PubSubDeleteTopicOperator(GoogleCloudBaseOperator):
     By default, if the topic does not exist, this operator will
     not cause the DAG to fail. ::

-        with DAG(
-            PubSubDeleteTopicOperator(project_id=
+        with DAG("successful DAG") as dag:
+            PubSubDeleteTopicOperator(project_id="my-project", topic="non_existing_topic")

     The operator can be configured to fail if the topic does not exist. ::

-        with DAG(
+        with DAG("failing DAG") as dag:
             PubSubDeleteTopicOperator(
-                project_id=
+                project_id="my-project",
+                topic="non_existing_topic",
+                fail_if_not_exists=True,
             )

     Both ``project_id`` and ``topic`` are templated so you can use Jinja templating in their values.
@@ -506,16 +498,18 @@ class PubSubDeleteSubscriptionOperator(GoogleCloudBaseOperator):
     By default, if the subscription does not exist, this operator will
     not cause the DAG to fail. ::

-        with DAG(
-            PubSubDeleteSubscriptionOperator(project_id=
+        with DAG("successful DAG") as dag:
+            PubSubDeleteSubscriptionOperator(project_id="my-project", subscription="non-existing")

     The operator can be configured to fail if the subscription already exists.

     ::

-        with DAG(
+        with DAG("failing DAG") as dag:
             PubSubDeleteSubscriptionOperator(
-                project_id=
+                project_id="my-project",
+                subscription="non-existing",
+                fail_if_not_exists=True,
             )

     ``project_id``, and ``subscription`` are templated so you can use Jinja templating in their values.
@@ -605,15 +599,13 @@ class PubSubPublishMessageOperator(GoogleCloudBaseOperator):
     in a single Google Cloud project. If the topic does not exist, this
     task will fail. ::

-        m1 = {
-
-
-        m2 = {'data': b'Knock, knock'}
-        m3 = {'attributes': {'foo': ''}}
+        m1 = {"data": b"Hello, World!", "attributes": {"type": "greeting"}}
+        m2 = {"data": b"Knock, knock"}
+        m3 = {"attributes": {"foo": ""}}

         t1 = PubSubPublishMessageOperator(
-            project_id=
-            topic=
+            project_id="my-project",
+            topic="my_topic",
             messages=[m1, m2, m3],
             create_topic=True,
             dag=dag,
airflow/providers/google/cloud/secrets/secret_manager.py

@@ -20,12 +20,16 @@ from __future__ import annotations
 import logging
 import re
 import warnings
+from typing import Sequence

 from google.auth.exceptions import DefaultCredentialsError

 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud._internal_client.secret_manager_client import _SecretManagerClient
-from airflow.providers.google.cloud.utils.credentials_provider import
+from airflow.providers.google.cloud.utils.credentials_provider import (
+    _get_target_principal_and_delegates,
+    get_credentials_and_project_id,
+)
 from airflow.secrets import BaseSecretsBackend
 from airflow.utils.log.logging_mixin import LoggingMixin

@@ -76,6 +80,14 @@ class CloudSecretManagerBackend(BaseSecretsBackend, LoggingMixin):
     :param project_id: Project ID to read the secrets from. If not passed, the project ID from credentials
         will be used.
     :param sep: Separator used to concatenate connections_prefix and conn_id. Default: "-"
+    :param impersonation_chain: Optional service account to impersonate using
+        short-term credentials, or chained list of accounts required to get the
+        access token of the last account in the list, which will be impersonated
+        in the request. If set as a string, the account must grant the
+        originating account the Service Account Token Creator IAM role. If set
+        as a sequence, the identities from the list must grant Service Account
+        Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account.
     """

     def __init__(
@@ -89,6 +101,7 @@ class CloudSecretManagerBackend(BaseSecretsBackend, LoggingMixin):
         gcp_scopes: str | None = None,
         project_id: str | None = None,
         sep: str = "-",
+        impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -103,11 +116,19 @@ class CloudSecretManagerBackend(BaseSecretsBackend, LoggingMixin):
                 f"follows that pattern {SECRET_ID_PATTERN}"
             )
         try:
+            if impersonation_chain:
+                target_principal, delegates = _get_target_principal_and_delegates(impersonation_chain)
+            else:
+                target_principal = None
+                delegates = None
+
             self.credentials, self.project_id = get_credentials_and_project_id(
                 keyfile_dict=gcp_keyfile_dict,
                 key_path=gcp_key_path,
                 credential_config_file=gcp_credential_config_file,
                 scopes=gcp_scopes,
+                target_principal=target_principal,
+                delegates=delegates,
             )
         except (DefaultCredentialsError, FileNotFoundError):
             log.exception(
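
The three hunks above add ``impersonation_chain`` support to CloudSecretManagerBackend. In a deployment this is typically supplied through ``backend_kwargs`` in the ``[secrets]`` section of airflow.cfg; a direct instantiation sketch (account, project and prefix values are illustrative):

    from airflow.providers.google.cloud.secrets.secret_manager import CloudSecretManagerBackend

    # The impersonated account (or each link of a chained list) must have the
    # Service Account Token Creator role granted as described in the new docstring.
    backend = CloudSecretManagerBackend(
        connections_prefix="airflow-connections",
        project_id="my-project",  # illustrative project
        impersonation_chain="workloads-sa@my-project.iam.gserviceaccount.com",
    )

    # Secret Manager is then queried with the impersonated credentials; with the
    # default variables_prefix this resolves the secret "airflow-variables-smtp_password".
    smtp_password = backend.get_variable("smtp_password")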
airflow/providers/google/cloud/sensors/cloud_composer.py

@@ -19,9 +19,10 @@

 from __future__ import annotations

+import warnings
 from typing import TYPE_CHECKING, Any, Sequence

-from airflow.exceptions import AirflowException, AirflowSkipException
+from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
 from airflow.providers.google.cloud.triggers.cloud_composer import CloudComposerExecutionTrigger
 from airflow.sensors.base import BaseSensorOperator

@@ -33,6 +34,11 @@ class CloudComposerEnvironmentSensor(BaseSensorOperator):
     """
     Check the status of the Cloud Composer Environment task.

+    This Sensor is deprecated. You can achieve the same functionality by using Cloud Composer Operators
+    CloudComposerCreateEnvironmentOperator, CloudComposerDeleteEnvironmentOperator and
+    CloudComposerUpdateEnvironmentOperator in deferrable or non-deferrable mode, since every operator
+    gives user a possibility to wait (asynchronously or synchronously) until Operation will be finished.
+
     :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
     :param region: Required. The ID of the Google Cloud region that the service belongs to.
     :param operation_name: The name of the operation resource
@@ -59,6 +65,13 @@ class CloudComposerEnvironmentSensor(BaseSensorOperator):
         pooling_period_seconds: int = 30,
         **kwargs,
     ):
+        warnings.warn(
+            f"The `{self.__class__.__name__}` operator is deprecated. You can achieve the same functionality "
+            f"by using operators in deferrable or non-deferrable mode, since every operator for Cloud "
+            f"Composer will wait for the operation to complete.",
+            AirflowProviderDeprecationWarning,
+            stacklevel=2,
+        )
         super().__init__(**kwargs)
         self.project_id = project_id
         self.region = region
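
Because the sensor is deprecated, the replacement suggested by the warning above is to let the Cloud Composer operators wait for the long-running operation themselves. A sketch with illustrative IDs and environment spec (assuming the ``deferrable`` flag the deprecation message refers to):

    from airflow.providers.google.cloud.operators.cloud_composer import (
        CloudComposerCreateEnvironmentOperator,
    )

    # No separate CloudComposerEnvironmentSensor is needed: the operator itself
    # waits for the create operation, here asynchronously via deferrable mode.
    create_environment = CloudComposerCreateEnvironmentOperator(
        task_id="create_environment",
        project_id="my-project",  # illustrative project
        region="us-central1",  # illustrative region
        environment_id="example-environment",
        environment={"config": {"software_config": {"image_version": "composer-2-airflow-2"}}},
        deferrable=True,
    )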
airflow/providers/google/cloud/sensors/dataplex.py

@@ -259,3 +259,121 @@ class DataplexDataQualityJobStatusSensor(BaseSensorOperator):
                 raise AirflowSkipException(message)
             raise AirflowDataQualityScanException(message)
         return job_status == DataScanJob.State.SUCCEEDED
+
+
+class DataplexDataProfileJobStatusSensor(BaseSensorOperator):
+    """
+    Check the status of the Dataplex DataProfile job.
+
+    :param project_id: Required. The ID of the Google Cloud project that the task belongs to.
+    :param region: Required. The ID of the Google Cloud region that the task belongs to.
+    :param data_scan_id: Required. Data Quality scan identifier.
+    :param job_id: Required. Job ID.
+    :param api_version: The version of the api that will be requested for example 'v3'.
+    :param retry: A retry object used to retry requests. If `None` is specified, requests
+        will not be retried.
+    :param metadata: Additional metadata that is provided to the method.
+    :param gcp_conn_id: The connection ID to use when fetching connection info.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    :param result_timeout: Value in seconds for which operator will wait for the Data Quality scan result.
+        Throws exception if there is no result found after specified amount of seconds.
+
+    :return: Boolean indicating if the job run has reached the ``DataScanJob.State.SUCCEEDED``.
+    """
+
+    template_fields = ["job_id"]
+
+    def __init__(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str,
+        job_id: str,
+        api_version: str = "v1",
+        retry: Retry | _MethodDefault = DEFAULT,
+        metadata: Sequence[tuple[str, str]] = (),
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        result_timeout: float = 60.0 * 10,
+        start_sensor_time: float | None = None,
+        *args,
+        **kwargs,
+    ) -> None:
+        super().__init__(*args, **kwargs)
+        self.project_id = project_id
+        self.region = region
+        self.data_scan_id = data_scan_id
+        self.job_id = job_id
+        self.api_version = api_version
+        self.retry = retry
+        self.metadata = metadata
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.result_timeout = result_timeout
+        self.start_sensor_time = start_sensor_time
+
+    def _duration(self):
+        if not self.start_sensor_time:
+            self.start_sensor_time = time.monotonic()
+        return time.monotonic() - self.start_sensor_time
+
+    def poke(self, context: Context) -> bool:
+        self.log.info("Waiting for job %s to be %s", self.job_id, DataScanJob.State.SUCCEEDED)
+        if self.result_timeout:
+            duration = self._duration()
+            if duration > self.result_timeout:
+                # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+                message = (
+                    f"Timeout: Data Profile scan {self.job_id} is not ready after {self.result_timeout}s"
+                )
+                if self.soft_fail:
+                    raise AirflowSkipException(message)
+                raise AirflowDataQualityScanResultTimeoutException(message)
+
+        hook = DataplexHook(
+            gcp_conn_id=self.gcp_conn_id,
+            api_version=self.api_version,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+        try:
+            job = hook.get_data_scan_job(
+                project_id=self.project_id,
+                region=self.region,
+                data_scan_id=self.data_scan_id,
+                job_id=self.job_id,
+                timeout=self.timeout,
+                retry=self.retry,
+                metadata=self.metadata,
+            )
+        except GoogleAPICallError as e:
+            # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+            message = f"Error occurred when trying to retrieve Data Profile scan job: {self.data_scan_id}"
+            if self.soft_fail:
+                raise AirflowSkipException(message, e)
+            raise AirflowException(message, e)
+
+        job_status = job.state
+        self.log.info(
+            "Current status of the Dataplex Data Profile scan job %s => %s", self.job_id, job_status
+        )
+        if job_status == DataScanJob.State.FAILED:
+            # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+            message = f"Data Profile scan job failed: {self.job_id}"
+            if self.soft_fail:
+                raise AirflowSkipException(message)
+            raise AirflowException(message)
+        if job_status == DataScanJob.State.CANCELLED:
+            # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+            message = f"Data Profile scan job cancelled: {self.job_id}"
+            if self.soft_fail:
+                raise AirflowSkipException(message)
+            raise AirflowException(message)
+        return job_status == DataScanJob.State.SUCCEEDED
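
A usage sketch for the new DataplexDataProfileJobStatusSensor (IDs below are illustrative; ``job_id`` is a templated field, so it can be pulled from the XCom of whichever task started the Data Profile scan):

    from airflow.providers.google.cloud.sensors.dataplex import DataplexDataProfileJobStatusSensor

    wait_for_profile_scan = DataplexDataProfileJobStatusSensor(
        task_id="wait_for_profile_scan",
        project_id="my-project",  # illustrative project
        region="us-central1",  # illustrative region
        data_scan_id="example-profile-scan",
        # Hypothetical upstream task that triggered the scan and pushed the job id.
        job_id="{{ task_instance.xcom_pull('run_profile_scan') }}",
        poke_interval=60,  # standard BaseSensorOperator parameter
        result_timeout=600,
    )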
airflow/providers/google/cloud/sensors/gcs.py

@@ -50,6 +50,7 @@ class GCSObjectExistenceSensor(BaseSensorOperator):
     :param bucket: The Google Cloud Storage bucket where the object is.
     :param object: The name of the object to check in the Google cloud
         storage bucket.
+    :param use_glob: When set to True the object parameter is interpreted as glob
     :param google_cloud_conn_id: The connection ID to use when
         connecting to Google Cloud Storage.
     :param impersonation_chain: Optional service account to impersonate using short-term
@@ -75,6 +76,7 @@ class GCSObjectExistenceSensor(BaseSensorOperator):
         *,
         bucket: str,
         object: str,
+        use_glob: bool = False,
         google_cloud_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         retry: Retry = DEFAULT_RETRY,
@@ -84,7 +86,9 @@ class GCSObjectExistenceSensor(BaseSensorOperator):
         super().__init__(**kwargs)
         self.bucket = bucket
         self.object = object
+        self.use_glob = use_glob
         self.google_cloud_conn_id = google_cloud_conn_id
+        self._matches: list[str] = []
         self.impersonation_chain = impersonation_chain
         self.retry = retry

@@ -96,7 +100,11 @@ class GCSObjectExistenceSensor(BaseSensorOperator):
             gcp_conn_id=self.google_cloud_conn_id,
             impersonation_chain=self.impersonation_chain,
         )
-
+        if self.use_glob:
+            self._matches = hook.list(self.bucket, match_glob=self.object)
+            return bool(self._matches)
+        else:
+            return hook.exists(self.bucket, self.object, self.retry)

     def execute(self, context: Context) -> None:
         """Airflow runs this method on the worker and defers using the trigger."""
@@ -109,6 +117,7 @@ class GCSObjectExistenceSensor(BaseSensorOperator):
                 trigger=GCSBlobTrigger(
                     bucket=self.bucket,
                     object_name=self.object,
+                    use_glob=self.use_glob,
                     poke_interval=self.poke_interval,
                     google_cloud_conn_id=self.google_cloud_conn_id,
                     hook_params={
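
A usage sketch of the new ``use_glob`` flag (bucket and pattern are illustrative): when it is enabled, the ``object`` value is forwarded as ``match_glob`` to ``GCSHook.list()`` and the sensor succeeds as soon as at least one object matches.

    from airflow.providers.google.cloud.sensors.gcs import GCSObjectExistenceSensor

    wait_for_any_export = GCSObjectExistenceSensor(
        task_id="wait_for_any_export",
        bucket="my-example-bucket",  # illustrative bucket
        object="exports/2024-01-01/*.parquet",  # interpreted as a glob pattern
        use_glob=True,
    )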
airflow/providers/google/cloud/transfers/adls_to_gcs.py

@@ -58,12 +58,12 @@ class ADLSToGCSOperator(ADLSListOperator):
     resulting gcs path will be ``gs://mybucket/hello/world.avro`` ::

         copy_single_file = AdlsToGoogleCloudStorageOperator(
-            task_id=
-            src_adls=
-            dest_gcs=
+            task_id="copy_single_file",
+            src_adls="hello/world.avro",
+            dest_gcs="gs://mybucket",
             replace=False,
-            azure_data_lake_conn_id=
-            gcp_conn_id=
+            azure_data_lake_conn_id="azure_data_lake_default",
+            gcp_conn_id="google_cloud_default",
         )

     The following Operator would copy all parquet files from ADLS
|