apache-airflow-providers-google 10.12.0rc1__py3-none-any.whl → 10.13.0rc2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (27)
  1. airflow/providers/google/__init__.py +3 -3
  2. airflow/providers/google/cloud/fs/gcs.py +16 -13
  3. airflow/providers/google/cloud/hooks/bigquery_dts.py +2 -1
  4. airflow/providers/google/cloud/hooks/cloud_build.py +2 -1
  5. airflow/providers/google/cloud/hooks/cloud_composer.py +4 -3
  6. airflow/providers/google/cloud/hooks/compute_ssh.py +18 -6
  7. airflow/providers/google/cloud/hooks/dataflow.py +1 -1
  8. airflow/providers/google/cloud/hooks/dataplex.py +2 -1
  9. airflow/providers/google/cloud/hooks/dataproc.py +19 -18
  10. airflow/providers/google/cloud/hooks/gcs.py +2 -0
  11. airflow/providers/google/cloud/operators/cloud_composer.py +1 -1
  12. airflow/providers/google/cloud/operators/cloud_run.py +3 -3
  13. airflow/providers/google/cloud/operators/dataplex.py +530 -1
  14. airflow/providers/google/cloud/operators/dataproc.py +10 -8
  15. airflow/providers/google/cloud/operators/gcs.py +85 -10
  16. airflow/providers/google/cloud/secrets/secret_manager.py +22 -1
  17. airflow/providers/google/cloud/sensors/cloud_composer.py +14 -1
  18. airflow/providers/google/cloud/sensors/dataplex.py +118 -0
  19. airflow/providers/google/cloud/triggers/cloud_run.py +7 -7
  20. airflow/providers/google/cloud/triggers/dataplex.py +82 -0
  21. airflow/providers/google/cloud/triggers/dataproc.py +2 -5
  22. airflow/providers/google/common/hooks/base_google.py +6 -4
  23. airflow/providers/google/get_provider_info.py +11 -10
  24. {apache_airflow_providers_google-10.12.0rc1.dist-info → apache_airflow_providers_google-10.13.0rc2.dist-info}/METADATA +24 -24
  25. {apache_airflow_providers_google-10.12.0rc1.dist-info → apache_airflow_providers_google-10.13.0rc2.dist-info}/RECORD +27 -27
  26. {apache_airflow_providers_google-10.12.0rc1.dist-info → apache_airflow_providers_google-10.13.0rc2.dist-info}/WHEEL +0 -0
  27. {apache_airflow_providers_google-10.12.0rc1.dist-info → apache_airflow_providers_google-10.13.0rc2.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/gcs.py

@@ -313,6 +313,7 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
             )
             raise ValueError(err_message)
 
+        self._objects: list[str] = []
         super().__init__(**kwargs)
 
     def execute(self, context: Context) -> None:
@@ -322,13 +323,47 @@ class GCSDeleteObjectsOperator(GoogleCloudBaseOperator):
         )
 
         if self.objects is not None:
-            objects = self.objects
+            self._objects = self.objects
         else:
-            objects = hook.list(bucket_name=self.bucket_name, prefix=self.prefix)
-        self.log.info("Deleting %s objects from %s", len(objects), self.bucket_name)
-        for object_name in objects:
+            self._objects = hook.list(bucket_name=self.bucket_name, prefix=self.prefix)
+        self.log.info("Deleting %s objects from %s", len(self._objects), self.bucket_name)
+        for object_name in self._objects:
             hook.delete(bucket_name=self.bucket_name, object_name=object_name)
 
+    def get_openlineage_facets_on_complete(self, task_instance):
+        """Implementing on_complete as execute() resolves object names."""
+        from openlineage.client.facet import (
+            LifecycleStateChange,
+            LifecycleStateChangeDatasetFacet,
+            LifecycleStateChangeDatasetFacetPreviousIdentifier,
+        )
+        from openlineage.client.run import Dataset
+
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        if not self._objects:
+            return OperatorLineage()
+
+        bucket_url = f"gs://{self.bucket_name}"
+        input_datasets = [
+            Dataset(
+                namespace=bucket_url,
+                name=object_name,
+                facets={
+                    "lifecycleStateChange": LifecycleStateChangeDatasetFacet(
+                        lifecycleStateChange=LifecycleStateChange.DROP.value,
+                        previousIdentifier=LifecycleStateChangeDatasetFacetPreviousIdentifier(
+                            namespace=bucket_url,
+                            name=object_name,
+                        ),
+                    )
+                },
+            )
+            for object_name in self._objects
+        ]
+
+        return OperatorLineage(inputs=input_datasets)
+
 
 class GCSBucketCreateAclEntryOperator(GoogleCloudBaseOperator):
     """
@@ -596,6 +631,22 @@ class GCSFileTransformOperator(GoogleCloudBaseOperator):
                 filename=destination_file.name,
             )
 
+    def get_openlineage_facets_on_start(self):
+        from openlineage.client.run import Dataset
+
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        input_dataset = Dataset(
+            namespace=f"gs://{self.source_bucket}",
+            name=self.source_object,
+        )
+        output_dataset = Dataset(
+            namespace=f"gs://{self.destination_bucket}",
+            name=self.destination_object,
+        )
+
+        return OperatorLineage(inputs=[input_dataset], outputs=[output_dataset])
+
 
 class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
     """
@@ -722,6 +773,9 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
         self.upload_continue_on_fail = upload_continue_on_fail
         self.upload_num_attempts = upload_num_attempts
 
+        self._source_object_names: list[str] = []
+        self._destination_object_names: list[str] = []
+
     def execute(self, context: Context) -> list[str]:
         # Define intervals and prefixes.
         try:
@@ -773,7 +827,7 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
         )
 
         # Fetch list of files.
-        blobs_to_transform = source_hook.list_by_timespan(
+        self._source_object_names = source_hook.list_by_timespan(
            bucket_name=self.source_bucket,
            prefix=source_prefix_interp,
            timespan_start=timespan_start,
@@ -785,7 +839,7 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
             temp_output_dir_path = Path(temp_output_dir)
 
             # TODO: download in parallel.
-            for blob_to_transform in blobs_to_transform:
+            for blob_to_transform in self._source_object_names:
                 destination_file = temp_input_dir_path / blob_to_transform
                 destination_file.parent.mkdir(parents=True, exist_ok=True)
                 try:
@@ -822,8 +876,6 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
 
             self.log.info("Transformation succeeded. Output temporarily located at %s", temp_output_dir_path)
 
-            files_uploaded = []
-
             # TODO: upload in parallel.
             for upload_file in temp_output_dir_path.glob("**/*"):
                 if upload_file.is_dir():
@@ -844,12 +896,35 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
                         chunk_size=self.chunk_size,
                         num_max_attempts=self.upload_num_attempts,
                     )
-                    files_uploaded.append(str(upload_file_name))
+                    self._destination_object_names.append(str(upload_file_name))
                 except GoogleCloudError:
                     if not self.upload_continue_on_fail:
                         raise
 
-        return files_uploaded
+        return self._destination_object_names
+
+    def get_openlineage_facets_on_complete(self, task_instance):
+        """Implementing on_complete as execute() resolves object names."""
+        from openlineage.client.run import Dataset
+
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        input_datasets = [
+            Dataset(
+                namespace=f"gs://{self.source_bucket}",
+                name=object_name,
+            )
+            for object_name in self._source_object_names
+        ]
+        output_datasets = [
+            Dataset(
+                namespace=f"gs://{self.destination_bucket}",
+                name=object_name,
+            )
+            for object_name in self._destination_object_names
+        ]
+
+        return OperatorLineage(inputs=input_datasets, outputs=output_datasets)
 
 
 class GCSDeleteBucketOperator(GoogleCloudBaseOperator):
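With these changes the GCS operators expose OpenLineage metadata. A minimal sketch of a DAG that would now emit lineage on object deletion, assuming the apache-airflow-providers-openlineage package is installed; the DAG id, bucket and prefix are hypothetical placeholders:

    from __future__ import annotations

    import pendulum

    from airflow import DAG
    from airflow.providers.google.cloud.operators.gcs import GCSDeleteObjectsOperator

    with DAG(
        dag_id="gcs_delete_with_lineage",          # hypothetical DAG id
        start_date=pendulum.datetime(2023, 12, 1, tz="UTC"),
        schedule=None,
        catchup=False,
    ):
        # After execute() resolves the object list, get_openlineage_facets_on_complete()
        # reports each deleted object as an input dataset with a DROP lifecycle facet.
        delete_files = GCSDeleteObjectsOperator(
            task_id="delete_files",
            bucket_name="example-bucket",          # hypothetical bucket
            prefix="staging/2023-12-01/",          # hypothetical prefix
        )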
airflow/providers/google/cloud/secrets/secret_manager.py

@@ -20,12 +20,16 @@ from __future__ import annotations
 import logging
 import re
 import warnings
+from typing import Sequence
 
 from google.auth.exceptions import DefaultCredentialsError
 
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud._internal_client.secret_manager_client import _SecretManagerClient
-from airflow.providers.google.cloud.utils.credentials_provider import get_credentials_and_project_id
+from airflow.providers.google.cloud.utils.credentials_provider import (
+    _get_target_principal_and_delegates,
+    get_credentials_and_project_id,
+)
 from airflow.secrets import BaseSecretsBackend
 from airflow.utils.log.logging_mixin import LoggingMixin
 
@@ -76,6 +80,14 @@ class CloudSecretManagerBackend(BaseSecretsBackend, LoggingMixin):
     :param project_id: Project ID to read the secrets from. If not passed, the project ID from credentials
         will be used.
     :param sep: Separator used to concatenate connections_prefix and conn_id. Default: "-"
+    :param impersonation_chain: Optional service account to impersonate using
+        short-term credentials, or chained list of accounts required to get the
+        access token of the last account in the list, which will be impersonated
+        in the request. If set as a string, the account must grant the
+        originating account the Service Account Token Creator IAM role. If set
+        as a sequence, the identities from the list must grant Service Account
+        Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account.
     """
 
     def __init__(
@@ -89,6 +101,7 @@ class CloudSecretManagerBackend(BaseSecretsBackend, LoggingMixin):
         gcp_scopes: str | None = None,
         project_id: str | None = None,
         sep: str = "-",
+        impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -103,11 +116,19 @@ class CloudSecretManagerBackend(BaseSecretsBackend, LoggingMixin):
                 f"follows that pattern {SECRET_ID_PATTERN}"
             )
         try:
+            if impersonation_chain:
+                target_principal, delegates = _get_target_principal_and_delegates(impersonation_chain)
+            else:
+                target_principal = None
+                delegates = None
+
             self.credentials, self.project_id = get_credentials_and_project_id(
                 keyfile_dict=gcp_keyfile_dict,
                 key_path=gcp_key_path,
                 credential_config_file=gcp_credential_config_file,
                 scopes=gcp_scopes,
+                target_principal=target_principal,
+                delegates=delegates,
             )
         except (DefaultCredentialsError, FileNotFoundError):
             log.exception(
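The new impersonation_chain option would normally be passed through [secrets] backend_kwargs in airflow.cfg; a minimal sketch of constructing the backend directly instead, with a hypothetical service account and default values for the remaining arguments:

    from airflow.providers.google.cloud.secrets.secret_manager import CloudSecretManagerBackend

    backend = CloudSecretManagerBackend(
        connections_prefix="airflow-connections",
        variables_prefix="airflow-variables",
        # Hypothetical account; the ambient credentials must hold the
        # Service Account Token Creator role on it.
        impersonation_chain="secrets-reader@example-project.iam.gserviceaccount.com",
    )
    # Reads the secret "airflow-variables-my_var" with the impersonated credentials.
    value = backend.get_variable("my_var")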
airflow/providers/google/cloud/sensors/cloud_composer.py

@@ -19,9 +19,10 @@
 
 from __future__ import annotations
 
+import warnings
 from typing import TYPE_CHECKING, Any, Sequence
 
-from airflow.exceptions import AirflowException, AirflowSkipException
+from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
 from airflow.providers.google.cloud.triggers.cloud_composer import CloudComposerExecutionTrigger
 from airflow.sensors.base import BaseSensorOperator
 
@@ -33,6 +34,11 @@ class CloudComposerEnvironmentSensor(BaseSensorOperator):
     """
     Check the status of the Cloud Composer Environment task.
 
+    This Sensor is deprecated. You can achieve the same functionality by using Cloud Composer Operators
+    CloudComposerCreateEnvironmentOperator, CloudComposerDeleteEnvironmentOperator and
+    CloudComposerUpdateEnvironmentOperator in deferrable or non-deferrable mode, since every operator
+    gives user a possibility to wait (asynchronously or synchronously) until Operation will be finished.
+
     :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
     :param region: Required. The ID of the Google Cloud region that the service belongs to.
     :param operation_name: The name of the operation resource
@@ -59,6 +65,13 @@ class CloudComposerEnvironmentSensor(BaseSensorOperator):
         pooling_period_seconds: int = 30,
         **kwargs,
     ):
+        warnings.warn(
+            f"The `{self.__class__.__name__}` operator is deprecated. You can achieve the same functionality "
+            f"by using operators in deferrable or non-deferrable mode, since every operator for Cloud "
+            f"Composer will wait for the operation to complete.",
+            AirflowProviderDeprecationWarning,
+            stacklevel=2,
+        )
         super().__init__(**kwargs)
         self.project_id = project_id
         self.region = region
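As the deprecation message suggests, waiting on the Composer operation can be delegated to the Composer operators themselves. A rough sketch of the suggested replacement, using a deferrable create call; the project, region, environment id and environment body are hypothetical:

    from airflow.providers.google.cloud.operators.cloud_composer import (
        CloudComposerCreateEnvironmentOperator,
    )

    create_env = CloudComposerCreateEnvironmentOperator(
        task_id="create_composer_environment",
        project_id="example-project",            # hypothetical
        region="us-central1",                    # hypothetical
        environment_id="example-environment",    # hypothetical
        environment={"config": {}},              # minimal, hypothetical Environment body
        deferrable=True,                         # wait asynchronously instead of adding a sensor
    )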
airflow/providers/google/cloud/sensors/dataplex.py

@@ -259,3 +259,121 @@ class DataplexDataQualityJobStatusSensor(BaseSensorOperator):
                 raise AirflowSkipException(message)
             raise AirflowDataQualityScanException(message)
         return job_status == DataScanJob.State.SUCCEEDED
+
+
+class DataplexDataProfileJobStatusSensor(BaseSensorOperator):
+    """
+    Check the status of the Dataplex DataProfile job.
+
+    :param project_id: Required. The ID of the Google Cloud project that the task belongs to.
+    :param region: Required. The ID of the Google Cloud region that the task belongs to.
+    :param data_scan_id: Required. Data Quality scan identifier.
+    :param job_id: Required. Job ID.
+    :param api_version: The version of the api that will be requested for example 'v3'.
+    :param retry: A retry object used to retry requests. If `None` is specified, requests
+        will not be retried.
+    :param metadata: Additional metadata that is provided to the method.
+    :param gcp_conn_id: The connection ID to use when fetching connection info.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    :param result_timeout: Value in seconds for which operator will wait for the Data Quality scan result.
+        Throws exception if there is no result found after specified amount of seconds.
+
+    :return: Boolean indicating if the job run has reached the ``DataScanJob.State.SUCCEEDED``.
+    """
+
+    template_fields = ["job_id"]
+
+    def __init__(
+        self,
+        project_id: str,
+        region: str,
+        data_scan_id: str,
+        job_id: str,
+        api_version: str = "v1",
+        retry: Retry | _MethodDefault = DEFAULT,
+        metadata: Sequence[tuple[str, str]] = (),
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        result_timeout: float = 60.0 * 10,
+        start_sensor_time: float | None = None,
+        *args,
+        **kwargs,
+    ) -> None:
+        super().__init__(*args, **kwargs)
+        self.project_id = project_id
+        self.region = region
+        self.data_scan_id = data_scan_id
+        self.job_id = job_id
+        self.api_version = api_version
+        self.retry = retry
+        self.metadata = metadata
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.result_timeout = result_timeout
+        self.start_sensor_time = start_sensor_time
+
+    def _duration(self):
+        if not self.start_sensor_time:
+            self.start_sensor_time = time.monotonic()
+        return time.monotonic() - self.start_sensor_time
+
+    def poke(self, context: Context) -> bool:
+        self.log.info("Waiting for job %s to be %s", self.job_id, DataScanJob.State.SUCCEEDED)
+        if self.result_timeout:
+            duration = self._duration()
+            if duration > self.result_timeout:
+                # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+                message = (
+                    f"Timeout: Data Profile scan {self.job_id} is not ready after {self.result_timeout}s"
+                )
+                if self.soft_fail:
+                    raise AirflowSkipException(message)
+                raise AirflowDataQualityScanResultTimeoutException(message)
+
+        hook = DataplexHook(
+            gcp_conn_id=self.gcp_conn_id,
+            api_version=self.api_version,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+        try:
+            job = hook.get_data_scan_job(
+                project_id=self.project_id,
+                region=self.region,
+                data_scan_id=self.data_scan_id,
+                job_id=self.job_id,
+                timeout=self.timeout,
+                retry=self.retry,
+                metadata=self.metadata,
+            )
+        except GoogleAPICallError as e:
+            # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+            message = f"Error occurred when trying to retrieve Data Profile scan job: {self.data_scan_id}"
+            if self.soft_fail:
+                raise AirflowSkipException(message, e)
+            raise AirflowException(message, e)
+
+        job_status = job.state
+        self.log.info(
+            "Current status of the Dataplex Data Profile scan job %s => %s", self.job_id, job_status
+        )
+        if job_status == DataScanJob.State.FAILED:
+            # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+            message = f"Data Profile scan job failed: {self.job_id}"
+            if self.soft_fail:
+                raise AirflowSkipException(message)
+            raise AirflowException(message)
+        if job_status == DataScanJob.State.CANCELLED:
+            # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+            message = f"Data Profile scan job cancelled: {self.job_id}"
+            if self.soft_fail:
+                raise AirflowSkipException(message)
+            raise AirflowException(message)
+        return job_status == DataScanJob.State.SUCCEEDED
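A minimal sketch of wiring up the new sensor, with hypothetical project, region and scan identifiers, and a job_id pulled via XCom from a hypothetical upstream task (job_id is a templated field):

    from airflow.providers.google.cloud.sensors.dataplex import DataplexDataProfileJobStatusSensor

    wait_for_profile_scan = DataplexDataProfileJobStatusSensor(
        task_id="wait_for_profile_scan",
        project_id="example-project",                                 # hypothetical
        region="us-central1",                                         # hypothetical
        data_scan_id="customers-profile-scan",                        # hypothetical
        job_id="{{ task_instance.xcom_pull('run_profile_scan') }}",   # hypothetical upstream task id
        poke_interval=60,
        timeout=60 * 30,
    )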
airflow/providers/google/cloud/triggers/cloud_run.py

@@ -102,21 +102,21 @@ class CloudRunJobFinishedTrigger(BaseTrigger):
         while timeout is None or timeout > 0:
             operation: operations_pb2.Operation = await hook.get_operation(self.operation_name)
             if operation.done:
-                # An operation can only have one of those two combinations: if it is succeeded, then
-                # the response field will be populated, else, then the error field will be.
-                if operation.response is not None:
+                # An operation can only have one of those two combinations: if it is failed, then
+                # the error field will be populated, else, then the response field will be.
+                if operation.error.SerializeToString():
                     yield TriggerEvent(
                         {
-                            "status": RunJobStatus.SUCCESS,
+                            "status": RunJobStatus.FAIL.value,
+                            "operation_error_code": operation.error.code,
+                            "operation_error_message": operation.error.message,
                             "job_name": self.job_name,
                         }
                     )
                 else:
                     yield TriggerEvent(
                         {
-                            "status": RunJobStatus.FAIL,
-                            "operation_error_code": operation.error.code,
-                            "operation_error_message": operation.error.message,
+                            "status": RunJobStatus.SUCCESS.value,
                             "job_name": self.job_name,
                         }
                     )
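Because the trigger now serializes the enum as RunJobStatus.*.value, code that inspects the event payload should compare against the value as well. A hedged sketch of such a check (not the provider's actual execute_complete), assuming RunJobStatus is importable from the triggers module:

    from airflow.exceptions import AirflowException
    from airflow.providers.google.cloud.triggers.cloud_run import RunJobStatus


    def handle_cloud_run_event(event: dict) -> str:
        # Illustrative handler for the TriggerEvent payload shown above.
        if event["status"] == RunJobStatus.FAIL.value:
            raise AirflowException(
                f"Cloud Run job {event['job_name']} failed: "
                f"{event['operation_error_code']} {event['operation_error_message']}"
            )
        return event["job_name"]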
airflow/providers/google/cloud/triggers/dataplex.py

@@ -107,3 +107,85 @@ class DataplexDataQualityJobTrigger(BaseTrigger):
     def _convert_to_dict(self, job: DataScanJob) -> dict:
         """Returns a representation of a DataScanJob instance as a dict."""
         return DataScanJob.to_dict(job)
+
+
+class DataplexDataProfileJobTrigger(BaseTrigger):
+    """
+    DataplexDataProfileJobTrigger runs on the trigger worker and waits for the job to be `SUCCEEDED` state.
+
+    :param job_id: Optional. The ID of a Dataplex job.
+    :param data_scan_id: Required. DataScan identifier.
+    :param project_id: Google Cloud Project where the job is running.
+    :param region: The ID of the Google Cloud region that the job belongs to.
+    :param gcp_conn_id: Optional, the connection ID used to connect to Google Cloud Platform.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    :param polling_interval_seconds: polling period in seconds to check for the status.
+    """
+
+    def __init__(
+        self,
+        job_id: str | None,
+        data_scan_id: str,
+        project_id: str | None,
+        region: str,
+        gcp_conn_id: str = "google_cloud_default",
+        polling_interval_seconds: int = 10,
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.job_id = job_id
+        self.data_scan_id = data_scan_id
+        self.project_id = project_id
+        self.region = region
+        self.gcp_conn_id = gcp_conn_id
+        self.polling_interval_seconds = polling_interval_seconds
+        self.impersonation_chain = impersonation_chain
+
+    def serialize(self):
+        return (
+            "airflow.providers.google.cloud.triggers.dataplex.DataplexDataProfileJobTrigger",
+            {
+                "job_id": self.job_id,
+                "data_scan_id": self.data_scan_id,
+                "project_id": self.project_id,
+                "region": self.region,
+                "gcp_conn_id": self.gcp_conn_id,
+                "impersonation_chain": self.impersonation_chain,
+                "polling_interval_seconds": self.polling_interval_seconds,
+            },
+        )
+
+    async def run(self) -> AsyncIterator[TriggerEvent]:
+        hook = DataplexAsyncHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+        while True:
+            job = await hook.get_data_scan_job(
+                project_id=self.project_id,
+                region=self.region,
+                job_id=self.job_id,
+                data_scan_id=self.data_scan_id,
+            )
+            state = job.state
+            if state in (DataScanJob.State.FAILED, DataScanJob.State.SUCCEEDED, DataScanJob.State.CANCELLED):
+                break
+            self.log.info(
+                "Current state is: %s, sleeping for %s seconds.",
+                DataScanJob.State(state).name,
+                self.polling_interval_seconds,
+            )
+            await asyncio.sleep(self.polling_interval_seconds)
+        yield TriggerEvent({"job_id": self.job_id, "job_state": state, "job": self._convert_to_dict(job)})
+
+    def _convert_to_dict(self, job: DataScanJob) -> dict:
+        """Returns a representation of a DataScanJob instance as a dict."""
+        return DataScanJob.to_dict(job)
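A rough sketch of how an operator might defer onto this trigger; the operator class and its arguments are hypothetical, while self.defer() and the execute_complete callback are the standard deferrable-operator mechanism:

    from airflow.models import BaseOperator
    from airflow.providers.google.cloud.triggers.dataplex import DataplexDataProfileJobTrigger


    class WaitForProfileScanOperator(BaseOperator):
        # Hypothetical minimal operator that only waits for an existing scan job.
        def __init__(self, *, job_id: str, data_scan_id: str, project_id: str, region: str, **kwargs):
            super().__init__(**kwargs)
            self.job_id = job_id
            self.data_scan_id = data_scan_id
            self.project_id = project_id
            self.region = region

        def execute(self, context):
            self.defer(
                trigger=DataplexDataProfileJobTrigger(
                    job_id=self.job_id,
                    data_scan_id=self.data_scan_id,
                    project_id=self.project_id,
                    region=self.region,
                    polling_interval_seconds=30,
                ),
                method_name="execute_complete",
            )

        def execute_complete(self, context, event):
            # The trigger reports the terminal state and the serialized job in the event payload.
            return event["job_state"]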
airflow/providers/google/cloud/triggers/dataproc.py

@@ -25,7 +25,6 @@ from typing import Any, AsyncIterator, Sequence
 from google.api_core.exceptions import NotFound
 from google.cloud.dataproc_v1 import Batch, ClusterStatus, JobStatus
 
-from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 
@@ -98,12 +97,10 @@ class DataprocSubmitTrigger(DataprocBaseTrigger):
             )
             state = job.status.state
             self.log.info("Dataproc job: %s is in state: %s", self.job_id, state)
-            if state in (JobStatus.State.DONE, JobStatus.State.CANCELLED):
+            if state in (JobStatus.State.DONE, JobStatus.State.CANCELLED, JobStatus.State.ERROR):
                 break
-            elif state == JobStatus.State.ERROR:
-                raise AirflowException(f"Dataproc job execution failed {self.job_id}")
             await asyncio.sleep(self.polling_interval_seconds)
-        yield TriggerEvent({"job_id": self.job_id, "job_state": state})
+        yield TriggerEvent({"job_id": self.job_id, "job_state": state, "job": job})
 
 
 class DataprocClusterTrigger(DataprocBaseTrigger):
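Since the trigger no longer raises on ERROR and instead reports the terminal state (and the job) in the event, failure handling moves to the deferring operator's completion callback. A hedged sketch of such a check, not the provider's actual implementation:

    from google.cloud.dataproc_v1 import JobStatus

    from airflow.exceptions import AirflowException


    def handle_dataproc_submit_event(event: dict):
        # The event now carries the terminal state plus the job resource itself.
        if event["job_state"] == JobStatus.State.ERROR:
            raise AirflowException(f"Dataproc job {event['job_id']} failed")
        return event["job"]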
airflow/providers/google/common/hooks/base_google.py

@@ -188,8 +188,8 @@ class GoogleBaseHook(BaseHook):
     conn_type = "google_cloud_platform"
     hook_name = "Google Cloud"
 
-    @staticmethod
-    def get_connection_form_widgets() -> dict[str, Any]:
+    @classmethod
+    def get_connection_form_widgets(cls) -> dict[str, Any]:
         """Returns connection widgets to add to connection form."""
         from flask_appbuilder.fieldwidgets import BS3PasswordFieldWidget, BS3TextFieldWidget
         from flask_babel import lazy_gettext
@@ -221,8 +221,8 @@ class GoogleBaseHook(BaseHook):
             ),
         }
 
-    @staticmethod
-    def get_ui_field_behaviour() -> dict[str, Any]:
+    @classmethod
+    def get_ui_field_behaviour(cls) -> dict[str, Any]:
         """Returns custom field behaviour."""
         return {
             "hidden_fields": ["host", "schema", "login", "password", "port", "extra"],
@@ -267,6 +267,8 @@ class GoogleBaseHook(BaseHook):
 
         if not self.impersonation_chain:
             self.impersonation_chain = self._get_field("impersonation_chain", None)
+            if isinstance(self.impersonation_chain, str) and "," in self.impersonation_chain:
+                self.impersonation_chain = [s.strip() for s in self.impersonation_chain.split(",")]
 
         target_principal, delegates = _get_target_principal_and_delegates(self.impersonation_chain)
 
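With this change a comma-separated impersonation_chain stored in the connection extras is treated as a chained list of accounts. A small standalone sketch of the same parsing, with hypothetical service accounts:

    # Mirrors the new hook behaviour for a comma-separated connection extra field.
    impersonation_chain = (
        "sa-one@example-project.iam.gserviceaccount.com, "
        "sa-two@example-project.iam.gserviceaccount.com"
    )
    if isinstance(impersonation_chain, str) and "," in impersonation_chain:
        impersonation_chain = [s.strip() for s in impersonation_chain.split(",")]
    # -> ["sa-one@example-project.iam.gserviceaccount.com",
    #     "sa-two@example-project.iam.gserviceaccount.com"]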
airflow/providers/google/get_provider_info.py

@@ -28,8 +28,9 @@ def get_provider_info():
         "name": "Google",
         "description": "Google services including:\n\n - `Google Ads <https://ads.google.com/>`__\n - `Google Cloud (GCP) <https://cloud.google.com/>`__\n - `Google Firebase <https://firebase.google.com/>`__\n - `Google LevelDB <https://github.com/google/leveldb/>`__\n - `Google Marketing Platform <https://marketingplatform.google.com/>`__\n - `Google Workspace <https://workspace.google.com/>`__ (formerly Google Suite)\n",
         "suspended": False,
-        "source-date-epoch": 1700827465,
+        "source-date-epoch": 1701983383,
         "versions": [
+            "10.13.0",
             "10.12.0",
             "10.11.1",
             "10.11.0",
@@ -80,30 +81,30 @@ def get_provider_info():
             "1.0.0",
         ],
         "dependencies": [
-            "apache-airflow>=2.5.0",
+            "apache-airflow>=2.6.0",
             "apache-airflow-providers-common-sql>=1.7.2",
             "asgiref>=3.5.2",
             "gcloud-aio-auth>=4.0.0,<5.0.0",
             "gcloud-aio-bigquery>=6.1.2",
             "gcloud-aio-storage",
-            "gcsfs>=2023.9.2",
+            "gcsfs>=2023.10.0",
             "google-ads>=22.1.0",
             "google-api-core>=2.11.0",
             "google-api-python-client>=1.6.0",
             "google-auth>=1.0.0",
             "google-auth-httplib2>=0.0.1",
             "google-cloud-aiplatform>=1.22.1",
-            "google-cloud-automl>=2.11.0",
-            "google-cloud-bigquery-datatransfer>=3.11.0",
+            "google-cloud-automl>=2.12.0",
+            "google-cloud-bigquery-datatransfer>=3.13.0",
             "google-cloud-bigtable>=2.17.0",
-            "google-cloud-build>=3.13.0",
+            "google-cloud-build>=3.22.0",
             "google-cloud-compute>=1.10.0",
             "google-cloud-container>=2.17.4",
             "google-cloud-datacatalog>=3.11.1",
-            "google-cloud-dataflow-client>=0.8.2",
+            "google-cloud-dataflow-client>=0.8.6",
             "google-cloud-dataform>=0.5.0",
-            "google-cloud-dataplex>=1.4.2",
-            "google-cloud-dataproc>=5.5.0",
+            "google-cloud-dataplex>=1.10.0",
+            "google-cloud-dataproc>=5.8.0",
             "google-cloud-dataproc-metastore>=1.12.0",
             "google-cloud-dlp>=3.12.0",
             "google-cloud-kms>=2.15.0",
@@ -111,7 +112,7 @@ def get_provider_info():
             "google-cloud-logging>=3.5.0",
             "google-cloud-memcache>=1.7.0",
             "google-cloud-monitoring>=2.14.1",
-            "google-cloud-orchestration-airflow>=1.7.0",
+            "google-cloud-orchestration-airflow>=1.10.0",
             "google-cloud-os-login>=2.9.1",
             "google-cloud-pubsub>=2.15.0",
             "google-cloud-redis>=2.12.0",