apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. airflow/providers/google/__init__.py +3 -3
  2. airflow/providers/google/cloud/hooks/automl.py +1 -1
  3. airflow/providers/google/cloud/hooks/bigquery.py +64 -33
  4. airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
  5. airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
  6. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
  7. airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
  8. airflow/providers/google/cloud/hooks/dataflow.py +246 -32
  9. airflow/providers/google/cloud/hooks/dataplex.py +6 -2
  10. airflow/providers/google/cloud/hooks/dlp.py +14 -14
  11. airflow/providers/google/cloud/hooks/gcs.py +6 -2
  12. airflow/providers/google/cloud/hooks/gdm.py +2 -2
  13. airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
  14. airflow/providers/google/cloud/hooks/mlengine.py +8 -4
  15. airflow/providers/google/cloud/hooks/pubsub.py +1 -1
  16. airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
  17. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
  18. airflow/providers/google/cloud/links/vertex_ai.py +2 -1
  19. airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
  20. airflow/providers/google/cloud/operators/automl.py +13 -12
  21. airflow/providers/google/cloud/operators/bigquery.py +36 -22
  22. airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
  23. airflow/providers/google/cloud/operators/bigtable.py +7 -6
  24. airflow/providers/google/cloud/operators/cloud_build.py +12 -11
  25. airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
  26. airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
  27. airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
  28. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
  29. airflow/providers/google/cloud/operators/compute.py +12 -11
  30. airflow/providers/google/cloud/operators/datacatalog.py +21 -20
  31. airflow/providers/google/cloud/operators/dataflow.py +59 -42
  32. airflow/providers/google/cloud/operators/datafusion.py +11 -10
  33. airflow/providers/google/cloud/operators/datapipeline.py +3 -2
  34. airflow/providers/google/cloud/operators/dataprep.py +5 -4
  35. airflow/providers/google/cloud/operators/dataproc.py +19 -16
  36. airflow/providers/google/cloud/operators/datastore.py +8 -7
  37. airflow/providers/google/cloud/operators/dlp.py +31 -30
  38. airflow/providers/google/cloud/operators/functions.py +4 -3
  39. airflow/providers/google/cloud/operators/gcs.py +66 -41
  40. airflow/providers/google/cloud/operators/kubernetes_engine.py +232 -12
  41. airflow/providers/google/cloud/operators/life_sciences.py +2 -1
  42. airflow/providers/google/cloud/operators/mlengine.py +11 -10
  43. airflow/providers/google/cloud/operators/pubsub.py +6 -5
  44. airflow/providers/google/cloud/operators/spanner.py +7 -6
  45. airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
  46. airflow/providers/google/cloud/operators/stackdriver.py +11 -10
  47. airflow/providers/google/cloud/operators/tasks.py +14 -13
  48. airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
  49. airflow/providers/google/cloud/operators/translate_speech.py +2 -1
  50. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
  51. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
  52. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
  53. airflow/providers/google/cloud/operators/vision.py +13 -12
  54. airflow/providers/google/cloud/operators/workflows.py +10 -9
  55. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  56. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
  57. airflow/providers/google/cloud/sensors/bigtable.py +2 -1
  58. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
  59. airflow/providers/google/cloud/sensors/dataflow.py +239 -52
  60. airflow/providers/google/cloud/sensors/datafusion.py +2 -1
  61. airflow/providers/google/cloud/sensors/dataproc.py +3 -2
  62. airflow/providers/google/cloud/sensors/gcs.py +14 -12
  63. airflow/providers/google/cloud/sensors/tasks.py +2 -1
  64. airflow/providers/google/cloud/sensors/workflows.py +2 -1
  65. airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
  66. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
  67. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
  68. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  69. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
  70. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
  71. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
  72. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
  73. airflow/providers/google/cloud/triggers/bigquery.py +14 -3
  74. airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
  75. airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
  76. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
  77. airflow/providers/google/cloud/triggers/dataflow.py +504 -4
  78. airflow/providers/google/cloud/triggers/dataproc.py +110 -26
  79. airflow/providers/google/cloud/triggers/mlengine.py +2 -1
  80. airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
  81. airflow/providers/google/common/hooks/base_google.py +45 -7
  82. airflow/providers/google/firebase/hooks/firestore.py +2 -2
  83. airflow/providers/google/firebase/operators/firestore.py +2 -1
  84. airflow/providers/google/get_provider_info.py +3 -2
  85. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/METADATA +8 -8
  86. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/RECORD +88 -89
  87. airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
  88. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/WHEEL +0 -0
  89. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/triggers/dataproc.py

@@ -25,10 +25,12 @@ import time
 from typing import Any, AsyncIterator, Sequence
 
 from google.api_core.exceptions import NotFound
-from google.cloud.dataproc_v1 import Batch, ClusterStatus, JobStatus
+from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
 
-from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook
+from airflow.exceptions import AirflowException
+from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook, DataprocHook
 from airflow.providers.google.cloud.utils.dataproc import DataprocOperationType
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 
 
@@ -38,10 +40,12 @@ class DataprocBaseTrigger(BaseTrigger):
     def __init__(
         self,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         polling_interval_seconds: int = 30,
+        cancel_on_kill: bool = True,
+        delete_on_error: bool = True,
     ):
         super().__init__()
         self.region = region
@@ -49,6 +53,8 @@ class DataprocBaseTrigger(BaseTrigger):
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
         self.polling_interval_seconds = polling_interval_seconds
+        self.cancel_on_kill = cancel_on_kill
+        self.delete_on_error = delete_on_error
 
     def get_async_hook(self):
         return DataprocAsyncHook(
@@ -56,6 +62,16 @@ class DataprocBaseTrigger(BaseTrigger):
             impersonation_chain=self.impersonation_chain,
         )
 
+    def get_sync_hook(self):
+        # The synchronous hook is utilized to delete the cluster when a task is cancelled.
+        # This is because the asynchronous hook deletion is not awaited when the trigger task
+        # is cancelled. The call for deleting the cluster or job through the sync hook is not a blocking
+        # call, which means it does not wait until the cluster or job is deleted.
+        return DataprocHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
 
 class DataprocSubmitTrigger(DataprocBaseTrigger):
     """
@@ -90,20 +106,39 @@ class DataprocSubmitTrigger(DataprocBaseTrigger):
                 "gcp_conn_id": self.gcp_conn_id,
                 "impersonation_chain": self.impersonation_chain,
                 "polling_interval_seconds": self.polling_interval_seconds,
+                "cancel_on_kill": self.cancel_on_kill,
             },
         )
 
     async def run(self):
-        while True:
-            job = await self.get_async_hook().get_job(
-                project_id=self.project_id, region=self.region, job_id=self.job_id
-            )
-            state = job.status.state
-            self.log.info("Dataproc job: %s is in state: %s", self.job_id, state)
-            if state in (JobStatus.State.DONE, JobStatus.State.CANCELLED, JobStatus.State.ERROR):
-                break
-            await asyncio.sleep(self.polling_interval_seconds)
-        yield TriggerEvent({"job_id": self.job_id, "job_state": state, "job": job})
+        try:
+            while True:
+                job = await self.get_async_hook().get_job(
+                    project_id=self.project_id, region=self.region, job_id=self.job_id
+                )
+                state = job.status.state
+                self.log.info("Dataproc job: %s is in state: %s", self.job_id, state)
+                if state in (JobStatus.State.DONE, JobStatus.State.CANCELLED, JobStatus.State.ERROR):
+                    break
+                await asyncio.sleep(self.polling_interval_seconds)
+            yield TriggerEvent({"job_id": self.job_id, "job_state": state, "job": job})
+        except asyncio.CancelledError:
+            self.log.info("Task got cancelled.")
+            try:
+                if self.job_id and self.cancel_on_kill:
+                    self.log.info("Cancelling the job: %s", self.job_id)
+                    # The synchronous hook is utilized to delete the cluster when a task is cancelled. This
+                    # is because the asynchronous hook deletion is not awaited when the trigger task is
+                    # cancelled. The call for deleting the cluster or job through the sync hook is not a
+                    # blocking call, which means it does not wait until the cluster or job is deleted.
+                    self.get_sync_hook().cancel_job(
+                        job_id=self.job_id, project_id=self.project_id, region=self.region
+                    )
+                    self.log.info("Job: %s is cancelled", self.job_id)
+                    yield TriggerEvent({"job_id": self.job_id, "job_state": ClusterStatus.State.DELETING})
+            except Exception as e:
+                self.log.error("Failed to cancel the job: %s with error : %s", self.job_id, str(e))
+                raise e
 
 
 class DataprocClusterTrigger(DataprocBaseTrigger):
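Note: with the new cancel_on_kill flag serialized above, the trigger itself cancels the Dataproc job through the synchronous hook when the triggerer coroutine is cancelled. A minimal sketch of how a deferrable operator could hand polling off to this trigger; the operator class and literal values below are illustrative, not part of this diff:

from airflow.models import BaseOperator
from airflow.providers.google.cloud.triggers.dataproc import DataprocSubmitTrigger


class ExampleDeferrableDataprocJobOperator(BaseOperator):
    def execute(self, context):
        job_id = "job-id-returned-by-a-submit-call"  # assumed to exist already
        # Hand polling off to the triggerer; arguments mirror the serialized fields above.
        self.defer(
            trigger=DataprocSubmitTrigger(
                job_id=job_id,
                project_id="my-project",
                region="us-central1",
                polling_interval_seconds=30,
                cancel_on_kill=True,  # cancel the Dataproc job if this task is killed
            ),
            method_name="execute_complete",
        )

    def execute_complete(self, context, event):
        # run() yields {"job_id": ..., "job_state": ..., "job": ...}.
        self.log.info("Job %s finished in state %s", event["job_id"], event["job_state"])
        return event["job_id"]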
@@ -139,24 +174,73 @@ class DataprocClusterTrigger(DataprocBaseTrigger):
                 "gcp_conn_id": self.gcp_conn_id,
                 "impersonation_chain": self.impersonation_chain,
                 "polling_interval_seconds": self.polling_interval_seconds,
+                "delete_on_error": self.delete_on_error,
             },
         )
 
     async def run(self) -> AsyncIterator[TriggerEvent]:
-        while True:
-            cluster = await self.get_async_hook().get_cluster(
-                project_id=self.project_id, region=self.region, cluster_name=self.cluster_name
+        try:
+            while True:
+                cluster = await self.fetch_cluster()
+                state = cluster.status.state
+                if state == ClusterStatus.State.ERROR:
+                    await self.delete_when_error_occurred(cluster)
+                    yield TriggerEvent(
+                        {
+                            "cluster_name": self.cluster_name,
+                            "cluster_state": ClusterStatus.State.DELETING,
+                            "cluster": cluster,
+                        }
+                    )
+                    return
+                elif state == ClusterStatus.State.RUNNING:
+                    yield TriggerEvent(
+                        {
+                            "cluster_name": self.cluster_name,
+                            "cluster_state": state,
+                            "cluster": cluster,
+                        }
+                    )
+                    return
+                self.log.info("Current state is %s", state)
+                self.log.info("Sleeping for %s seconds.", self.polling_interval_seconds)
+                await asyncio.sleep(self.polling_interval_seconds)
+        except asyncio.CancelledError:
+            try:
+                if self.delete_on_error:
+                    self.log.info("Deleting cluster %s.", self.cluster_name)
+                    # The synchronous hook is utilized to delete the cluster when a task is cancelled.
+                    # This is because the asynchronous hook deletion is not awaited when the trigger task
+                    # is cancelled. The call for deleting the cluster through the sync hook is not a blocking
+                    # call, which means it does not wait until the cluster is deleted.
+                    self.get_sync_hook().delete_cluster(
+                        region=self.region, cluster_name=self.cluster_name, project_id=self.project_id
+                    )
+                    self.log.info("Deleted cluster %s during cancellation.", self.cluster_name)
+            except Exception as e:
+                self.log.error("Error during cancellation handling: %s", e)
+                raise AirflowException("Error during cancellation handling: %s", e)
+
+    async def fetch_cluster(self) -> Cluster:
+        """Fetch the cluster status."""
+        return await self.get_async_hook().get_cluster(
+            project_id=self.project_id, region=self.region, cluster_name=self.cluster_name
+        )
+
+    async def delete_when_error_occurred(self, cluster: Cluster) -> None:
+        """
+        Delete the cluster on error.
+
+        :param cluster: The cluster to delete.
+        """
+        if self.delete_on_error:
+            self.log.info("Deleting cluster %s.", self.cluster_name)
+            await self.get_async_hook().delete_cluster(
+                region=self.region, cluster_name=self.cluster_name, project_id=self.project_id
             )
-            state = cluster.status.state
-            self.log.info("Dataproc cluster: %s is in state: %s", self.cluster_name, state)
-            if state in (
-                ClusterStatus.State.ERROR,
-                ClusterStatus.State.RUNNING,
-            ):
-                break
-            self.log.info("Sleeping for %s seconds.", self.polling_interval_seconds)
-            await asyncio.sleep(self.polling_interval_seconds)
-        yield TriggerEvent({"cluster_name": self.cluster_name, "cluster_state": state, "cluster": cluster})
+            self.log.info("Cluster %s has been deleted.", self.cluster_name)
+        else:
+            self.log.info("Cluster %s is not deleted as delete_on_error is set to False.", self.cluster_name)
 
 
 class DataprocBatchTrigger(DataprocBaseTrigger):
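Note: DataprocClusterTrigger.run() now yields exactly one event and returns, instead of breaking out of the loop and yielding afterwards. A cluster that reaches ERROR is deleted first (when delete_on_error is True) and reported with cluster_state set to DELETING. A sketch of how the consuming side might interpret that payload; the handler function is hypothetical, not part of this diff:

from airflow.exceptions import AirflowException
from google.cloud.dataproc_v1 import ClusterStatus


def handle_cluster_event(event: dict) -> str:
    # Event shape per DataprocClusterTrigger.run() above:
    # {"cluster_name": ..., "cluster_state": ..., "cluster": ...}
    if event["cluster_state"] == ClusterStatus.State.DELETING:
        # The trigger already requested deletion of the ERROR cluster.
        raise AirflowException(f"Cluster {event['cluster_name']} hit ERROR and is being deleted")
    return event["cluster_name"]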
airflow/providers/google/cloud/triggers/mlengine.py

@@ -20,6 +20,7 @@ import asyncio
 from typing import Any, AsyncIterator, Sequence
 
 from airflow.providers.google.cloud.hooks.mlengine import MLEngineAsyncHook
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 
 
@@ -45,7 +46,7 @@ class MLEngineStartTrainingJobTrigger(BaseTrigger):
         runtime_version: str | None = None,
         python_version: str | None = None,
         job_dir: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         labels: dict[str, str] | None = None,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
airflow/providers/google/cloud/triggers/vertex_ai.py

@@ -29,6 +29,7 @@ from google.cloud.aiplatform_v1 import (
 
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.vertex_ai.batch_prediction_job import BatchPredictionJobAsyncHook
+from airflow.providers.google.cloud.hooks.vertex_ai.custom_job import CustomJobAsyncHook
 from airflow.providers.google.cloud.hooks.vertex_ai.hyperparameter_tuning_job import (
     HyperparameterTuningJobAsyncHook,
 )
@@ -189,3 +190,96 @@ class RunPipelineJobTrigger(BaseVertexAIJobTrigger):
             poll_interval=self.poll_interval,
         )
         return job
+
+
+class CustomTrainingJobTrigger(BaseVertexAIJobTrigger):
+    """
+    Make async calls to Vertex AI to check the state of a running custom training job.
+
+    Return the job when it enters a completed state.
+    """
+
+    job_type_verbose_name = "Custom Training Job"
+    job_serializer_class = types.TrainingPipeline
+    statuses_success = {
+        PipelineState.PIPELINE_STATE_PAUSED,
+        PipelineState.PIPELINE_STATE_SUCCEEDED,
+    }
+
+    @cached_property
+    def async_hook(self) -> CustomJobAsyncHook:
+        return CustomJobAsyncHook(
+            gcp_conn_id=self.conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+    async def _wait_job(self) -> types.TrainingPipeline:
+        pipeline: types.TrainingPipeline = await self.async_hook.wait_for_training_pipeline(
+            project_id=self.project_id,
+            location=self.location,
+            pipeline_id=self.job_id,
+            poll_interval=self.poll_interval,
+        )
+        return pipeline
+
+
+class CustomContainerTrainingJobTrigger(BaseVertexAIJobTrigger):
+    """
+    Make async calls to Vertex AI to check the state of a running custom container training job.
+
+    Return the job when it enters a completed state.
+    """
+
+    job_type_verbose_name = "Custom Container Training Job"
+    job_serializer_class = types.TrainingPipeline
+    statuses_success = {
+        PipelineState.PIPELINE_STATE_PAUSED,
+        PipelineState.PIPELINE_STATE_SUCCEEDED,
+    }
+
+    @cached_property
+    def async_hook(self) -> CustomJobAsyncHook:
+        return CustomJobAsyncHook(
+            gcp_conn_id=self.conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+    async def _wait_job(self) -> types.TrainingPipeline:
+        pipeline: types.TrainingPipeline = await self.async_hook.wait_for_training_pipeline(
+            project_id=self.project_id,
+            location=self.location,
+            pipeline_id=self.job_id,
+            poll_interval=self.poll_interval,
+        )
+        return pipeline
+
+
+class CustomPythonPackageTrainingJobTrigger(BaseVertexAIJobTrigger):
+    """
+    Make async calls to Vertex AI to check the state of a running custom python package training job.
+
+    Return the job when it enters a completed state.
+    """
+
+    job_type_verbose_name = "Custom Python Package Training Job"
+    job_serializer_class = types.TrainingPipeline
+    statuses_success = {
+        PipelineState.PIPELINE_STATE_PAUSED,
+        PipelineState.PIPELINE_STATE_SUCCEEDED,
+    }
+
+    @cached_property
+    def async_hook(self) -> CustomJobAsyncHook:
+        return CustomJobAsyncHook(
+            gcp_conn_id=self.conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+    async def _wait_job(self) -> types.TrainingPipeline:
+        pipeline: types.TrainingPipeline = await self.async_hook.wait_for_training_pipeline(
+            project_id=self.project_id,
+            location=self.location,
+            pipeline_id=self.job_id,
+            poll_interval=self.poll_interval,
+        )
+        return pipeline
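Note: the three new trigger classes differ only in job_type_verbose_name; each one polls a TrainingPipeline through CustomJobAsyncHook.wait_for_training_pipeline until it reaches a paused or succeeded state. A rough sketch of deferring to one of them; the operator is illustrative and the constructor arguments are inferred from the attributes the trigger reads (conn_id, project_id, location, job_id, poll_interval, impersonation_chain), which are not shown in this diff:

from airflow.models import BaseOperator
from airflow.providers.google.cloud.triggers.vertex_ai import CustomTrainingJobTrigger


class ExampleDeferrableCustomTrainingOperator(BaseOperator):
    def execute(self, context):
        pipeline_id = "training-pipeline-id"  # assumed to come from an earlier submit step
        self.defer(
            trigger=CustomTrainingJobTrigger(
                conn_id="google_cloud_default",
                project_id="my-project",
                location="us-central1",
                job_id=pipeline_id,
                poll_interval=60,
            ),
            method_name="execute_complete",
        )

    def execute_complete(self, context, event):
        # The event payload is built by BaseVertexAIJobTrigger, which is not shown in this diff.
        self.log.info("Training pipeline finished: %s", event)
        return event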
airflow/providers/google/common/hooks/base_google.py

@@ -114,6 +114,19 @@ def is_operation_in_progress_exception(exception: Exception) -> bool:
     return False
 
 
+def is_refresh_credentials_exception(exception: Exception) -> bool:
+    """
+    Handle refresh credentials exceptions.
+
+    Some calls return 502 (server error) in case a new token cannot be obtained.
+
+    * Google BigQuery
+    """
+    if isinstance(exception, RefreshError):
+        return "Unable to acquire impersonated credentials" in str(exception)
+    return False
+
+
 class retry_if_temporary_quota(tenacity.retry_if_exception):
     """Retries if there was an exception for exceeding the temporary quote limit."""
 
@@ -122,12 +135,19 @@ class retry_if_temporary_quota(tenacity.retry_if_exception):
 
 
 class retry_if_operation_in_progress(tenacity.retry_if_exception):
-    """Retries if there was an exception for exceeding the temporary quote limit."""
+    """Retries if there was an exception in case of operation in progress."""
 
     def __init__(self):
         super().__init__(is_operation_in_progress_exception)
 
 
+class retry_if_temporary_refresh_credentials(tenacity.retry_if_exception):
+    """Retries if there was an exception for refreshing credentials."""
+
+    def __init__(self):
+        super().__init__(is_refresh_credentials_exception)
+
+
 # A fake project_id to use in functions decorated by fallback_to_default_project_id
 # This allows the 'project_id' argument to be of type str instead of str | None,
 # making it easier to type hint the function body without dealing with the None
@@ -364,14 +384,14 @@ class GoogleBaseHook(BaseHook):
         return hasattr(self, "extras") and get_field(self.extras, f) or default
 
     @property
-    def project_id(self) -> str | None:
+    def project_id(self) -> str:
         """
         Returns project id.
 
         :return: id of the project
         """
         _, project_id = self.get_credentials_and_project_id()
-        return project_id
+        return project_id or PROVIDE_PROJECT_ID
 
     @property
     def num_retries(self) -> int:
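Note: PROVIDE_PROJECT_ID is the "fake project_id" sentinel described in the comment above; it lets project_id parameters across this release be annotated as plain str (instead of str | None) while fallback_to_default_project_id still substitutes the connection's default project at call time. A simplified, self-contained illustration of the pattern, not the actual Airflow implementation:

import functools

PROVIDE_PROJECT_ID: str = ""  # stand-in sentinel; Airflow defines its own constant


def fallback_to_default_project_id(func):
    # Swap the sentinel for the hook's default project before calling the method.
    @functools.wraps(func)
    def wrapper(self, *args, project_id: str = PROVIDE_PROJECT_ID, **kwargs):
        if project_id == PROVIDE_PROJECT_ID:
            project_id = self.default_project_id
        return func(self, *args, project_id=project_id, **kwargs)

    return wrapper


class FakeHook:
    default_project_id = "my-default-project"

    @fallback_to_default_project_id
    def export_documents(self, body: dict, project_id: str = PROVIDE_PROJECT_ID) -> str:
        return f"exporting {body} in project {project_id}"


print(FakeHook().export_documents({"collectionIds": []}))  # uses my-default-project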
@@ -426,7 +446,7 @@ class GoogleBaseHook(BaseHook):
     def quota_retry(*args, **kwargs) -> Callable:
         """Provide a mechanism to repeat requests in response to exceeding a temporary quota limit."""
 
-        def decorator(fun: Callable):
+        def decorator(func: Callable):
             default_kwargs = {
                 "wait": tenacity.wait_exponential(multiplier=1, max=100),
                 "retry": retry_if_temporary_quota(),
@@ -434,7 +454,7 @@ class GoogleBaseHook(BaseHook):
                 "after": tenacity.after_log(log, logging.DEBUG),
             }
             default_kwargs.update(**kwargs)
-            return tenacity.retry(*args, **default_kwargs)(fun)
+            return tenacity.retry(*args, **default_kwargs)(func)
 
         return decorator
 
@@ -442,7 +462,7 @@ class GoogleBaseHook(BaseHook):
     def operation_in_progress_retry(*args, **kwargs) -> Callable[[T], T]:
         """Provide a mechanism to repeat requests in response to operation in progress (HTTP 409) limit."""
 
-        def decorator(fun: T):
+        def decorator(func: T):
             default_kwargs = {
                 "wait": tenacity.wait_exponential(multiplier=1, max=300),
                 "retry": retry_if_operation_in_progress(),
@@ -450,7 +470,25 @@ class GoogleBaseHook(BaseHook):
                 "after": tenacity.after_log(log, logging.DEBUG),
             }
             default_kwargs.update(**kwargs)
-            return cast(T, tenacity.retry(*args, **default_kwargs)(fun))
+            return cast(T, tenacity.retry(*args, **default_kwargs)(func))
+
+        return decorator
+
+    @staticmethod
+    def refresh_credentials_retry(*args, **kwargs) -> Callable[[T], T]:
+        """Provide a mechanism to repeat requests in response to a temporary refresh credential issue."""
+
+        def decorator(func: T):
+            default_kwargs = {
+                "wait": tenacity.wait_exponential(multiplier=1, max=5),
+                "stop": tenacity.stop_after_attempt(3),
+                "retry": retry_if_temporary_refresh_credentials(),
+                "reraise": True,
+                "before": tenacity.before_log(log, logging.DEBUG),
+                "after": tenacity.after_log(log, logging.DEBUG),
+            }
+            default_kwargs.update(**kwargs)
+            return cast(T, tenacity.retry(*args, **default_kwargs)(func))
 
         return decorator
 
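Note: refresh_credentials_retry mirrors quota_retry and operation_in_progress_retry but retries only on the impersonated-credentials RefreshError matched by is_refresh_credentials_exception, with at most 3 attempts and a short exponential backoff before re-raising. A hypothetical usage on a hook method; the hook and method here are illustrative, not part of this diff:

from airflow.providers.google.common.hooks.base_google import GoogleBaseHook


class ExampleGoogleHook(GoogleBaseHook):
    @GoogleBaseHook.refresh_credentials_retry()
    def fetch_resource(self, resource_id: str):
        # Any RefreshError complaining about "Unable to acquire impersonated credentials"
        # raised in here is retried up to 3 times before being re-raised.
        credentials = self.get_credentials()
        return resource_id, credentials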
airflow/providers/google/firebase/hooks/firestore.py

@@ -25,7 +25,7 @@ from typing import Sequence
 from googleapiclient.discovery import build, build_from_document
 
 from airflow.exceptions import AirflowException
-from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
 
 # Time to sleep between active checks of the operation results
 TIME_TO_SLEEP_IN_SECONDS = 5
@@ -84,7 +84,7 @@ class CloudFirestoreHook(GoogleBaseHook):
 
     @GoogleBaseHook.fallback_to_default_project_id
     def export_documents(
-        self, body: dict, database_id: str = "(default)", project_id: str | None = None
+        self, body: dict, database_id: str = "(default)", project_id: str = PROVIDE_PROJECT_ID
     ) -> None:
         """
         Start a export with the specified configuration.
airflow/providers/google/firebase/operators/firestore.py

@@ -20,6 +20,7 @@ from typing import TYPE_CHECKING, Sequence
 
 from airflow.exceptions import AirflowException
 from airflow.models import BaseOperator
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.providers.google.firebase.hooks.firestore import CloudFirestoreHook
 
 if TYPE_CHECKING:
@@ -64,7 +65,7 @@ class CloudFirestoreExportDatabaseOperator(BaseOperator):
         *,
         body: dict,
         database_id: str = "(default)",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         api_version: str = "v1",
         impersonation_chain: str | Sequence[str] | None = None,
airflow/providers/google/get_provider_info.py

@@ -28,8 +28,9 @@ def get_provider_info():
         "name": "Google",
         "description": "Google services including:\n\n - `Google Ads <https://ads.google.com/>`__\n - `Google Cloud (GCP) <https://cloud.google.com/>`__\n - `Google Firebase <https://firebase.google.com/>`__\n - `Google LevelDB <https://github.com/google/leveldb/>`__\n - `Google Marketing Platform <https://marketingplatform.google.com/>`__\n - `Google Workspace <https://workspace.google.com/>`__ (formerly Google Suite)\n",
         "state": "ready",
-        "source-date-epoch": 1712665855,
+        "source-date-epoch": 1714476421,
         "versions": [
+            "10.18.0",
             "10.17.0",
             "10.16.0",
             "10.15.0",
@@ -86,7 +87,7 @@ def get_provider_info():
             "1.0.0",
         ],
         "dependencies": [
-            "apache-airflow>=2.6.0",
+            "apache-airflow>=2.7.0",
             "apache-airflow-providers-common-sql>=1.7.2",
             "asgiref>=3.5.2",
             "gcloud-aio-auth>=4.0.0,<5.0.0",
{apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-google
-Version: 10.17.0rc1
+Version: 10.18.0rc1
 Summary: Provider package apache-airflow-providers-google for Apache Airflow
 Keywords: airflow-provider,google,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -23,7 +23,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: System :: Monitoring
 Requires-Dist: PyOpenSSL
 Requires-Dist: apache-airflow-providers-common-sql>=1.7.2rc0
-Requires-Dist: apache-airflow>=2.6.0rc0
+Requires-Dist: apache-airflow>=2.7.0rc0
 Requires-Dist: asgiref>=3.5.2
 Requires-Dist: gcloud-aio-auth>=4.0.0,<5.0.0
 Requires-Dist: gcloud-aio-bigquery>=6.1.2
@@ -102,8 +102,8 @@ Requires-Dist: apache-airflow-providers-sftp ; extra == "sftp"
 Requires-Dist: apache-airflow-providers-ssh ; extra == "ssh"
 Requires-Dist: apache-airflow-providers-trino ; extra == "trino"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-google/10.17.0/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-google/10.17.0
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-google/10.18.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-google/10.18.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -171,7 +171,7 @@ Provides-Extra: trino
 
 Package ``apache-airflow-providers-google``
 
-Release: ``10.17.0.rc1``
+Release: ``10.18.0.rc1``
 
 
 Google services including:
@@ -191,7 +191,7 @@ This is a provider package for ``google`` provider. All classes for this provide
 are in ``airflow.providers.google`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-google/10.17.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-google/10.18.0/>`_.
 
 Installation
 ------------
@@ -208,7 +208,7 @@ Requirements
 ======================================= =====================
 PIP package Version required
 ======================================= =====================
-``apache-airflow`` ``>=2.6.0``
+``apache-airflow`` ``>=2.7.0``
 ``apache-airflow-providers-common-sql`` ``>=1.7.2``
 ``asgiref`` ``>=3.5.2``
 ``gcloud-aio-auth`` ``>=4.0.0,<5.0.0``
@@ -307,4 +307,4 @@ Dependent package
 ======================================================================================================================== ====================
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-google/10.17.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-google/10.18.0/changelog.html>`_.