apache-airflow-providers-google 10.17.0rc1-py3-none-any.whl → 10.18.0rc1-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, exactly as they appear in their respective public registries. It is provided for informational purposes only.
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/cloud/hooks/automl.py +1 -1
- airflow/providers/google/cloud/hooks/bigquery.py +64 -33
- airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
- airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
- airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
- airflow/providers/google/cloud/hooks/dataflow.py +246 -32
- airflow/providers/google/cloud/hooks/dataplex.py +6 -2
- airflow/providers/google/cloud/hooks/dlp.py +14 -14
- airflow/providers/google/cloud/hooks/gcs.py +6 -2
- airflow/providers/google/cloud/hooks/gdm.py +2 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/mlengine.py +8 -4
- airflow/providers/google/cloud/hooks/pubsub.py +1 -1
- airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
- airflow/providers/google/cloud/links/vertex_ai.py +2 -1
- airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
- airflow/providers/google/cloud/operators/automl.py +13 -12
- airflow/providers/google/cloud/operators/bigquery.py +36 -22
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
- airflow/providers/google/cloud/operators/bigtable.py +7 -6
- airflow/providers/google/cloud/operators/cloud_build.py +12 -11
- airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
- airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
- airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
- airflow/providers/google/cloud/operators/compute.py +12 -11
- airflow/providers/google/cloud/operators/datacatalog.py +21 -20
- airflow/providers/google/cloud/operators/dataflow.py +59 -42
- airflow/providers/google/cloud/operators/datafusion.py +11 -10
- airflow/providers/google/cloud/operators/datapipeline.py +3 -2
- airflow/providers/google/cloud/operators/dataprep.py +5 -4
- airflow/providers/google/cloud/operators/dataproc.py +19 -16
- airflow/providers/google/cloud/operators/datastore.py +8 -7
- airflow/providers/google/cloud/operators/dlp.py +31 -30
- airflow/providers/google/cloud/operators/functions.py +4 -3
- airflow/providers/google/cloud/operators/gcs.py +66 -41
- airflow/providers/google/cloud/operators/kubernetes_engine.py +232 -12
- airflow/providers/google/cloud/operators/life_sciences.py +2 -1
- airflow/providers/google/cloud/operators/mlengine.py +11 -10
- airflow/providers/google/cloud/operators/pubsub.py +6 -5
- airflow/providers/google/cloud/operators/spanner.py +7 -6
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
- airflow/providers/google/cloud/operators/stackdriver.py +11 -10
- airflow/providers/google/cloud/operators/tasks.py +14 -13
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
- airflow/providers/google/cloud/operators/translate_speech.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
- airflow/providers/google/cloud/operators/vision.py +13 -12
- airflow/providers/google/cloud/operators/workflows.py +10 -9
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/sensors/bigtable.py +2 -1
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/sensors/dataflow.py +239 -52
- airflow/providers/google/cloud/sensors/datafusion.py +2 -1
- airflow/providers/google/cloud/sensors/dataproc.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +14 -12
- airflow/providers/google/cloud/sensors/tasks.py +2 -1
- airflow/providers/google/cloud/sensors/workflows.py +2 -1
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
- airflow/providers/google/cloud/triggers/bigquery.py +14 -3
- airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
- airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/triggers/dataflow.py +504 -4
- airflow/providers/google/cloud/triggers/dataproc.py +110 -26
- airflow/providers/google/cloud/triggers/mlengine.py +2 -1
- airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
- airflow/providers/google/common/hooks/base_google.py +45 -7
- airflow/providers/google/firebase/hooks/firestore.py +2 -2
- airflow/providers/google/firebase/operators/firestore.py +2 -1
- airflow/providers/google/get_provider_info.py +3 -2
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/METADATA +8 -8
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/RECORD +88 -89
- airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/triggers/dataproc.py

```diff
@@ -25,10 +25,12 @@ import time
 from typing import Any, AsyncIterator, Sequence
 
 from google.api_core.exceptions import NotFound
-from google.cloud.dataproc_v1 import Batch, ClusterStatus, JobStatus
+from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
 
-from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook
+from airflow.exceptions import AirflowException
+from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook, DataprocHook
 from airflow.providers.google.cloud.utils.dataproc import DataprocOperationType
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 
 
@@ -38,10 +40,12 @@ class DataprocBaseTrigger(BaseTrigger):
     def __init__(
         self,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         polling_interval_seconds: int = 30,
+        cancel_on_kill: bool = True,
+        delete_on_error: bool = True,
     ):
         super().__init__()
         self.region = region
@@ -49,6 +53,8 @@ class DataprocBaseTrigger(BaseTrigger):
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
         self.polling_interval_seconds = polling_interval_seconds
+        self.cancel_on_kill = cancel_on_kill
+        self.delete_on_error = delete_on_error
 
     def get_async_hook(self):
         return DataprocAsyncHook(
@@ -56,6 +62,16 @@ class DataprocBaseTrigger(BaseTrigger):
             impersonation_chain=self.impersonation_chain,
         )
 
+    def get_sync_hook(self):
+        # The synchronous hook is utilized to delete the cluster when a task is cancelled.
+        # This is because the asynchronous hook deletion is not awaited when the trigger task
+        # is cancelled. The call for deleting the cluster or job through the sync hook is not a blocking
+        # call, which means it does not wait until the cluster or job is deleted.
+        return DataprocHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
 
 class DataprocSubmitTrigger(DataprocBaseTrigger):
     """
@@ -90,20 +106,39 @@ class DataprocSubmitTrigger(DataprocBaseTrigger):
                 "gcp_conn_id": self.gcp_conn_id,
                 "impersonation_chain": self.impersonation_chain,
                 "polling_interval_seconds": self.polling_interval_seconds,
+                "cancel_on_kill": self.cancel_on_kill,
             },
         )
 
     async def run(self):
-        while True:
-            job = await self.get_async_hook().get_job(
-                project_id=self.project_id, region=self.region, job_id=self.job_id
-            )
-            state = job.status.state
-            self.log.info("Dataproc job: %s is in state: %s", self.job_id, state)
-            if state in (JobStatus.State.DONE, JobStatus.State.CANCELLED, JobStatus.State.ERROR):
-                break
-            await asyncio.sleep(self.polling_interval_seconds)
-        yield TriggerEvent({"job_id": self.job_id, "job_state": state, "job": job})
+        try:
+            while True:
+                job = await self.get_async_hook().get_job(
+                    project_id=self.project_id, region=self.region, job_id=self.job_id
+                )
+                state = job.status.state
+                self.log.info("Dataproc job: %s is in state: %s", self.job_id, state)
+                if state in (JobStatus.State.DONE, JobStatus.State.CANCELLED, JobStatus.State.ERROR):
+                    break
+                await asyncio.sleep(self.polling_interval_seconds)
+            yield TriggerEvent({"job_id": self.job_id, "job_state": state, "job": job})
+        except asyncio.CancelledError:
+            self.log.info("Task got cancelled.")
+            try:
+                if self.job_id and self.cancel_on_kill:
+                    self.log.info("Cancelling the job: %s", self.job_id)
+                    # The synchronous hook is utilized to delete the cluster when a task is cancelled. This
+                    # is because the asynchronous hook deletion is not awaited when the trigger task is
+                    # cancelled. The call for deleting the cluster or job through the sync hook is not a
+                    # blocking call, which means it does not wait until the cluster or job is deleted.
+                    self.get_sync_hook().cancel_job(
+                        job_id=self.job_id, project_id=self.project_id, region=self.region
+                    )
+                    self.log.info("Job: %s is cancelled", self.job_id)
+                    yield TriggerEvent({"job_id": self.job_id, "job_state": ClusterStatus.State.DELETING})
+            except Exception as e:
+                self.log.error("Failed to cancel the job: %s with error : %s", self.job_id, str(e))
+                raise e
 
 
 class DataprocClusterTrigger(DataprocBaseTrigger):
@@ -139,24 +174,73 @@ class DataprocClusterTrigger(DataprocBaseTrigger):
                 "gcp_conn_id": self.gcp_conn_id,
                 "impersonation_chain": self.impersonation_chain,
                 "polling_interval_seconds": self.polling_interval_seconds,
+                "delete_on_error": self.delete_on_error,
             },
         )
 
     async def run(self) -> AsyncIterator[TriggerEvent]:
-        while True:
-            cluster = await self.get_async_hook().get_cluster(
-                project_id=self.project_id, region=self.region, cluster_name=self.cluster_name
+        try:
+            while True:
+                cluster = await self.fetch_cluster()
+                state = cluster.status.state
+                if state == ClusterStatus.State.ERROR:
+                    await self.delete_when_error_occurred(cluster)
+                    yield TriggerEvent(
+                        {
+                            "cluster_name": self.cluster_name,
+                            "cluster_state": ClusterStatus.State.DELETING,
+                            "cluster": cluster,
+                        }
+                    )
+                    return
+                elif state == ClusterStatus.State.RUNNING:
+                    yield TriggerEvent(
+                        {
+                            "cluster_name": self.cluster_name,
+                            "cluster_state": state,
+                            "cluster": cluster,
+                        }
+                    )
+                    return
+                self.log.info("Current state is %s", state)
+                self.log.info("Sleeping for %s seconds.", self.polling_interval_seconds)
+                await asyncio.sleep(self.polling_interval_seconds)
+        except asyncio.CancelledError:
+            try:
+                if self.delete_on_error:
+                    self.log.info("Deleting cluster %s.", self.cluster_name)
+                    # The synchronous hook is utilized to delete the cluster when a task is cancelled.
+                    # This is because the asynchronous hook deletion is not awaited when the trigger task
+                    # is cancelled. The call for deleting the cluster through the sync hook is not a blocking
+                    # call, which means it does not wait until the cluster is deleted.
+                    self.get_sync_hook().delete_cluster(
+                        region=self.region, cluster_name=self.cluster_name, project_id=self.project_id
+                    )
+                    self.log.info("Deleted cluster %s during cancellation.", self.cluster_name)
+            except Exception as e:
+                self.log.error("Error during cancellation handling: %s", e)
+                raise AirflowException("Error during cancellation handling: %s", e)
+
+    async def fetch_cluster(self) -> Cluster:
+        """Fetch the cluster status."""
+        return await self.get_async_hook().get_cluster(
+            project_id=self.project_id, region=self.region, cluster_name=self.cluster_name
+        )
+
+    async def delete_when_error_occurred(self, cluster: Cluster) -> None:
+        """
+        Delete the cluster on error.
+
+        :param cluster: The cluster to delete.
+        """
+        if self.delete_on_error:
+            self.log.info("Deleting cluster %s.", self.cluster_name)
+            await self.get_async_hook().delete_cluster(
+                region=self.region, cluster_name=self.cluster_name, project_id=self.project_id
             )
-            state = cluster.status.state
-            self.log.info("Current state is %s", state)
-            if state in (
-                ClusterStatus.State.ERROR,
-                ClusterStatus.State.RUNNING,
-            ):
-                break
-            self.log.info("Sleeping for %s seconds.", self.polling_interval_seconds)
-            await asyncio.sleep(self.polling_interval_seconds)
-        yield TriggerEvent({"cluster_name": self.cluster_name, "cluster_state": state, "cluster": cluster})
+            self.log.info("Cluster %s has been deleted.", self.cluster_name)
+        else:
+            self.log.info("Cluster %s is not deleted as delete_on_error is set to False.", self.cluster_name)
 
 
 class DataprocBatchTrigger(DataprocBaseTrigger):
```
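The new `cancel_on_kill` and `delete_on_error` flags round-trip through `serialize()`, so a deferred task that is killed can clean up the job or cluster it was watching. A minimal sketch of how the submit trigger might be constructed and serialized — the job id, region, and project are illustrative, and in practice the deferrable Dataproc operators build this trigger for you:

```python
from airflow.providers.google.cloud.triggers.dataproc import DataprocSubmitTrigger

# Illustrative values; normally a deferrable Dataproc operator instantiates
# this trigger and hands it to the triggerer process.
trigger = DataprocSubmitTrigger(
    job_id="my-dataproc-job",      # assumed job id
    region="us-central1",
    project_id="my-gcp-project",   # may now be omitted (PROVIDE_PROJECT_ID default)
    cancel_on_kill=False,          # keep the Dataproc job running if the task is killed
)

# serialize() now includes cancel_on_kill, so the flag survives the
# round-trip to the triggerer.
classpath, kwargs = trigger.serialize()
assert kwargs["cancel_on_kill"] is False
```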
airflow/providers/google/cloud/triggers/mlengine.py

```diff
@@ -20,6 +20,7 @@ import asyncio
 from typing import Any, AsyncIterator, Sequence
 
 from airflow.providers.google.cloud.hooks.mlengine import MLEngineAsyncHook
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 
 
@@ -45,7 +46,7 @@ class MLEngineStartTrainingJobTrigger(BaseTrigger):
         runtime_version: str | None = None,
         python_version: str | None = None,
         job_dir: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         labels: dict[str, str] | None = None,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
```
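`PROVIDE_PROJECT_ID` is a sentinel string defined in `base_google.py`: it keeps the `project_id` annotation as plain `str` while still meaning "resolve the project from the connection". A small sketch of the pattern these signature changes follow (`start_training_job` is a hypothetical function, not part of the provider):

```python
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID


def start_training_job(job_id: str, project_id: str = PROVIDE_PROJECT_ID) -> None:
    # Hypothetical example. Callers may omit project_id entirely; hook methods
    # decorated with fallback_to_default_project_id replace the sentinel with
    # the project configured on the Airflow connection.
    print(f"starting {job_id} in {project_id}")
```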
airflow/providers/google/cloud/triggers/vertex_ai.py

```diff
@@ -29,6 +29,7 @@ from google.cloud.aiplatform_v1 import (
 
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.vertex_ai.batch_prediction_job import BatchPredictionJobAsyncHook
+from airflow.providers.google.cloud.hooks.vertex_ai.custom_job import CustomJobAsyncHook
 from airflow.providers.google.cloud.hooks.vertex_ai.hyperparameter_tuning_job import (
     HyperparameterTuningJobAsyncHook,
 )
@@ -189,3 +190,96 @@ class RunPipelineJobTrigger(BaseVertexAIJobTrigger):
             poll_interval=self.poll_interval,
         )
         return job
+
+
+class CustomTrainingJobTrigger(BaseVertexAIJobTrigger):
+    """
+    Make async calls to Vertex AI to check the state of a running custom training job.
+
+    Return the job when it enters a completed state.
+    """
+
+    job_type_verbose_name = "Custom Training Job"
+    job_serializer_class = types.TrainingPipeline
+    statuses_success = {
+        PipelineState.PIPELINE_STATE_PAUSED,
+        PipelineState.PIPELINE_STATE_SUCCEEDED,
+    }
+
+    @cached_property
+    def async_hook(self) -> CustomJobAsyncHook:
+        return CustomJobAsyncHook(
+            gcp_conn_id=self.conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+    async def _wait_job(self) -> types.TrainingPipeline:
+        pipeline: types.TrainingPipeline = await self.async_hook.wait_for_training_pipeline(
+            project_id=self.project_id,
+            location=self.location,
+            pipeline_id=self.job_id,
+            poll_interval=self.poll_interval,
+        )
+        return pipeline
+
+
+class CustomContainerTrainingJobTrigger(BaseVertexAIJobTrigger):
+    """
+    Make async calls to Vertex AI to check the state of a running custom container training job.
+
+    Return the job when it enters a completed state.
+    """
+
+    job_type_verbose_name = "Custom Container Training Job"
+    job_serializer_class = types.TrainingPipeline
+    statuses_success = {
+        PipelineState.PIPELINE_STATE_PAUSED,
+        PipelineState.PIPELINE_STATE_SUCCEEDED,
+    }
+
+    @cached_property
+    def async_hook(self) -> CustomJobAsyncHook:
+        return CustomJobAsyncHook(
+            gcp_conn_id=self.conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+    async def _wait_job(self) -> types.TrainingPipeline:
+        pipeline: types.TrainingPipeline = await self.async_hook.wait_for_training_pipeline(
+            project_id=self.project_id,
+            location=self.location,
+            pipeline_id=self.job_id,
+            poll_interval=self.poll_interval,
+        )
+        return pipeline
+
+
+class CustomPythonPackageTrainingJobTrigger(BaseVertexAIJobTrigger):
+    """
+    Make async calls to Vertex AI to check the state of a running custom python package training job.
+
+    Return the job when it enters a completed state.
+    """
+
+    job_type_verbose_name = "Custom Python Package Training Job"
+    job_serializer_class = types.TrainingPipeline
+    statuses_success = {
+        PipelineState.PIPELINE_STATE_PAUSED,
+        PipelineState.PIPELINE_STATE_SUCCEEDED,
+    }
+
+    @cached_property
+    def async_hook(self) -> CustomJobAsyncHook:
+        return CustomJobAsyncHook(
+            gcp_conn_id=self.conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+    async def _wait_job(self) -> types.TrainingPipeline:
+        pipeline: types.TrainingPipeline = await self.async_hook.wait_for_training_pipeline(
+            project_id=self.project_id,
+            location=self.location,
+            pipeline_id=self.job_id,
+            poll_interval=self.poll_interval,
+        )
+        return pipeline
```
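Each new trigger only overrides the hook and `_wait_job()`; polling and event emission come from `BaseVertexAIJobTrigger`. A hedged sketch of driving one of them directly — the constructor arguments are assumed from the base class (they are not shown in this hunk), and the ids are illustrative:

```python
import asyncio

from airflow.providers.google.cloud.triggers.vertex_ai import CustomTrainingJobTrigger

# Assumed signature, inherited from BaseVertexAIJobTrigger.
trigger = CustomTrainingJobTrigger(
    conn_id="google_cloud_default",
    project_id="my-gcp-project",   # illustrative
    location="us-central1",
    job_id="1234567890",           # id of the training pipeline to watch
    poll_interval=60,
)


async def first_event():
    # run() waits for the pipeline to finish and yields a single TriggerEvent;
    # statuses_success above decides which terminal states count as success.
    async for event in trigger.run():
        return event


event = asyncio.run(first_event())
```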
airflow/providers/google/common/hooks/base_google.py

```diff
@@ -114,6 +114,19 @@ def is_operation_in_progress_exception(exception: Exception) -> bool:
     return False
 
 
+def is_refresh_credentials_exception(exception: Exception) -> bool:
+    """
+    Handle refresh credentials exceptions.
+
+    Some calls return 502 (server error) in case a new token cannot be obtained.
+
+    * Google BigQuery
+    """
+    if isinstance(exception, RefreshError):
+        return "Unable to acquire impersonated credentials" in str(exception)
+    return False
+
+
 class retry_if_temporary_quota(tenacity.retry_if_exception):
     """Retries if there was an exception for exceeding the temporary quote limit."""
 
@@ -122,12 +135,19 @@ class retry_if_temporary_quota(tenacity.retry_if_exception):
 
 
 class retry_if_operation_in_progress(tenacity.retry_if_exception):
-    """Retries if there was an exception
+    """Retries if there was an exception in case of operation in progress."""
 
     def __init__(self):
         super().__init__(is_operation_in_progress_exception)
 
 
+class retry_if_temporary_refresh_credentials(tenacity.retry_if_exception):
+    """Retries if there was an exception for refreshing credentials."""
+
+    def __init__(self):
+        super().__init__(is_refresh_credentials_exception)
+
+
 # A fake project_id to use in functions decorated by fallback_to_default_project_id
 # This allows the 'project_id' argument to be of type str instead of str | None,
 # making it easier to type hint the function body without dealing with the None
@@ -364,14 +384,14 @@ class GoogleBaseHook(BaseHook):
         return hasattr(self, "extras") and get_field(self.extras, f) or default
 
     @property
-    def project_id(self) -> str | None:
+    def project_id(self) -> str:
         """
         Returns project id.
 
         :return: id of the project
         """
         _, project_id = self.get_credentials_and_project_id()
-        return project_id
+        return project_id or PROVIDE_PROJECT_ID
 
     @property
     def num_retries(self) -> int:
@@ -426,7 +446,7 @@ class GoogleBaseHook(BaseHook):
     def quota_retry(*args, **kwargs) -> Callable:
         """Provide a mechanism to repeat requests in response to exceeding a temporary quota limit."""
 
-        def decorator(fun: Callable):
+        def decorator(func: Callable):
            default_kwargs = {
                 "wait": tenacity.wait_exponential(multiplier=1, max=100),
                 "retry": retry_if_temporary_quota(),
@@ -434,7 +454,7 @@ class GoogleBaseHook(BaseHook):
                 "after": tenacity.after_log(log, logging.DEBUG),
             }
             default_kwargs.update(**kwargs)
-            return tenacity.retry(*args, **default_kwargs)(fun)
+            return tenacity.retry(*args, **default_kwargs)(func)
 
         return decorator
 
@@ -442,7 +462,7 @@ class GoogleBaseHook(BaseHook):
     def operation_in_progress_retry(*args, **kwargs) -> Callable[[T], T]:
         """Provide a mechanism to repeat requests in response to operation in progress (HTTP 409) limit."""
 
-        def decorator(fun: T):
+        def decorator(func: T):
             default_kwargs = {
                 "wait": tenacity.wait_exponential(multiplier=1, max=300),
                 "retry": retry_if_operation_in_progress(),
@@ -450,7 +470,25 @@ class GoogleBaseHook(BaseHook):
                 "after": tenacity.after_log(log, logging.DEBUG),
             }
             default_kwargs.update(**kwargs)
-            return cast(T, tenacity.retry(*args, **default_kwargs)(fun))
+            return cast(T, tenacity.retry(*args, **default_kwargs)(func))
+
+        return decorator
+
+    @staticmethod
+    def refresh_credentials_retry(*args, **kwargs) -> Callable[[T], T]:
+        """Provide a mechanism to repeat requests in response to a temporary refresh credential issue."""
+
+        def decorator(func: T):
+            default_kwargs = {
+                "wait": tenacity.wait_exponential(multiplier=1, max=5),
+                "stop": tenacity.stop_after_attempt(3),
+                "retry": retry_if_temporary_refresh_credentials(),
+                "reraise": True,
+                "before": tenacity.before_log(log, logging.DEBUG),
+                "after": tenacity.after_log(log, logging.DEBUG),
+            }
+            default_kwargs.update(**kwargs)
+            return cast(T, tenacity.retry(*args, **default_kwargs)(func))
 
         return decorator
 
```
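A hook method can opt into the new retry via the decorator; combined with `fallback_to_default_project_id`, this also shows where `PROVIDE_PROJECT_ID` ends up. The hook subclass and method below are hypothetical:

```python
from airflow.providers.google.common.hooks.base_google import (
    PROVIDE_PROJECT_ID,
    GoogleBaseHook,
)


class MyGoogleHook(GoogleBaseHook):
    # Hypothetical method. refresh_credentials_retry() retries up to three
    # times (exponential backoff, max 5 s wait) when a RefreshError mentioning
    # "Unable to acquire impersonated credentials" is raised, then reraises.
    @GoogleBaseHook.refresh_credentials_retry()
    @GoogleBaseHook.fallback_to_default_project_id
    def list_resources(self, project_id: str = PROVIDE_PROJECT_ID) -> list:
        # fallback_to_default_project_id swaps the sentinel for the
        # connection's project before this body runs.
        return []
```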
airflow/providers/google/firebase/hooks/firestore.py

```diff
@@ -25,7 +25,7 @@ from typing import Sequence
 from googleapiclient.discovery import build, build_from_document
 
 from airflow.exceptions import AirflowException
-from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
 
 # Time to sleep between active checks of the operation results
 TIME_TO_SLEEP_IN_SECONDS = 5
@@ -84,7 +84,7 @@ class CloudFirestoreHook(GoogleBaseHook):
 
     @GoogleBaseHook.fallback_to_default_project_id
     def export_documents(
-        self, body: dict, database_id: str = "(default)", project_id: str | None = None
+        self, body: dict, database_id: str = "(default)", project_id: str = PROVIDE_PROJECT_ID
     ) -> None:
         """
         Start a export with the specified configuration.
```
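With the sentinel default, `export_documents` can be called without a `project_id`; `fallback_to_default_project_id` fills it in from the connection. A sketch — the bucket path is illustrative, and the body follows the Firestore `exportDocuments` API:

```python
from airflow.providers.google.firebase.hooks.firestore import CloudFirestoreHook

hook = CloudFirestoreHook(gcp_conn_id="google_cloud_default")

# project_id is omitted: the fallback decorator resolves it from the
# connection. outputUriPrefix must point at a GCS bucket you own.
hook.export_documents(body={"outputUriPrefix": "gs://my-bucket/firestore-export"})
```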
airflow/providers/google/firebase/operators/firestore.py

```diff
@@ -20,6 +20,7 @@ from typing import TYPE_CHECKING, Sequence
 
 from airflow.exceptions import AirflowException
 from airflow.models import BaseOperator
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.providers.google.firebase.hooks.firestore import CloudFirestoreHook
 
 if TYPE_CHECKING:
@@ -64,7 +65,7 @@ class CloudFirestoreExportDatabaseOperator(BaseOperator):
         *,
         body: dict,
         database_id: str = "(default)",
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         api_version: str = "v1",
         impersonation_chain: str | Sequence[str] | None = None,
```
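The operator signature changes the same way, so a DAG task no longer needs to pass `project_id` explicitly. A sketch of a task definition (task id and bucket are illustrative):

```python
from airflow.providers.google.firebase.operators.firestore import (
    CloudFirestoreExportDatabaseOperator,
)

export_firestore = CloudFirestoreExportDatabaseOperator(
    task_id="export_firestore",
    body={"outputUriPrefix": "gs://my-bucket/firestore-export"},
    # project_id omitted: defaults to PROVIDE_PROJECT_ID and is resolved
    # from the google_cloud_default connection at execution time.
)
```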
airflow/providers/google/get_provider_info.py

```diff
@@ -28,8 +28,9 @@ def get_provider_info():
         "name": "Google",
         "description": "Google services including:\n\n  - `Google Ads <https://ads.google.com/>`__\n  - `Google Cloud (GCP) <https://cloud.google.com/>`__\n  - `Google Firebase <https://firebase.google.com/>`__\n  - `Google LevelDB <https://github.com/google/leveldb/>`__\n  - `Google Marketing Platform <https://marketingplatform.google.com/>`__\n  - `Google Workspace <https://workspace.google.com/>`__ (formerly Google Suite)\n",
         "state": "ready",
-        "source-date-epoch":
+        "source-date-epoch": 1714476421,
         "versions": [
+            "10.18.0",
             "10.17.0",
             "10.16.0",
             "10.15.0",
@@ -86,7 +87,7 @@ def get_provider_info():
             "1.0.0",
         ],
         "dependencies": [
-            "apache-airflow>=2.6.0",
+            "apache-airflow>=2.7.0",
             "apache-airflow-providers-common-sql>=1.7.2",
             "asgiref>=3.5.2",
             "gcloud-aio-auth>=4.0.0,<5.0.0",
```
apache_airflow_providers_google-10.18.0rc1.dist-info/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-google
-Version: 10.17.0rc1
+Version: 10.18.0rc1
 Summary: Provider package apache-airflow-providers-google for Apache Airflow
 Keywords: airflow-provider,google,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -23,7 +23,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: System :: Monitoring
 Requires-Dist: PyOpenSSL
 Requires-Dist: apache-airflow-providers-common-sql>=1.7.2rc0
-Requires-Dist: apache-airflow>=2.6.0rc0
+Requires-Dist: apache-airflow>=2.7.0rc0
 Requires-Dist: asgiref>=3.5.2
 Requires-Dist: gcloud-aio-auth>=4.0.0,<5.0.0
 Requires-Dist: gcloud-aio-bigquery>=6.1.2
@@ -102,8 +102,8 @@ Requires-Dist: apache-airflow-providers-sftp ; extra == "sftp"
 Requires-Dist: apache-airflow-providers-ssh ; extra == "ssh"
 Requires-Dist: apache-airflow-providers-trino ; extra == "trino"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-google/10.17.0/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-google/10.17.0
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-google/10.18.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-google/10.18.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -171,7 +171,7 @@ Provides-Extra: trino
 
 Package ``apache-airflow-providers-google``
 
-Release: ``10.17.0.rc1``
+Release: ``10.18.0.rc1``
 
 
 Google services including:
@@ -191,7 +191,7 @@ This is a provider package for ``google`` provider. All classes for this provide
 are in ``airflow.providers.google`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-google/10.17.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-google/10.18.0/>`_.
 
 Installation
 ------------
@@ -208,7 +208,7 @@ Requirements
 ======================================= =====================
 PIP package                             Version required
 ======================================= =====================
-``apache-airflow``                      ``>=2.6.0``
+``apache-airflow``                      ``>=2.7.0``
 ``apache-airflow-providers-common-sql`` ``>=1.7.2``
 ``asgiref``                             ``>=3.5.2``
 ``gcloud-aio-auth``                     ``>=4.0.0,<5.0.0``
@@ -307,4 +307,4 @@ Dependent package
 ======================================================================================================================== ====================
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-google/10.17.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-google/10.18.0/changelog.html>`_.
```