apache-airflow-providers-google 10.2.0rc1__py3-none-any.whl → 10.3.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +38 -39
- airflow/providers/google/ads/transfers/ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +6 -9
- airflow/providers/google/cloud/hooks/bigquery.py +328 -318
- airflow/providers/google/cloud/hooks/cloud_sql.py +66 -22
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +46 -70
- airflow/providers/google/cloud/hooks/dataflow.py +11 -15
- airflow/providers/google/cloud/hooks/dataform.py +3 -3
- airflow/providers/google/cloud/hooks/dataproc.py +577 -573
- airflow/providers/google/cloud/hooks/functions.py +60 -76
- airflow/providers/google/cloud/hooks/gcs.py +108 -18
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +69 -90
- airflow/providers/google/cloud/links/datafusion.py +4 -3
- airflow/providers/google/cloud/operators/bigquery.py +201 -191
- airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/operators/cloud_build.py +2 -1
- airflow/providers/google/cloud/operators/cloud_composer.py +4 -3
- airflow/providers/google/cloud/operators/cloud_sql.py +62 -28
- airflow/providers/google/cloud/operators/dataflow.py +6 -4
- airflow/providers/google/cloud/operators/dataform.py +3 -2
- airflow/providers/google/cloud/operators/dataproc.py +127 -123
- airflow/providers/google/cloud/operators/dataproc_metastore.py +18 -26
- airflow/providers/google/cloud/operators/gcs.py +35 -13
- airflow/providers/google/cloud/operators/kubernetes_engine.py +92 -42
- airflow/providers/google/cloud/operators/mlengine.py +2 -6
- airflow/providers/google/cloud/operators/vision.py +47 -56
- airflow/providers/google/cloud/sensors/bigquery.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +5 -7
- airflow/providers/google/cloud/sensors/pubsub.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +3 -2
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +6 -5
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +46 -7
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +5 -2
- airflow/providers/google/cloud/triggers/cloud_sql.py +102 -0
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +28 -6
- airflow/providers/google/cloud/utils/bigquery.py +17 -0
- airflow/providers/google/get_provider_info.py +7 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +4 -0
- airflow/providers/google/suite/transfers/local_to_drive.py +28 -26
- apache_airflow_providers_google-10.3.0rc1.dist-info/METADATA +289 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/RECORD +49 -48
- apache_airflow_providers_google-10.2.0rc1.dist-info/METADATA +0 -1824
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/LICENSE +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/NOTICE +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -63,8 +63,7 @@ OPERATIONAL_POLL_INTERVAL = 15
|
|
63
63
|
|
64
64
|
|
65
65
|
class GKEHook(GoogleBaseHook):
|
66
|
-
"""
|
67
|
-
Hook for managing Google Kubernetes Engine cluster APIs.
|
66
|
+
"""Google Kubernetes Engine cluster APIs.
|
68
67
|
|
69
68
|
All the methods in the hook where project_id is used must be called with
|
70
69
|
keyword arguments rather than positional.
|
@@ -90,7 +89,7 @@ class GKEHook(GoogleBaseHook):
|
|
90
89
|
self.location = location
|
91
90
|
|
92
91
|
def get_cluster_manager_client(self) -> ClusterManagerClient:
|
93
|
-
"""
|
92
|
+
"""Create or get a ClusterManagerClient."""
|
94
93
|
if self._client is None:
|
95
94
|
self._client = ClusterManagerClient(credentials=self.get_credentials(), client_info=CLIENT_INFO)
|
96
95
|
return self._client
|
@@ -114,13 +113,13 @@ class GKEHook(GoogleBaseHook):
|
|
114
113
|
return self.get_conn()
|
115
114
|
|
116
115
|
def wait_for_operation(self, operation: Operation, project_id: str | None = None) -> Operation:
|
117
|
-
"""
|
118
|
-
Given an operation, continuously fetches the status from Google Cloud until either
|
119
|
-
completion or an error occurring.
|
116
|
+
"""Continuously fetch the status from Google Cloud.
|
120
117
|
|
121
|
-
|
122
|
-
|
123
|
-
:
|
118
|
+
This is done until the given operation completes, or raises an error.
|
119
|
+
|
120
|
+
:param operation: The Operation to wait for.
|
121
|
+
:param project_id: Google Cloud project ID.
|
122
|
+
:return: A new, updated operation fetched from Google Cloud.
|
124
123
|
"""
|
125
124
|
self.log.info("Waiting for OPERATION_NAME %s", operation.name)
|
126
125
|
time.sleep(OPERATIONAL_POLL_INTERVAL)
|
@@ -134,8 +133,7 @@ class GKEHook(GoogleBaseHook):
|
|
134
133
|
return operation
|
135
134
|
|
136
135
|
def get_operation(self, operation_name: str, project_id: str | None = None) -> Operation:
|
137
|
-
"""
|
138
|
-
Fetches the operation from Google Cloud.
|
136
|
+
"""Get an operation from Google Cloud.
|
139
137
|
|
140
138
|
:param operation_name: Name of operation to fetch
|
141
139
|
:param project_id: Google Cloud project ID
|
@@ -150,8 +148,7 @@ class GKEHook(GoogleBaseHook):
|
|
150
148
|
|
151
149
|
@staticmethod
|
152
150
|
def _append_label(cluster_proto: Cluster, key: str, val: str) -> Cluster:
|
153
|
-
"""
|
154
|
-
Append labels to provided Cluster Protobuf.
|
151
|
+
"""Append labels to provided Cluster Protobuf.
|
155
152
|
|
156
153
|
Labels must fit the regex ``[a-z]([-a-z0-9]*[a-z0-9])?`` (current
|
157
154
|
airflow version string follows semantic versioning spec: x.y.z).
|
@@ -175,24 +172,23 @@ class GKEHook(GoogleBaseHook):
|
|
175
172
|
retry: Retry | _MethodDefault = DEFAULT,
|
176
173
|
timeout: float | None = None,
|
177
174
|
) -> Operation | None:
|
178
|
-
"""
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
:param
|
187
|
-
:param
|
188
|
-
|
189
|
-
operation of deletion is not finished.
|
175
|
+
"""Deletes the cluster, the Kubernetes endpoint, and all worker nodes.
|
176
|
+
|
177
|
+
Firewalls and routes that were configured during cluster creation are
|
178
|
+
also deleted. Other Google Compute Engine resources that might be in use
|
179
|
+
by the cluster (e.g. load balancer resources) will not be deleted if
|
180
|
+
they were not present at the initial create time.
|
181
|
+
|
182
|
+
:param name: The name of the cluster to delete.
|
183
|
+
:param project_id: Google Cloud project ID.
|
184
|
+
:param wait_to_complete: If *True*, wait until the deletion is finished
|
185
|
+
before returning.
|
190
186
|
:param retry: Retry object used to determine when/if to retry requests.
|
191
187
|
If None is specified, requests will not be retried.
|
192
|
-
:param timeout: The amount of time, in seconds, to wait for the request
|
193
|
-
complete. Note that if retry is specified, the timeout applies to
|
194
|
-
individual attempt.
|
195
|
-
:return: The full url to the delete operation if successful, else None
|
188
|
+
:param timeout: The amount of time, in seconds, to wait for the request
|
189
|
+
to complete. Note that if retry is specified, the timeout applies to
|
190
|
+
each individual attempt.
|
191
|
+
:return: The full url to the delete operation if successful, else None.
|
196
192
|
"""
|
197
193
|
self.log.info("Deleting (project_id=%s, location=%s, cluster_id=%s)", project_id, self.location, name)
|
198
194
|
|
@@ -219,26 +215,27 @@ class GKEHook(GoogleBaseHook):
|
|
219
215
|
retry: Retry | _MethodDefault = DEFAULT,
|
220
216
|
timeout: float | None = None,
|
221
217
|
) -> Operation | Cluster:
|
222
|
-
"""
|
223
|
-
|
224
|
-
|
218
|
+
"""Create a cluster.
|
219
|
+
|
220
|
+
This should consist of the specified number, and the type of Google
|
221
|
+
Compute Engine instances.
|
225
222
|
|
226
223
|
:param cluster: A Cluster protobuf or dict. If dict is provided, it must
|
227
224
|
be of the same form as the protobuf message
|
228
|
-
:class:`google.cloud.container_v1.types.Cluster
|
229
|
-
:param project_id: Google Cloud project ID
|
230
|
-
:param wait_to_complete: A boolean value which makes method to sleep
|
231
|
-
operation of creation is not finished.
|
225
|
+
:class:`google.cloud.container_v1.types.Cluster`.
|
226
|
+
:param project_id: Google Cloud project ID.
|
227
|
+
:param wait_to_complete: A boolean value which makes method to sleep
|
228
|
+
while operation of creation is not finished.
|
232
229
|
:param retry: A retry object (``google.api_core.retry.Retry``) used to
|
233
|
-
retry requests.
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
:
|
239
|
-
|
240
|
-
|
241
|
-
|
230
|
+
retry requests. If None is specified, requests will not be retried.
|
231
|
+
:param timeout: The amount of time, in seconds, to wait for the request
|
232
|
+
to complete. Note that if retry is specified, the timeout applies to
|
233
|
+
each individual attempt.
|
234
|
+
:return: The full url to the new, or existing, cluster.
|
235
|
+
:raises ParseError: On JSON parsing problems when trying to convert
|
236
|
+
dict.
|
237
|
+
:raises AirflowException: cluster is not dict type nor Cluster proto
|
238
|
+
type.
|
242
239
|
"""
|
243
240
|
if isinstance(cluster, dict):
|
244
241
|
cluster = Cluster.from_json(json.dumps(cluster))
|
@@ -273,17 +270,15 @@ class GKEHook(GoogleBaseHook):
|
|
273
270
|
retry: Retry | _MethodDefault = DEFAULT,
|
274
271
|
timeout: float | None = None,
|
275
272
|
) -> Cluster:
|
276
|
-
"""
|
277
|
-
Gets details of specified cluster.
|
273
|
+
"""Get details of specified cluster.
|
278
274
|
|
279
|
-
:param name: The name of the cluster to retrieve
|
280
|
-
:param project_id: Google Cloud project ID
|
281
|
-
:param retry: A retry object used to retry requests. If None is
|
282
|
-
requests will not be retried.
|
283
|
-
:param timeout: The amount of time, in seconds, to wait for the request
|
284
|
-
complete. Note that if retry is specified, the timeout applies to
|
285
|
-
individual attempt.
|
286
|
-
:return: google.cloud.container_v1.types.Cluster
|
275
|
+
:param name: The name of the cluster to retrieve.
|
276
|
+
:param project_id: Google Cloud project ID.
|
277
|
+
:param retry: A retry object used to retry requests. If None is
|
278
|
+
specified, requests will not be retried.
|
279
|
+
:param timeout: The amount of time, in seconds, to wait for the request
|
280
|
+
to complete. Note that if retry is specified, the timeout applies to
|
281
|
+
each individual attempt.
|
287
282
|
"""
|
288
283
|
self.log.info(
|
289
284
|
"Fetching cluster (project_id=%s, location=%s, cluster_name=%s)",
|
@@ -300,7 +295,7 @@ class GKEHook(GoogleBaseHook):
|
|
300
295
|
|
301
296
|
|
302
297
|
class GKEAsyncHook(GoogleBaseAsyncHook):
|
303
|
-
"""
|
298
|
+
"""Asynchronous client of GKE."""
|
304
299
|
|
305
300
|
sync_hook_class = GKEHook
|
306
301
|
|
@@ -331,8 +326,7 @@ class GKEAsyncHook(GoogleBaseAsyncHook):
|
|
331
326
|
operation_name: str,
|
332
327
|
project_id: str = PROVIDE_PROJECT_ID,
|
333
328
|
) -> Operation:
|
334
|
-
"""
|
335
|
-
Fetches the operation from Google Cloud.
|
329
|
+
"""Fetch an operation from Google Cloud.
|
336
330
|
|
337
331
|
:param operation_name: Name of operation to fetch.
|
338
332
|
:param project_id: Google Cloud project ID.
|
@@ -348,7 +342,7 @@ class GKEAsyncHook(GoogleBaseAsyncHook):
|
|
348
342
|
|
349
343
|
|
350
344
|
class GKEPodHook(GoogleBaseHook, PodOperatorHookProtocol):
|
351
|
-
"""
|
345
|
+
"""Google Kubernetes Engine pod APIs."""
|
352
346
|
|
353
347
|
def __init__(
|
354
348
|
self,
|
@@ -377,18 +371,16 @@ class GKEPodHook(GoogleBaseHook, PodOperatorHookProtocol):
|
|
377
371
|
"""Get the namespace configured by the Airflow connection."""
|
378
372
|
|
379
373
|
def _get_namespace(self):
|
380
|
-
"""
|
374
|
+
"""For compatibility with KubernetesHook. Deprecated; do not use."""
|
381
375
|
|
382
376
|
def get_xcom_sidecar_container_image(self):
|
383
|
-
"""
|
384
|
-
Returns the xcom sidecar image defined in the connection.
|
377
|
+
"""Get the xcom sidecar image defined in the connection.
|
385
378
|
|
386
379
|
Implemented for compatibility with KubernetesHook.
|
387
380
|
"""
|
388
381
|
|
389
382
|
def get_xcom_sidecar_container_resources(self):
|
390
|
-
"""
|
391
|
-
Returns the xcom sidecar resources defined in the connection.
|
383
|
+
"""Get the xcom sidecar resources defined in the connection.
|
392
384
|
|
393
385
|
Implemented for compatibility with KubernetesHook.
|
394
386
|
"""
|
@@ -419,8 +411,7 @@ class GKEPodHook(GoogleBaseHook, PodOperatorHookProtocol):
|
|
419
411
|
return creds.token
|
420
412
|
|
421
413
|
def get_pod(self, name: str, namespace: str) -> V1Pod:
|
422
|
-
"""
|
423
|
-
Gets pod's object.
|
414
|
+
"""Get a pod object.
|
424
415
|
|
425
416
|
:param name: Name of the pod.
|
426
417
|
:param namespace: Name of the pod's namespace.
|
@@ -432,31 +423,19 @@ class GKEPodHook(GoogleBaseHook, PodOperatorHookProtocol):
|
|
432
423
|
|
433
424
|
|
434
425
|
class GKEPodAsyncHook(GoogleBaseAsyncHook):
|
435
|
-
"""
|
436
|
-
Hook for managing Google Kubernetes Engine pods APIs in asynchronous way.
|
426
|
+
"""Google Kubernetes Engine pods APIs asynchronously.
|
437
427
|
|
438
428
|
:param cluster_url: The URL pointed to the cluster.
|
439
|
-
:param ssl_ca_cert: SSL certificate
|
429
|
+
:param ssl_ca_cert: SSL certificate used for authentication to the pod.
|
440
430
|
"""
|
441
431
|
|
442
432
|
sync_hook_class = GKEPodHook
|
443
433
|
scopes = ["https://www.googleapis.com/auth/cloud-platform"]
|
444
434
|
|
445
|
-
def __init__(
|
446
|
-
self,
|
447
|
-
cluster_url: str,
|
448
|
-
ssl_ca_cert: str,
|
449
|
-
**kwargs,
|
450
|
-
):
|
451
|
-
|
435
|
+
def __init__(self, cluster_url: str, ssl_ca_cert: str, **kwargs) -> None:
|
452
436
|
self._cluster_url = cluster_url
|
453
437
|
self._ssl_ca_cert = ssl_ca_cert
|
454
|
-
|
455
|
-
kwargs.update(
|
456
|
-
cluster_url=cluster_url,
|
457
|
-
ssl_ca_cert=ssl_ca_cert,
|
458
|
-
)
|
459
|
-
super().__init__(**kwargs)
|
438
|
+
super().__init__(cluster_url=cluster_url, ssl_ca_cert=ssl_ca_cert, **kwargs)
|
460
439
|
|
461
440
|
@contextlib.asynccontextmanager
|
462
441
|
async def get_conn(self, token: Token) -> async_client.ApiClient: # type: ignore[override]
|
@@ -490,8 +469,7 @@ class GKEPodAsyncHook(GoogleBaseAsyncHook):
|
|
490
469
|
return configuration
|
491
470
|
|
492
471
|
async def get_pod(self, name: str, namespace: str) -> V1Pod:
|
493
|
-
"""
|
494
|
-
Gets pod's object.
|
472
|
+
"""Get a pod object.
|
495
473
|
|
496
474
|
:param name: Name of the pod.
|
497
475
|
:param namespace: Name of the pod's namespace.
|
@@ -506,8 +484,7 @@ class GKEPodAsyncHook(GoogleBaseAsyncHook):
|
|
506
484
|
return pod
|
507
485
|
|
508
486
|
async def delete_pod(self, name: str, namespace: str):
|
509
|
-
"""
|
510
|
-
Deletes pod's object.
|
487
|
+
"""Delete a pod.
|
511
488
|
|
512
489
|
:param name: Name of the pod.
|
513
490
|
:param namespace: Name of the pod's namespace.
|
@@ -527,10 +504,12 @@ class GKEPodAsyncHook(GoogleBaseAsyncHook):
|
|
527
504
|
raise
|
528
505
|
|
529
506
|
async def read_logs(self, name: str, namespace: str):
|
530
|
-
"""
|
531
|
-
|
532
|
-
|
533
|
-
|
507
|
+
"""Read logs inside the pod while starting containers inside.
|
508
|
+
|
509
|
+
All the logs will be outputted with its timestamp to track the logs
|
510
|
+
after the execution of the pod is completed. The method is used for
|
511
|
+
async output of the logs only in the pod failed it execution or the task
|
512
|
+
was cancelled by the user.
|
534
513
|
|
535
514
|
:param name: Name of the pod.
|
536
515
|
:param namespace: Name of the pod's namespace.
|
@@ -35,9 +35,10 @@ DATAFUSION_PIPELINE_LINK = "{uri}/pipelines/ns/default/view/{pipeline_name}"
|
|
35
35
|
|
36
36
|
|
37
37
|
class BaseGoogleLink(BaseOperatorLink):
|
38
|
-
"""
|
39
|
-
|
40
|
-
in front of every link
|
38
|
+
"""Link for Google operators.
|
39
|
+
|
40
|
+
Prevent adding ``https://console.cloud.google.com`` in front of every link
|
41
|
+
where URI is used.
|
41
42
|
"""
|
42
43
|
|
43
44
|
name: ClassVar[str]
|