apache-airflow-providers-google 10.2.0rc1__py3-none-any.whl → 10.3.0rc1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between the two versions.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +38 -39
- airflow/providers/google/ads/transfers/ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +6 -9
- airflow/providers/google/cloud/hooks/bigquery.py +328 -318
- airflow/providers/google/cloud/hooks/cloud_sql.py +66 -22
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +46 -70
- airflow/providers/google/cloud/hooks/dataflow.py +11 -15
- airflow/providers/google/cloud/hooks/dataform.py +3 -3
- airflow/providers/google/cloud/hooks/dataproc.py +577 -573
- airflow/providers/google/cloud/hooks/functions.py +60 -76
- airflow/providers/google/cloud/hooks/gcs.py +108 -18
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +69 -90
- airflow/providers/google/cloud/links/datafusion.py +4 -3
- airflow/providers/google/cloud/operators/bigquery.py +201 -191
- airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/operators/cloud_build.py +2 -1
- airflow/providers/google/cloud/operators/cloud_composer.py +4 -3
- airflow/providers/google/cloud/operators/cloud_sql.py +62 -28
- airflow/providers/google/cloud/operators/dataflow.py +6 -4
- airflow/providers/google/cloud/operators/dataform.py +3 -2
- airflow/providers/google/cloud/operators/dataproc.py +127 -123
- airflow/providers/google/cloud/operators/dataproc_metastore.py +18 -26
- airflow/providers/google/cloud/operators/gcs.py +35 -13
- airflow/providers/google/cloud/operators/kubernetes_engine.py +92 -42
- airflow/providers/google/cloud/operators/mlengine.py +2 -6
- airflow/providers/google/cloud/operators/vision.py +47 -56
- airflow/providers/google/cloud/sensors/bigquery.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +5 -7
- airflow/providers/google/cloud/sensors/pubsub.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +3 -2
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +6 -5
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +46 -7
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +5 -2
- airflow/providers/google/cloud/triggers/cloud_sql.py +102 -0
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +28 -6
- airflow/providers/google/cloud/utils/bigquery.py +17 -0
- airflow/providers/google/get_provider_info.py +7 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +4 -0
- airflow/providers/google/suite/transfers/local_to_drive.py +28 -26
- apache_airflow_providers_google-10.3.0rc1.dist-info/METADATA +289 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/RECORD +49 -48
- apache_airflow_providers_google-10.2.0rc1.dist-info/METADATA +0 -1824
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/LICENSE +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/NOTICE +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/top_level.txt +0 -0
airflow/providers/google/cloud/sensors/gcs.py

@@ -27,6 +27,7 @@ from typing import TYPE_CHECKING, Any, Callable, Sequence
 from google.api_core.retry import Retry
 from google.cloud.storage.retry import DEFAULT_RETRY
 
+from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 from airflow.providers.google.cloud.triggers.gcs import (
@@ -76,10 +77,9 @@ class GCSObjectExistenceSensor(BaseSensorOperator):
         google_cloud_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         retry: Retry = DEFAULT_RETRY,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
-
         super().__init__(**kwargs)
         self.bucket = bucket
         self.object = object
@@ -208,10 +208,9 @@ class GCSObjectUpdateSensor(BaseSensorOperator):
         ts_func: Callable = ts_function,
         google_cloud_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
-
         super().__init__(**kwargs)
         self.bucket = bucket
         self.object = object
@@ -298,7 +297,7 @@ class GCSObjectsWithPrefixExistenceSensor(BaseSensorOperator):
         prefix: str,
         google_cloud_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -412,10 +411,9 @@ class GCSUploadSessionCompleteSensor(BaseSensorOperator):
         allow_delete: bool = True,
         google_cloud_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
-
         super().__init__(**kwargs)
 
         self.bucket = bucket
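All four GCS sensors now derive their `deferrable` default from the `[operators] default_deferrable` setting instead of hard-coding `False`, so deferrable mode can be switched on fleet-wide from configuration. A minimal sketch of the effect (bucket and object names are illustrative):

```python
# airflow.cfg -- or export AIRFLOW__OPERATORS__DEFAULT_DEFERRABLE=true
# [operators]
# default_deferrable = true

from airflow.providers.google.cloud.sensors.gcs import GCSObjectExistenceSensor

# With the setting above, this sensor now defers by default; passing
# deferrable=False explicitly still opts a single task out.
wait_for_object = GCSObjectExistenceSensor(
    task_id="wait_for_object",
    bucket="example-bucket",   # illustrative
    object="data/file.csv",    # illustrative
)
```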
airflow/providers/google/cloud/sensors/pubsub.py

@@ -23,6 +23,7 @@ from typing import TYPE_CHECKING, Any, Callable, Sequence
 
 from google.cloud.pubsub_v1.types import ReceivedMessage
 
+from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.pubsub import PubSubHook
 from airflow.providers.google.cloud.triggers.pubsub import PubsubPullTrigger
@@ -103,10 +104,9 @@ class PubSubPullSensor(BaseSensorOperator):
         messages_callback: Callable[[list[ReceivedMessage], Context], Any] | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         poke_interval: float = 10.0,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
-
         super().__init__(**kwargs)
         self.gcp_conn_id = gcp_conn_id
         self.project_id = project_id
airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py

@@ -54,10 +54,11 @@ class BigQueryToBigQueryOperator(BaseOperator):
     :param labels: a dictionary containing labels for the job/query,
         passed to BigQuery
     :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-
+
+        .. code-block:: python
 
         encryption_configuration = {
-            "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
+            "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
         }
     :param location: The geographic location of the job. You must specify the location to run the job if
         the location to run a job is not in the US or the EU multi-regional location or
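The docstring fix above turns the `encryption_configuration` snippet into a proper `.. code-block:: python` directive. For context, a sketch of how that parameter is passed (task id and table names are illustrative; the KMS key path is the one from the docstring):

```python
from airflow.providers.google.cloud.transfers.bigquery_to_bigquery import (
    BigQueryToBigQueryOperator,
)

copy_table = BigQueryToBigQueryOperator(
    task_id="copy_table",  # illustrative
    source_project_dataset_tables="my-project.source_ds.my_table",
    destination_project_dataset_table="my-project.dest_ds.my_table",
    encryption_configuration={
        "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
    },
)
```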
airflow/providers/google/cloud/transfers/bigquery_to_gcs.py

@@ -25,6 +25,7 @@ from google.api_core.retry import Retry
 from google.cloud.bigquery import DEFAULT_RETRY, UnknownJob
 
 from airflow import AirflowException
+from airflow.configuration import conf
 from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
 from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
@@ -114,7 +115,7 @@ class BigQueryToGCSOperator(BaseOperator):
         job_id: str | None = None,
         force_rerun: bool = False,
         reattach_states: set[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py

@@ -42,10 +42,10 @@ class FlushAction(Enum):
 
 
 class FacebookAdsReportToGcsOperator(BaseOperator):
-    """
-
-
-
+    """Fetch from Facebook Ads API.
+
+    This converts and saves the data as a temporary JSON file, and uploads the
+    JSON to Google Cloud Storage.
 
     .. seealso::
         For more information on the Facebook Ads API, take a look at the API docs:
airflow/providers/google/cloud/transfers/gcs_to_bigquery.py

@@ -36,6 +36,7 @@ from google.cloud.bigquery import (
 from google.cloud.bigquery.table import EncryptionConfiguration, Table, TableReference
 
 from airflow import AirflowException
+from airflow.configuration import conf
 from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
@@ -148,10 +149,11 @@ class GCSToBigQueryOperator(BaseOperator):
         If autodetect is None and no schema is provided (neither via schema_fields
         nor a schema_object), assume the table already exists.
     :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-
+
+        .. code-block:: python
 
         encryption_configuration = {
-            "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
+            "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
         }
     :param location: [Optional] The geographic location of the job. Required except for US and EU.
         See details at https://cloud.google.com/bigquery/docs/locations#specifying_your_location
@@ -177,6 +179,7 @@ class GCSToBigQueryOperator(BaseOperator):
         "schema_object_bucket",
         "destination_project_dataset_table",
         "impersonation_chain",
+        "src_fmt_configs",
     )
     template_ext: Sequence[str] = (".sql",)
     ui_color = "#f0eee4"
@@ -216,7 +219,7 @@ class GCSToBigQueryOperator(BaseOperator):
         impersonation_chain: str | Sequence[str] | None = None,
         labels=None,
         description=None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         result_retry: Retry = DEFAULT_RETRY,
         result_timeout: float | None = None,
         cancel_on_kill: bool = True,
@@ -226,7 +229,6 @@ class GCSToBigQueryOperator(BaseOperator):
         project_id: str | None = None,
         **kwargs,
     ) -> None:
-
         super().__init__(**kwargs)
         self.hook: BigQueryHook | None = None
         self.configuration: dict[str, Any] = {}
@@ -716,7 +718,6 @@ class GCSToBigQueryOperator(BaseOperator):
     def _cleanse_time_partitioning(
         self, destination_dataset_table: str | None, time_partitioning_in: dict | None
     ) -> dict:  # if it is a partitioned table ($ is in the table name) add partition load option
-
         if time_partitioning_in is None:
             time_partitioning_in = {}
 
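Adding `src_fmt_configs` to `template_fields` means its values are now rendered through Jinja before the load job is built. A hedged sketch (bucket, table, and variable names are illustrative):

```python
from airflow.providers.google.cloud.transfers.gcs_to_bigquery import GCSToBigQueryOperator

load_csv = GCSToBigQueryOperator(
    task_id="load_csv",  # illustrative
    bucket="example-bucket",
    source_objects=["exports/{{ ds }}/data.csv"],
    destination_project_dataset_table="my-project.my_dataset.my_table",
    # Now templated: this Jinja expression is rendered at run time.
    src_fmt_configs={"nullMarker": "{{ var.value.null_marker }}"},
)
```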
airflow/providers/google/cloud/transfers/gcs_to_gcs.py

@@ -18,9 +18,10 @@
 """This module contains a Google Cloud Storage operator."""
 from __future__ import annotations
 
+import warnings
 from typing import TYPE_CHECKING, Sequence
 
-from airflow.exceptions import AirflowException
+from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 
@@ -66,8 +67,8 @@ class GCSToGCSOperator(BaseOperator):
         of copied to the new location. This is the equivalent of a mv command
         as opposed to a cp command.
     :param replace: Whether you want to replace existing destination files or not.
-    :param delimiter: This is used to restrict the result to only the 'files' in a given
-        If source_objects = ['foo/bah/'] and delimiter = '.avro', then only the 'files' in the
+    :param delimiter: (Deprecated) This is used to restrict the result to only the 'files' in a given
+        'folder'. If source_objects = ['foo/bah/'] and delimiter = '.avro', then only the 'files' in the
         folder 'foo/bah/' with '.avro' delimiter will be copied to the destination object.
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
     :param last_modified_time: When specified, the objects will be copied or moved,
@@ -90,6 +91,8 @@ class GCSToGCSOperator(BaseOperator):
         doesn't exist. It doesn't have any effect when the source objects are folders or patterns.
     :param exact_match: When specified, only exact match of the source object (filename) will be
         copied.
+    :param match_glob: (Optional) filters objects based on the glob pattern given by the string (
+        e.g, ``'**/*/.json'``)
 
     :Example:
 
@@ -116,7 +119,7 @@ class GCSToGCSOperator(BaseOperator):
             source_objects=['sales/sales-2017'],
             destination_bucket='data_backup',
             destination_object='copied_sales/2017/',
-
+            match_glob='**/*.avro'
             gcp_conn_id=google_cloud_conn_id
         )
 
@@ -190,15 +193,34 @@ class GCSToGCSOperator(BaseOperator):
         impersonation_chain: str | Sequence[str] | None = None,
         source_object_required=False,
         exact_match=False,
+        match_glob: str | None = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
 
         self.source_bucket = source_bucket
+        if source_object and WILDCARD in source_object:
+            warnings.warn(
+                "Usage of wildcard (*) in 'source_object' is deprecated, utilize 'match_glob' instead",
+                AirflowProviderDeprecationWarning,
+                stacklevel=2,
+            )
         self.source_object = source_object
+        if source_objects and any([WILDCARD in obj for obj in source_objects]):
+            warnings.warn(
+                "Usage of wildcard (*) in 'source_objects' is deprecated, utilize 'match_glob' instead",
+                AirflowProviderDeprecationWarning,
+                stacklevel=2,
+            )
         self.source_objects = source_objects
         self.destination_bucket = destination_bucket
         self.destination_object = destination_object
+        if delimiter:
+            warnings.warn(
+                "Usage of 'delimiter' is deprecated, please use 'match_glob' instead",
+                AirflowProviderDeprecationWarning,
+                stacklevel=2,
+            )
         self.delimiter = delimiter
         self.move_object = move_object
         self.replace = replace
@@ -209,6 +231,7 @@ class GCSToGCSOperator(BaseOperator):
         self.impersonation_chain = impersonation_chain
         self.source_object_required = source_object_required
         self.exact_match = exact_match
+        self.match_glob = match_glob
 
     def execute(self, context: Context):
 
@@ -251,6 +274,7 @@ class GCSToGCSOperator(BaseOperator):
         for prefix in self.source_objects:
             # Check if prefix contains wildcard
             if WILDCARD in prefix:
+
                 self._copy_source_with_wildcard(hook=hook, prefix=prefix)
             # Now search with prefix using provided delimiter if any
             else:
@@ -261,15 +285,19 @@ class GCSToGCSOperator(BaseOperator):
         # and only keep those files which are present in
         # Source GCS bucket and not in Destination GCS bucket
         delimiter = kwargs.get("delimiter")
+        match_glob = kwargs.get("match_glob")
         objects = kwargs.get("objects")
         if self.destination_object is None:
-            existing_objects = hook.list(
+            existing_objects = hook.list(
+                self.destination_bucket, prefix=prefix, delimiter=delimiter, match_glob=match_glob
+            )
         else:
             self.log.info("Replaced destination_object with source_object prefix.")
             destination_objects = hook.list(
                 self.destination_bucket,
                 prefix=self.destination_object,
                 delimiter=delimiter,
+                match_glob=match_glob,
             )
             existing_objects = [
                 dest_object.replace(self.destination_object, prefix, 1) for dest_object in destination_objects
@@ -338,11 +366,15 @@ class GCSToGCSOperator(BaseOperator):
                 gcp_conn_id=google_cloud_conn_id
             )
         """
-        objects = hook.list(
+        objects = hook.list(
+            self.source_bucket, prefix=prefix, delimiter=self.delimiter, match_glob=self.match_glob
+        )
 
         if not self.replace:
             # If we are not replacing, ignore files already existing in source buckets
-            objects = self._ignore_existing_files(
+            objects = self._ignore_existing_files(
+                hook, prefix, objects=objects, delimiter=self.delimiter, match_glob=self.match_glob
+            )
 
         # If objects is empty, and we have prefix, let's check if prefix is a blob
         # and copy directly
@@ -397,11 +429,18 @@ class GCSToGCSOperator(BaseOperator):
             self.log.info("Delimiter ignored because wildcard is in prefix")
         prefix_, delimiter = prefix.split(WILDCARD, 1)
         objects = hook.list(self.source_bucket, prefix=prefix_, delimiter=delimiter)
+        # TODO: After deprecating delimiter and wildcards in source objects,
+        #  remove previous line and uncomment the following:
+        #  match_glob = f"**/*{delimiter}" if delimiter else None
+        #  objects = hook.list(self.source_bucket, prefix=prefix_, match_glob=match_glob)
         if not self.replace:
             # If we are not replacing, list all files in the Destination GCS bucket
             # and only keep those files which are present in
             # Source GCS bucket and not in Destination GCS bucket
             objects = self._ignore_existing_files(hook, prefix_, delimiter=delimiter, objects=objects)
+            # TODO: After deprecating delimiter and wildcards in source objects,
+            #  remove previous line and uncomment the following:
+            #  objects = self._ignore_existing_files(hook, prefix_, match_glob=match_glob, objects=objects)
 
         for source_object in objects:
             if self.destination_object is None:
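Taken together, these hunks deprecate `delimiter` and embedded `*` wildcards in favour of `match_glob`, which is now forwarded to `GCSHook.list()`. A sketch of the migration, reusing the bucket names from the docstring example:

```python
from airflow.providers.google.cloud.transfers.gcs_to_gcs import GCSToGCSOperator

# Before (now emits AirflowProviderDeprecationWarning):
#   source_objects=["sales/sales-2017/*.avro"]   or   delimiter=".avro"
copy_avro = GCSToGCSOperator(
    task_id="copy_avro",  # illustrative
    source_bucket="data",
    source_objects=["sales/sales-2017/"],
    destination_bucket="data_backup",
    destination_object="copied_sales/2017/",
    match_glob="**/*.avro",  # replaces both wildcard and delimiter filtering
)
```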
airflow/providers/google/cloud/transfers/gcs_to_sftp.py

@@ -37,7 +37,7 @@ class GCSToSFTPOperator(BaseOperator):
     """
     Transfer files from a Google Cloud Storage bucket to SFTP server.
 
-
+    .. code-block:: python
 
     with models.DAG(
         "example_gcs_to_sftp",
@@ -145,8 +145,11 @@
 
         prefix, delimiter = self.source_object.split(WILDCARD, 1)
         prefix_dirname = os.path.dirname(prefix)
-
         objects = gcs_hook.list(self.source_bucket, prefix=prefix, delimiter=delimiter)
+        # TODO: After deprecating delimiter and wildcards in source objects,
+        #  remove the previous line and uncomment the following:
+        #  match_glob = f"**/*{delimiter}" if delimiter else None
+        #  objects = gcs_hook.list(self.source_bucket, prefix=prefix, match_glob=match_glob)
 
         for source_object in objects:
             destination_path = self._resolve_destination_path(source_object, prefix=prefix_dirname)
airflow/providers/google/cloud/triggers/cloud_sql.py

@@ -0,0 +1,102 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""This module contains Google Cloud SQL triggers."""
+from __future__ import annotations
+
+import asyncio
+from typing import Sequence
+
+from airflow.providers.google.cloud.hooks.cloud_sql import CloudSQLAsyncHook, CloudSqlOperationStatus
+from airflow.triggers.base import BaseTrigger, TriggerEvent
+
+
+class CloudSQLExportTrigger(BaseTrigger):
+    """
+    Trigger that periodically polls information from Cloud SQL API to verify job status.
+    Implementation leverages asynchronous transport.
+    """
+
+    def __init__(
+        self,
+        operation_name: str,
+        project_id: str | None = None,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        poke_interval: int = 20,
+    ):
+        super().__init__()
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.operation_name = operation_name
+        self.project_id = project_id
+        self.poke_interval = poke_interval
+        self.hook = CloudSQLAsyncHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+    def serialize(self):
+        return (
+            "airflow.providers.google.cloud.triggers.cloud_sql.CloudSQLExportTrigger",
+            {
+                "operation_name": self.operation_name,
+                "project_id": self.project_id,
+                "gcp_conn_id": self.gcp_conn_id,
+                "impersonation_chain": self.impersonation_chain,
+                "poke_interval": self.poke_interval,
+            },
+        )
+
+    async def run(self):
+        while True:
+            try:
+                operation = await self.hook.get_operation(
+                    project_id=self.project_id, operation_name=self.operation_name
+                )
+                if operation["status"] == CloudSqlOperationStatus.DONE:
+                    if "error" in operation:
+                        yield TriggerEvent(
+                            {
+                                "operation_name": operation["name"],
+                                "status": "error",
+                                "message": operation["error"]["message"],
+                            }
+                        )
+                        return
+                    yield TriggerEvent(
+                        {
+                            "operation_name": operation["name"],
+                            "status": "success",
+                        }
+                    )
+                    return
+                else:
+                    self.log.info(
+                        "Operation status is %s, sleeping for %s seconds.",
+                        operation["status"],
+                        self.poke_interval,
+                    )
+                    await asyncio.sleep(self.poke_interval)
+            except Exception as e:
+                self.log.exception("Exception occurred while checking operation status.")
+                yield TriggerEvent(
+                    {
+                        "status": "failed",
+                        "message": str(e),
+                    }
+                )
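`CloudSQLExportTrigger` is designed to be handed off from a deferrable operator: the dict returned by `serialize()` becomes the constructor arguments when the triggerer rebuilds it, and `run()` yields a single `TriggerEvent` when the operation finishes. A hedged sketch of the handoff (the operator class and its values are hypothetical, not part of this diff):

```python
from airflow.models import BaseOperator
from airflow.providers.google.cloud.triggers.cloud_sql import CloudSQLExportTrigger


class _ExampleExportOperator(BaseOperator):
    """Hypothetical deferrable operator; only the handoff is sketched."""

    def execute(self, context):
        operation_name = "sample-operation"  # illustrative
        self.defer(
            trigger=CloudSQLExportTrigger(
                operation_name=operation_name,
                project_id="my-project",  # illustrative
                gcp_conn_id="google_cloud_default",
                impersonation_chain=None,
                poke_interval=20,
            ),
            method_name="execute_complete",
        )

    def execute_complete(self, context, event):
        # Resumed on a worker with the TriggerEvent payload from run();
        # "error" and "failed" events carry a "message" key.
        if event["status"] != "success":
            raise RuntimeError(event["message"])
```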
airflow/providers/google/cloud/triggers/kubernetes_engine.py

@@ -18,11 +18,15 @@
 from __future__ import annotations
 
 import asyncio
+import warnings
 from datetime import datetime
 from typing import Any, AsyncIterator, Sequence
 
 from google.cloud.container_v1.types import Operation
 
+from airflow.exceptions import AirflowProviderDeprecationWarning
+from airflow.providers.cncf.kubernetes.utils.pod_manager import OnFinishAction
+
 try:
     from airflow.providers.cncf.kubernetes.triggers.pod import KubernetesPodTrigger
 except ImportError:
@@ -44,15 +48,19 @@ class GKEStartPodTrigger(KubernetesPodTrigger):
     :param poll_interval: Polling period in seconds to check for the status.
     :param trigger_start_time: time in Datetime format when the trigger was started
     :param in_cluster: run kubernetes client with in_cluster configuration.
-    :param should_delete_pod: What to do when the pod reaches its final
-        state, or the execution is interrupted. If True (default), delete the
-        pod; if False, leave the pod.
     :param get_logs: get the stdout of the container as logs of the tasks.
     :param startup_timeout: timeout in seconds to start up the pod.
     :param base_container_name: The name of the base container in the pod. This container's logs
         will appear as part of this task's logs if get_logs is True. Defaults to None. If None,
         will consult the class variable BASE_CONTAINER_NAME (which defaults to "base") for the base
         container name to use.
+    :param on_finish_action: What to do when the pod reaches its final state, or the execution is interrupted.
+        If "delete_pod", the pod will be deleted regardless it's state; if "delete_succeeded_pod",
+        only succeeded pod will be deleted. You can set to "keep_pod" to keep the pod.
+    :param should_delete_pod: What to do when the pod reaches its final
+        state, or the execution is interrupted. If True (default), delete the
+        pod; if False, leave the pod.
+        Deprecated - use `on_finish_action` instead.
     """
 
     def __init__(
@@ -66,9 +74,10 @@ class GKEStartPodTrigger(KubernetesPodTrigger):
         cluster_context: str | None = None,
         poll_interval: float = 2,
         in_cluster: bool | None = None,
-        should_delete_pod: bool = True,
         get_logs: bool = True,
         startup_timeout: int = 120,
+        on_finish_action: str = "delete_pod",
+        should_delete_pod: bool | None = None,
         *args,
         **kwargs,
     ):
@@ -87,10 +96,22 @@ class GKEStartPodTrigger(KubernetesPodTrigger):
         self.poll_interval = poll_interval
         self.cluster_context = cluster_context
         self.in_cluster = in_cluster
-        self.should_delete_pod = should_delete_pod
         self.get_logs = get_logs
         self.startup_timeout = startup_timeout
 
+        if should_delete_pod is not None:
+            warnings.warn(
+                "`should_delete_pod` parameter is deprecated, please use `on_finish_action`",
+                AirflowProviderDeprecationWarning,
+            )
+            self.on_finish_action = (
+                OnFinishAction.DELETE_POD if should_delete_pod else OnFinishAction.KEEP_POD
+            )
+            self.should_delete_pod = should_delete_pod
+        else:
+            self.on_finish_action = OnFinishAction(on_finish_action)
+            self.should_delete_pod = self.on_finish_action == OnFinishAction.DELETE_POD
+
         self._cluster_url = cluster_url
         self._ssl_ca_cert = ssl_ca_cert
 
@@ -105,11 +126,12 @@ class GKEStartPodTrigger(KubernetesPodTrigger):
                 "poll_interval": self.poll_interval,
                 "cluster_context": self.cluster_context,
                 "in_cluster": self.in_cluster,
-                "should_delete_pod": self.should_delete_pod,
                 "get_logs": self.get_logs,
                 "startup_timeout": self.startup_timeout,
                 "trigger_start_time": self.trigger_start_time,
                 "base_container_name": self.base_container_name,
+                "should_delete_pod": self.should_delete_pod,
+                "on_finish_action": self.on_finish_action.value,
             },
         )
 
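The shim above keeps `should_delete_pod` working while mapping it onto the richer `OnFinishAction` enum from the cncf.kubernetes provider. Roughly, the equivalences are:

```python
from airflow.providers.cncf.kubernetes.utils.pod_manager import OnFinishAction

# should_delete_pod=True   ->  OnFinishAction.DELETE_POD ("delete_pod")
# should_delete_pod=False  ->  OnFinishAction.KEEP_POD ("keep_pod")
# New, with no boolean equivalent: OnFinishAction.DELETE_SUCCEEDED_POD
# ("delete_succeeded_pod"), which deletes only pods that finished successfully.
assert OnFinishAction("delete_pod") is OnFinishAction.DELETE_POD
```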
airflow/providers/google/cloud/utils/bigquery.py

@@ -16,6 +16,8 @@
 # under the License.
 from __future__ import annotations
 
+from typing import Any
+
 
 def bq_cast(string_field: str, bq_type: str) -> None | int | float | bool | str:
     """
@@ -34,3 +36,18 @@ def bq_cast(string_field: str, bq_type: str) -> None | int | float | bool | str:
         return string_field == "true"
     else:
         return string_field
+
+
+def convert_job_id(job_id: str | list[str], project_id: str, location: str | None) -> Any:
+    """
+    Helper method that converts to path: project_id:location:job_id
+    :param project_id: Required. The ID of the Google Cloud project where workspace located.
+    :param location: Optional. The ID of the Google Cloud region where workspace located.
+    :param job_id: Required. The ID of the job.
+    :return: str or list[str] of project_id:location:job_id.
+    """
+    location = location if location else "US"
+    if isinstance(job_id, list):
+        return [f"{project_id}:{location}:{i}" for i in job_id]
+    else:
+        return f"{project_id}:{location}:{job_id}"
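A quick look at the new helper's behaviour (project and job ids are illustrative):

```python
from airflow.providers.google.cloud.utils.bigquery import convert_job_id

convert_job_id("job_1", "my-project", None)
# -> "my-project:US:job_1"   (location falls back to "US")

convert_job_id(["job_1", "job_2"], "my-project", "europe-west1")
# -> ["my-project:europe-west1:job_1", "my-project:europe-west1:job_2"]
```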
airflow/providers/google/get_provider_info.py

@@ -29,6 +29,7 @@ def get_provider_info():
         "description": "Google services including:\n\n - `Google Ads <https://ads.google.com/>`__\n - `Google Cloud (GCP) <https://cloud.google.com/>`__\n - `Google Firebase <https://firebase.google.com/>`__\n - `Google LevelDB <https://github.com/google/leveldb/>`__\n - `Google Marketing Platform <https://marketingplatform.google.com/>`__\n - `Google Workspace <https://workspace.google.com/>`__ (formerly Google Suite)\n",
         "suspended": False,
         "versions": [
+            "10.3.0",
             "10.2.0",
             "10.1.1",
             "10.1.0",
@@ -73,7 +74,7 @@
             "gcloud-aio-auth>=4.0.0,<5.0.0",
             "gcloud-aio-bigquery>=6.1.2",
             "gcloud-aio-storage",
-            "google-ads>=
+            "google-ads>=21.2.0",
             "google-api-core>=2.11.0",
             "google-api-python-client>=1.6.0",
             "google-auth>=1.0.0",
@@ -1075,6 +1076,10 @@
                 "integration-name": "Google Cloud Composer",
                 "python-modules": ["airflow.providers.google.cloud.triggers.cloud_composer"],
             },
+            {
+                "integration-name": "Google Cloud SQL",
+                "python-modules": ["airflow.providers.google.cloud.triggers.cloud_sql"],
+            },
             {
                 "integration-name": "Google Dataflow",
                 "python-modules": ["airflow.providers.google.cloud.triggers.dataflow"],
@@ -1441,7 +1446,7 @@
         ],
         "additional-extras": [
             {"name": "apache.beam", "dependencies": ["apache-beam[gcp]"]},
-            {"name": "cncf.kubernetes", "dependencies": ["apache-airflow-providers-cncf-kubernetes>=
+            {"name": "cncf.kubernetes", "dependencies": ["apache-airflow-providers-cncf-kubernetes>=7.2.0"]},
             {"name": "leveldb", "dependencies": ["plyvel"]},
             {"name": "oracle", "dependencies": ["apache-airflow-providers-oracle>=3.1.0"]},
             {"name": "facebook", "dependencies": ["apache-airflow-providers-facebook>=2.2.0"]},
airflow/providers/google/suite/transfers/gcs_to_gdrive.py

@@ -132,6 +132,10 @@ class GCSToGoogleDriveOperator(BaseOperator):
 
         prefix, delimiter = self.source_object.split(WILDCARD, 1)
         objects = self.gcs_hook.list(self.source_bucket, prefix=prefix, delimiter=delimiter)
+        # TODO: After deprecating delimiter and wildcards in source objects,
+        #  remove the previous line and uncomment the following:
+        #  match_glob = f"**/*{delimiter}" if delimiter else None
+        #  objects = self.gcs_hook.list(self.source_bucket, prefix=prefix, match_glob=match_glob)
 
         for source_object in objects:
             if self.destination_object is None: