apache-airflow-providers-google 10.2.0rc1__py3-none-any.whl → 10.3.0rc1__py3-none-any.whl

This diff shows the changes between two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
Files changed (50)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +38 -39
  3. airflow/providers/google/ads/transfers/ads_to_gcs.py +4 -4
  4. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +6 -9
  5. airflow/providers/google/cloud/hooks/bigquery.py +328 -318
  6. airflow/providers/google/cloud/hooks/cloud_sql.py +66 -22
  7. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +46 -70
  8. airflow/providers/google/cloud/hooks/dataflow.py +11 -15
  9. airflow/providers/google/cloud/hooks/dataform.py +3 -3
  10. airflow/providers/google/cloud/hooks/dataproc.py +577 -573
  11. airflow/providers/google/cloud/hooks/functions.py +60 -76
  12. airflow/providers/google/cloud/hooks/gcs.py +108 -18
  13. airflow/providers/google/cloud/hooks/kubernetes_engine.py +69 -90
  14. airflow/providers/google/cloud/links/datafusion.py +4 -3
  15. airflow/providers/google/cloud/operators/bigquery.py +201 -191
  16. airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
  17. airflow/providers/google/cloud/operators/cloud_build.py +2 -1
  18. airflow/providers/google/cloud/operators/cloud_composer.py +4 -3
  19. airflow/providers/google/cloud/operators/cloud_sql.py +62 -28
  20. airflow/providers/google/cloud/operators/dataflow.py +6 -4
  21. airflow/providers/google/cloud/operators/dataform.py +3 -2
  22. airflow/providers/google/cloud/operators/dataproc.py +127 -123
  23. airflow/providers/google/cloud/operators/dataproc_metastore.py +18 -26
  24. airflow/providers/google/cloud/operators/gcs.py +35 -13
  25. airflow/providers/google/cloud/operators/kubernetes_engine.py +92 -42
  26. airflow/providers/google/cloud/operators/mlengine.py +2 -6
  27. airflow/providers/google/cloud/operators/vision.py +47 -56
  28. airflow/providers/google/cloud/sensors/bigquery.py +3 -2
  29. airflow/providers/google/cloud/sensors/gcs.py +5 -7
  30. airflow/providers/google/cloud/sensors/pubsub.py +2 -2
  31. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +3 -2
  32. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  33. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
  34. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +6 -5
  35. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +46 -7
  36. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +5 -2
  37. airflow/providers/google/cloud/triggers/cloud_sql.py +102 -0
  38. airflow/providers/google/cloud/triggers/kubernetes_engine.py +28 -6
  39. airflow/providers/google/cloud/utils/bigquery.py +17 -0
  40. airflow/providers/google/get_provider_info.py +7 -2
  41. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +4 -0
  42. airflow/providers/google/suite/transfers/local_to_drive.py +28 -26
  43. apache_airflow_providers_google-10.3.0rc1.dist-info/METADATA +289 -0
  44. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/RECORD +49 -48
  45. apache_airflow_providers_google-10.2.0rc1.dist-info/METADATA +0 -1824
  46. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/LICENSE +0 -0
  47. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/NOTICE +0 -0
  48. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/WHEEL +0 -0
  49. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/entry_points.txt +0 -0
  50. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/top_level.txt +0 -0
airflow/providers/google/cloud/sensors/gcs.py

@@ -27,6 +27,7 @@ from typing import TYPE_CHECKING, Any, Callable, Sequence
 from google.api_core.retry import Retry
 from google.cloud.storage.retry import DEFAULT_RETRY
 
+from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 from airflow.providers.google.cloud.triggers.gcs import (
@@ -76,10 +77,9 @@ class GCSObjectExistenceSensor(BaseSensorOperator):
         google_cloud_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         retry: Retry = DEFAULT_RETRY,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
-
         super().__init__(**kwargs)
         self.bucket = bucket
         self.object = object
@@ -208,10 +208,9 @@ class GCSObjectUpdateSensor(BaseSensorOperator):
         ts_func: Callable = ts_function,
         google_cloud_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
-
         super().__init__(**kwargs)
         self.bucket = bucket
         self.object = object
@@ -298,7 +297,7 @@ class GCSObjectsWithPrefixExistenceSensor(BaseSensorOperator):
         prefix: str,
         google_cloud_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -412,10 +411,9 @@ class GCSUploadSessionCompleteSensor(BaseSensorOperator):
         allow_delete: bool = True,
         google_cloud_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
-
         super().__init__(**kwargs)
 
         self.bucket = bucket
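
The sensors in this file now read their deferrable default from the [operators] default_deferrable configuration option instead of hard-coding False. A minimal sketch of the pattern, using an illustrative sensor class that is not part of the provider:

    from airflow.configuration import conf
    from airflow.sensors.base import BaseSensorOperator


    class ExampleDeferrableSensor(BaseSensorOperator):
        """Illustrative sensor whose deferrable default follows [operators] default_deferrable."""

        def __init__(
            self,
            *,
            deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
            **kwargs,
        ) -> None:
            super().__init__(**kwargs)
            self.deferrable = deferrable

With this default, setting default_deferrable = True under [operators] in airflow.cfg switches these sensors to deferrable mode deployment-wide, while an explicit deferrable argument in a DAG still takes precedence.
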
airflow/providers/google/cloud/sensors/pubsub.py

@@ -23,6 +23,7 @@ from typing import TYPE_CHECKING, Any, Callable, Sequence
 
 from google.cloud.pubsub_v1.types import ReceivedMessage
 
+from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.pubsub import PubSubHook
 from airflow.providers.google.cloud.triggers.pubsub import PubsubPullTrigger
@@ -103,10 +104,9 @@ class PubSubPullSensor(BaseSensorOperator):
         messages_callback: Callable[[list[ReceivedMessage], Context], Any] | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         poke_interval: float = 10.0,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
-
         super().__init__(**kwargs)
         self.gcp_conn_id = gcp_conn_id
         self.project_id = project_id
airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py

@@ -54,10 +54,11 @@ class BigQueryToBigQueryOperator(BaseOperator):
     :param labels: a dictionary containing labels for the job/query,
         passed to BigQuery
     :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-        **Example**: ::
+
+        .. code-block:: python
 
             encryption_configuration = {
-                "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
+                "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
             }
     :param location: The geographic location of the job. You must specify the location to run the job if
         the location to run a job is not in the US or the EU multi-regional location or
airflow/providers/google/cloud/transfers/bigquery_to_gcs.py

@@ -25,6 +25,7 @@ from google.api_core.retry import Retry
 from google.cloud.bigquery import DEFAULT_RETRY, UnknownJob
 
 from airflow import AirflowException
+from airflow.configuration import conf
 from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
 from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
@@ -114,7 +115,7 @@ class BigQueryToGCSOperator(BaseOperator):
         job_id: str | None = None,
         force_rerun: bool = False,
         reattach_states: set[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py

@@ -42,10 +42,10 @@ class FlushAction(Enum):
 
 
 class FacebookAdsReportToGcsOperator(BaseOperator):
-    """
-    Fetches the results from the Facebook Ads API as desired in the params
-    Converts and saves the data as a temporary JSON file
-    Uploads the JSON to Google Cloud Storage.
+    """Fetch from Facebook Ads API.
+
+    This converts and saves the data as a temporary JSON file, and uploads the
+    JSON to Google Cloud Storage.
 
     .. seealso::
         For more information on the Facebook Ads API, take a look at the API docs:
airflow/providers/google/cloud/transfers/gcs_to_bigquery.py

@@ -36,6 +36,7 @@ from google.cloud.bigquery import (
 from google.cloud.bigquery.table import EncryptionConfiguration, Table, TableReference
 
 from airflow import AirflowException
+from airflow.configuration import conf
 from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
@@ -148,10 +149,11 @@ class GCSToBigQueryOperator(BaseOperator):
         If autodetect is None and no schema is provided (neither via schema_fields
         nor a schema_object), assume the table already exists.
     :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-        **Example**: ::
+
+        .. code-block:: python
 
             encryption_configuration = {
-                "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
+                "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
             }
     :param location: [Optional] The geographic location of the job. Required except for US and EU.
         See details at https://cloud.google.com/bigquery/docs/locations#specifying_your_location
@@ -177,6 +179,7 @@ class GCSToBigQueryOperator(BaseOperator):
         "schema_object_bucket",
         "destination_project_dataset_table",
         "impersonation_chain",
+        "src_fmt_configs",
     )
     template_ext: Sequence[str] = (".sql",)
     ui_color = "#f0eee4"
@@ -216,7 +219,7 @@
         impersonation_chain: str | Sequence[str] | None = None,
         labels=None,
         description=None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         result_retry: Retry = DEFAULT_RETRY,
         result_timeout: float | None = None,
         cancel_on_kill: bool = True,
@@ -226,7 +229,6 @@
         project_id: str | None = None,
         **kwargs,
     ) -> None:
-
         super().__init__(**kwargs)
         self.hook: BigQueryHook | None = None
         self.configuration: dict[str, Any] = {}
@@ -716,7 +718,6 @@
     def _cleanse_time_partitioning(
         self, destination_dataset_table: str | None, time_partitioning_in: dict | None
     ) -> dict:  # if it is a partitioned table ($ is in the table name) add partition load option
-
         if time_partitioning_in is None:
             time_partitioning_in = {}
 
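
Because src_fmt_configs is now part of template_fields, the source-format options of GCSToBigQueryOperator are rendered through Jinja like the other templated arguments. A hedged usage sketch; the bucket, dataset, table, and the null_marker Variable are placeholders:

    from airflow.providers.google.cloud.transfers.gcs_to_bigquery import GCSToBigQueryOperator

    load_csv = GCSToBigQueryOperator(
        task_id="gcs_to_bq",
        bucket="my-bucket",
        source_objects=["incoming/{{ ds }}/*.csv"],
        destination_project_dataset_table="my-project.my_dataset.my_table",
        # src_fmt_configs is now templated, so Jinja expressions inside it are rendered at runtime
        src_fmt_configs={"skipLeadingRows": 1, "nullMarker": "{{ var.value.get('null_marker', '') }}"},
    )
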
airflow/providers/google/cloud/transfers/gcs_to_gcs.py

@@ -18,9 +18,10 @@
 """This module contains a Google Cloud Storage operator."""
 from __future__ import annotations
 
+import warnings
 from typing import TYPE_CHECKING, Sequence
 
-from airflow.exceptions import AirflowException
+from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 
@@ -66,8 +67,8 @@ class GCSToGCSOperator(BaseOperator):
         of copied to the new location. This is the equivalent of a mv command
         as opposed to a cp command.
     :param replace: Whether you want to replace existing destination files or not.
-    :param delimiter: This is used to restrict the result to only the 'files' in a given 'folder'.
-        If source_objects = ['foo/bah/'] and delimiter = '.avro', then only the 'files' in the
+    :param delimiter: (Deprecated) This is used to restrict the result to only the 'files' in a given
+        'folder'. If source_objects = ['foo/bah/'] and delimiter = '.avro', then only the 'files' in the
         folder 'foo/bah/' with '.avro' delimiter will be copied to the destination object.
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
     :param last_modified_time: When specified, the objects will be copied or moved,
@@ -90,6 +91,8 @@ class GCSToGCSOperator(BaseOperator):
         doesn't exist. It doesn't have any effect when the source objects are folders or patterns.
     :param exact_match: When specified, only exact match of the source object (filename) will be
         copied.
+    :param match_glob: (Optional) filters objects based on the glob pattern given by the string (
+        e.g, ``'**/*/.json'``)
 
     :Example:
 
@@ -116,7 +119,7 @@ class GCSToGCSOperator(BaseOperator):
             source_objects=['sales/sales-2017'],
             destination_bucket='data_backup',
             destination_object='copied_sales/2017/',
-            delimiter='.avro'
+            match_glob='**/*.avro'
             gcp_conn_id=google_cloud_conn_id
         )
 
@@ -190,15 +193,34 @@ class GCSToGCSOperator(BaseOperator):
         impersonation_chain: str | Sequence[str] | None = None,
         source_object_required=False,
         exact_match=False,
+        match_glob: str | None = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
 
         self.source_bucket = source_bucket
+        if source_object and WILDCARD in source_object:
+            warnings.warn(
+                "Usage of wildcard (*) in 'source_object' is deprecated, utilize 'match_glob' instead",
+                AirflowProviderDeprecationWarning,
+                stacklevel=2,
+            )
         self.source_object = source_object
+        if source_objects and any([WILDCARD in obj for obj in source_objects]):
+            warnings.warn(
+                "Usage of wildcard (*) in 'source_objects' is deprecated, utilize 'match_glob' instead",
+                AirflowProviderDeprecationWarning,
+                stacklevel=2,
+            )
         self.source_objects = source_objects
         self.destination_bucket = destination_bucket
         self.destination_object = destination_object
+        if delimiter:
+            warnings.warn(
+                "Usage of 'delimiter' is deprecated, please use 'match_glob' instead",
+                AirflowProviderDeprecationWarning,
+                stacklevel=2,
+            )
         self.delimiter = delimiter
         self.move_object = move_object
         self.replace = replace
@@ -209,6 +231,7 @@ class GCSToGCSOperator(BaseOperator):
         self.impersonation_chain = impersonation_chain
         self.source_object_required = source_object_required
         self.exact_match = exact_match
+        self.match_glob = match_glob
 
     def execute(self, context: Context):
 
@@ -251,6 +274,7 @@ class GCSToGCSOperator(BaseOperator):
         for prefix in self.source_objects:
             # Check if prefix contains wildcard
             if WILDCARD in prefix:
+
                 self._copy_source_with_wildcard(hook=hook, prefix=prefix)
             # Now search with prefix using provided delimiter if any
             else:
@@ -261,15 +285,19 @@ class GCSToGCSOperator(BaseOperator):
         # and only keep those files which are present in
         # Source GCS bucket and not in Destination GCS bucket
         delimiter = kwargs.get("delimiter")
+        match_glob = kwargs.get("match_glob")
         objects = kwargs.get("objects")
         if self.destination_object is None:
-            existing_objects = hook.list(self.destination_bucket, prefix=prefix, delimiter=delimiter)
+            existing_objects = hook.list(
+                self.destination_bucket, prefix=prefix, delimiter=delimiter, match_glob=match_glob
+            )
         else:
             self.log.info("Replaced destination_object with source_object prefix.")
             destination_objects = hook.list(
                 self.destination_bucket,
                 prefix=self.destination_object,
                 delimiter=delimiter,
+                match_glob=match_glob,
             )
             existing_objects = [
                 dest_object.replace(self.destination_object, prefix, 1) for dest_object in destination_objects
@@ -338,11 +366,15 @@ class GCSToGCSOperator(BaseOperator):
                 gcp_conn_id=google_cloud_conn_id
             )
         """
-        objects = hook.list(self.source_bucket, prefix=prefix, delimiter=self.delimiter)
+        objects = hook.list(
+            self.source_bucket, prefix=prefix, delimiter=self.delimiter, match_glob=self.match_glob
+        )
 
         if not self.replace:
             # If we are not replacing, ignore files already existing in source buckets
-            objects = self._ignore_existing_files(hook, prefix, objects=objects, delimiter=self.delimiter)
+            objects = self._ignore_existing_files(
+                hook, prefix, objects=objects, delimiter=self.delimiter, match_glob=self.match_glob
+            )
 
         # If objects is empty, and we have prefix, let's check if prefix is a blob
         # and copy directly
@@ -397,11 +429,18 @@ class GCSToGCSOperator(BaseOperator):
         self.log.info("Delimiter ignored because wildcard is in prefix")
         prefix_, delimiter = prefix.split(WILDCARD, 1)
         objects = hook.list(self.source_bucket, prefix=prefix_, delimiter=delimiter)
+        # TODO: After deprecating delimiter and wildcards in source objects,
+        # remove previous line and uncomment the following:
+        # match_glob = f"**/*{delimiter}" if delimiter else None
+        # objects = hook.list(self.source_bucket, prefix=prefix_, match_glob=match_glob)
         if not self.replace:
            # If we are not replacing, list all files in the Destination GCS bucket
            # and only keep those files which are present in
            # Source GCS bucket and not in Destination GCS bucket
            objects = self._ignore_existing_files(hook, prefix_, delimiter=delimiter, objects=objects)
+            # TODO: After deprecating delimiter and wildcards in source objects,
+            # remove previous line and uncomment the following:
+            # objects = self._ignore_existing_files(hook, prefix_, match_glob=match_glob, objects=objects)
 
         for source_object in objects:
             if self.destination_object is None:
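
For callers, the practical effect is that a glob filter replaces the delimiter/wildcard combination that now raises AirflowProviderDeprecationWarning. A usage sketch mirroring the docstring example above; the bucket names are placeholders:

    from airflow.providers.google.cloud.transfers.gcs_to_gcs import GCSToGCSOperator

    # Previously: source_objects=["sales/sales-2017/*"] together with delimiter=".avro"
    backup_avro = GCSToGCSOperator(
        task_id="backup_avro_files",
        source_bucket="data",
        source_objects=["sales/sales-2017/"],
        destination_bucket="data_backup",
        destination_object="copied_sales/2017/",
        match_glob="**/*.avro",  # glob filter instead of delimiter/wildcard
    )
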
airflow/providers/google/cloud/transfers/gcs_to_sftp.py

@@ -37,7 +37,7 @@ class GCSToSFTPOperator(BaseOperator):
     """
     Transfer files from a Google Cloud Storage bucket to SFTP server.
 
-    **Example**: ::
+    .. code-block:: python
 
         with models.DAG(
             "example_gcs_to_sftp",
@@ -145,8 +145,11 @@
 
         prefix, delimiter = self.source_object.split(WILDCARD, 1)
         prefix_dirname = os.path.dirname(prefix)
-
         objects = gcs_hook.list(self.source_bucket, prefix=prefix, delimiter=delimiter)
+        # TODO: After deprecating delimiter and wildcards in source objects,
+        # remove the previous line and uncomment the following:
+        # match_glob = f"**/*{delimiter}" if delimiter else None
+        # objects = gcs_hook.list(self.source_bucket, prefix=prefix, match_glob=match_glob)
 
         for source_object in objects:
             destination_path = self._resolve_destination_path(source_object, prefix=prefix_dirname)
airflow/providers/google/cloud/triggers/cloud_sql.py (new file)

@@ -0,0 +1,102 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""This module contains Google Cloud SQL triggers."""
+from __future__ import annotations
+
+import asyncio
+from typing import Sequence
+
+from airflow.providers.google.cloud.hooks.cloud_sql import CloudSQLAsyncHook, CloudSqlOperationStatus
+from airflow.triggers.base import BaseTrigger, TriggerEvent
+
+
+class CloudSQLExportTrigger(BaseTrigger):
+    """
+    Trigger that periodically polls information from Cloud SQL API to verify job status.
+    Implementation leverages asynchronous transport.
+    """
+
+    def __init__(
+        self,
+        operation_name: str,
+        project_id: str | None = None,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        poke_interval: int = 20,
+    ):
+        super().__init__()
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.operation_name = operation_name
+        self.project_id = project_id
+        self.poke_interval = poke_interval
+        self.hook = CloudSQLAsyncHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+    def serialize(self):
+        return (
+            "airflow.providers.google.cloud.triggers.cloud_sql.CloudSQLExportTrigger",
+            {
+                "operation_name": self.operation_name,
+                "project_id": self.project_id,
+                "gcp_conn_id": self.gcp_conn_id,
+                "impersonation_chain": self.impersonation_chain,
+                "poke_interval": self.poke_interval,
+            },
+        )
+
+    async def run(self):
+        while True:
+            try:
+                operation = await self.hook.get_operation(
+                    project_id=self.project_id, operation_name=self.operation_name
+                )
+                if operation["status"] == CloudSqlOperationStatus.DONE:
+                    if "error" in operation:
+                        yield TriggerEvent(
+                            {
+                                "operation_name": operation["name"],
+                                "status": "error",
+                                "message": operation["error"]["message"],
+                            }
+                        )
+                        return
+                    yield TriggerEvent(
+                        {
+                            "operation_name": operation["name"],
+                            "status": "success",
+                        }
+                    )
+                    return
+                else:
+                    self.log.info(
+                        "Operation status is %s, sleeping for %s seconds.",
+                        operation["status"],
+                        self.poke_interval,
+                    )
+                    await asyncio.sleep(self.poke_interval)
+            except Exception as e:
+                self.log.exception("Exception occurred while checking operation status.")
+                yield TriggerEvent(
+                    {
+                        "status": "failed",
+                        "message": str(e),
+                    }
+                )
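
An operator that wants to wait on such a Cloud SQL operation without holding a worker slot hands this trigger to the standard deferral machinery. A rough sketch; the operator class below is illustrative and not the provider's own CloudSQLExportInstanceOperator:

    from __future__ import annotations

    from airflow.models import BaseOperator
    from airflow.providers.google.cloud.triggers.cloud_sql import CloudSQLExportTrigger


    class WaitForCloudSqlOperationOperator(BaseOperator):
        """Illustrative operator that defers until a Cloud SQL operation finishes."""

        def __init__(self, *, operation_name: str, project_id: str | None = None, **kwargs) -> None:
            super().__init__(**kwargs)
            self.operation_name = operation_name
            self.project_id = project_id

        def execute(self, context):
            # Free the worker slot; the triggerer polls the operation asynchronously.
            self.defer(
                trigger=CloudSQLExportTrigger(
                    operation_name=self.operation_name,
                    project_id=self.project_id,
                ),
                method_name="execute_complete",
            )

        def execute_complete(self, context, event):
            if event["status"] != "success":
                raise RuntimeError(f"Cloud SQL operation failed: {event.get('message')}")
            return event["operation_name"]
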
airflow/providers/google/cloud/triggers/kubernetes_engine.py

@@ -18,11 +18,15 @@
 from __future__ import annotations
 
 import asyncio
+import warnings
 from datetime import datetime
 from typing import Any, AsyncIterator, Sequence
 
 from google.cloud.container_v1.types import Operation
 
+from airflow.exceptions import AirflowProviderDeprecationWarning
+from airflow.providers.cncf.kubernetes.utils.pod_manager import OnFinishAction
+
 try:
     from airflow.providers.cncf.kubernetes.triggers.pod import KubernetesPodTrigger
 except ImportError:
@@ -44,15 +48,19 @@ class GKEStartPodTrigger(KubernetesPodTrigger):
     :param poll_interval: Polling period in seconds to check for the status.
     :param trigger_start_time: time in Datetime format when the trigger was started
     :param in_cluster: run kubernetes client with in_cluster configuration.
-    :param should_delete_pod: What to do when the pod reaches its final
-        state, or the execution is interrupted. If True (default), delete the
-        pod; if False, leave the pod.
     :param get_logs: get the stdout of the container as logs of the tasks.
     :param startup_timeout: timeout in seconds to start up the pod.
     :param base_container_name: The name of the base container in the pod. This container's logs
         will appear as part of this task's logs if get_logs is True. Defaults to None. If None,
         will consult the class variable BASE_CONTAINER_NAME (which defaults to "base") for the base
         container name to use.
+    :param on_finish_action: What to do when the pod reaches its final state, or the execution is interrupted.
+        If "delete_pod", the pod will be deleted regardless it's state; if "delete_succeeded_pod",
+        only succeeded pod will be deleted. You can set to "keep_pod" to keep the pod.
+    :param should_delete_pod: What to do when the pod reaches its final
+        state, or the execution is interrupted. If True (default), delete the
+        pod; if False, leave the pod.
+        Deprecated - use `on_finish_action` instead.
     """
 
     def __init__(
@@ -66,9 +74,10 @@ class GKEStartPodTrigger(KubernetesPodTrigger):
         cluster_context: str | None = None,
         poll_interval: float = 2,
         in_cluster: bool | None = None,
-        should_delete_pod: bool = True,
         get_logs: bool = True,
         startup_timeout: int = 120,
+        on_finish_action: str = "delete_pod",
+        should_delete_pod: bool | None = None,
         *args,
         **kwargs,
     ):
@@ -87,10 +96,22 @@ class GKEStartPodTrigger(KubernetesPodTrigger):
         self.poll_interval = poll_interval
         self.cluster_context = cluster_context
         self.in_cluster = in_cluster
-        self.should_delete_pod = should_delete_pod
         self.get_logs = get_logs
         self.startup_timeout = startup_timeout
 
+        if should_delete_pod is not None:
+            warnings.warn(
+                "`should_delete_pod` parameter is deprecated, please use `on_finish_action`",
+                AirflowProviderDeprecationWarning,
+            )
+            self.on_finish_action = (
+                OnFinishAction.DELETE_POD if should_delete_pod else OnFinishAction.KEEP_POD
+            )
+            self.should_delete_pod = should_delete_pod
+        else:
+            self.on_finish_action = OnFinishAction(on_finish_action)
+            self.should_delete_pod = self.on_finish_action == OnFinishAction.DELETE_POD
+
         self._cluster_url = cluster_url
         self._ssl_ca_cert = ssl_ca_cert
 
@@ -105,11 +126,12 @@ class GKEStartPodTrigger(KubernetesPodTrigger):
                 "poll_interval": self.poll_interval,
                 "cluster_context": self.cluster_context,
                 "in_cluster": self.in_cluster,
-                "should_delete_pod": self.should_delete_pod,
                 "get_logs": self.get_logs,
                 "startup_timeout": self.startup_timeout,
                 "trigger_start_time": self.trigger_start_time,
                 "base_container_name": self.base_container_name,
+                "should_delete_pod": self.should_delete_pod,
+                "on_finish_action": self.on_finish_action.value,
             },
         )
 
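
The constructor change reduces to a mapping from the deprecated should_delete_pod boolean to the OnFinishAction enum. A small standalone sketch of that mapping, mirroring the logic above (the helper name is ours, not the provider's):

    from __future__ import annotations

    from airflow.providers.cncf.kubernetes.utils.pod_manager import OnFinishAction


    def resolve_on_finish_action(on_finish_action: str, should_delete_pod: bool | None) -> OnFinishAction:
        """Mirror the GKEStartPodTrigger constructor logic shown above."""
        if should_delete_pod is not None:
            # Deprecated boolean: True means delete the pod, False means keep it.
            return OnFinishAction.DELETE_POD if should_delete_pod else OnFinishAction.KEEP_POD
        # New-style string value: "delete_pod", "delete_succeeded_pod" or "keep_pod".
        return OnFinishAction(on_finish_action)


    assert resolve_on_finish_action("keep_pod", None) is OnFinishAction.KEEP_POD
    assert resolve_on_finish_action("keep_pod", True) is OnFinishAction.DELETE_POD  # legacy flag wins
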
airflow/providers/google/cloud/utils/bigquery.py

@@ -16,6 +16,8 @@
 # under the License.
 from __future__ import annotations
 
+from typing import Any
+
 
 def bq_cast(string_field: str, bq_type: str) -> None | int | float | bool | str:
     """
@@ -34,3 +36,18 @@ def bq_cast(string_field: str, bq_type: str) -> None | int | float | bool | str:
         return string_field == "true"
     else:
         return string_field
+
+
+def convert_job_id(job_id: str | list[str], project_id: str, location: str | None) -> Any:
+    """
+    Helper method that converts to path: project_id:location:job_id
+    :param project_id: Required. The ID of the Google Cloud project where workspace located.
+    :param location: Optional. The ID of the Google Cloud region where workspace located.
+    :param job_id: Required. The ID of the job.
+    :return: str or list[str] of project_id:location:job_id.
+    """
+    location = location if location else "US"
+    if isinstance(job_id, list):
+        return [f"{project_id}:{location}:{i}" for i in job_id]
+    else:
+        return f"{project_id}:{location}:{job_id}"
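
Based on the implementation above, convert_job_id simply prefixes each job ID with the project and location, defaulting the location to US. For example:

    from airflow.providers.google.cloud.utils.bigquery import convert_job_id

    convert_job_id("job_123", project_id="my-project", location=None)
    # -> "my-project:US:job_123"

    convert_job_id(["job_1", "job_2"], project_id="my-project", location="europe-west1")
    # -> ["my-project:europe-west1:job_1", "my-project:europe-west1:job_2"]
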
airflow/providers/google/get_provider_info.py

@@ -29,6 +29,7 @@ def get_provider_info():
         "description": "Google services including:\n\n - `Google Ads <https://ads.google.com/>`__\n - `Google Cloud (GCP) <https://cloud.google.com/>`__\n - `Google Firebase <https://firebase.google.com/>`__\n - `Google LevelDB <https://github.com/google/leveldb/>`__\n - `Google Marketing Platform <https://marketingplatform.google.com/>`__\n - `Google Workspace <https://workspace.google.com/>`__ (formerly Google Suite)\n",
         "suspended": False,
         "versions": [
+            "10.3.0",
             "10.2.0",
             "10.1.1",
             "10.1.0",
@@ -73,7 +74,7 @@ def get_provider_info():
             "gcloud-aio-auth>=4.0.0,<5.0.0",
             "gcloud-aio-bigquery>=6.1.2",
             "gcloud-aio-storage",
-            "google-ads>=20.0.0",
+            "google-ads>=21.2.0",
             "google-api-core>=2.11.0",
             "google-api-python-client>=1.6.0",
             "google-auth>=1.0.0",
@@ -1075,6 +1076,10 @@ def get_provider_info():
                 "integration-name": "Google Cloud Composer",
                 "python-modules": ["airflow.providers.google.cloud.triggers.cloud_composer"],
             },
+            {
+                "integration-name": "Google Cloud SQL",
+                "python-modules": ["airflow.providers.google.cloud.triggers.cloud_sql"],
+            },
             {
                 "integration-name": "Google Dataflow",
                 "python-modules": ["airflow.providers.google.cloud.triggers.dataflow"],
@@ -1441,7 +1446,7 @@
         ],
         "additional-extras": [
             {"name": "apache.beam", "dependencies": ["apache-beam[gcp]"]},
-            {"name": "cncf.kubernetes", "dependencies": ["apache-airflow-providers-cncf-kubernetes>=6.2.0"]},
+            {"name": "cncf.kubernetes", "dependencies": ["apache-airflow-providers-cncf-kubernetes>=7.2.0"]},
             {"name": "leveldb", "dependencies": ["plyvel"]},
             {"name": "oracle", "dependencies": ["apache-airflow-providers-oracle>=3.1.0"]},
             {"name": "facebook", "dependencies": ["apache-airflow-providers-facebook>=2.2.0"]},
airflow/providers/google/suite/transfers/gcs_to_gdrive.py

@@ -132,6 +132,10 @@ class GCSToGoogleDriveOperator(BaseOperator):
 
         prefix, delimiter = self.source_object.split(WILDCARD, 1)
         objects = self.gcs_hook.list(self.source_bucket, prefix=prefix, delimiter=delimiter)
+        # TODO: After deprecating delimiter and wildcards in source objects,
+        # remove the previous line and uncomment the following:
+        # match_glob = f"**/*{delimiter}" if delimiter else None
+        # objects = self.gcs_hook.list(self.source_bucket, prefix=prefix, match_glob=match_glob)
 
         for source_object in objects:
             if self.destination_object is None: