apache-airflow-providers-google 10.2.0rc1__py3-none-any.whl → 10.3.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +38 -39
  3. airflow/providers/google/ads/transfers/ads_to_gcs.py +4 -4
  4. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +6 -9
  5. airflow/providers/google/cloud/hooks/bigquery.py +328 -318
  6. airflow/providers/google/cloud/hooks/cloud_sql.py +66 -22
  7. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +46 -70
  8. airflow/providers/google/cloud/hooks/dataflow.py +11 -15
  9. airflow/providers/google/cloud/hooks/dataform.py +3 -3
  10. airflow/providers/google/cloud/hooks/dataproc.py +577 -573
  11. airflow/providers/google/cloud/hooks/functions.py +60 -76
  12. airflow/providers/google/cloud/hooks/gcs.py +108 -18
  13. airflow/providers/google/cloud/hooks/kubernetes_engine.py +69 -90
  14. airflow/providers/google/cloud/links/datafusion.py +4 -3
  15. airflow/providers/google/cloud/operators/bigquery.py +201 -191
  16. airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
  17. airflow/providers/google/cloud/operators/cloud_build.py +2 -1
  18. airflow/providers/google/cloud/operators/cloud_composer.py +4 -3
  19. airflow/providers/google/cloud/operators/cloud_sql.py +62 -28
  20. airflow/providers/google/cloud/operators/dataflow.py +6 -4
  21. airflow/providers/google/cloud/operators/dataform.py +3 -2
  22. airflow/providers/google/cloud/operators/dataproc.py +127 -123
  23. airflow/providers/google/cloud/operators/dataproc_metastore.py +18 -26
  24. airflow/providers/google/cloud/operators/gcs.py +35 -13
  25. airflow/providers/google/cloud/operators/kubernetes_engine.py +92 -42
  26. airflow/providers/google/cloud/operators/mlengine.py +2 -6
  27. airflow/providers/google/cloud/operators/vision.py +47 -56
  28. airflow/providers/google/cloud/sensors/bigquery.py +3 -2
  29. airflow/providers/google/cloud/sensors/gcs.py +5 -7
  30. airflow/providers/google/cloud/sensors/pubsub.py +2 -2
  31. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +3 -2
  32. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  33. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
  34. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +6 -5
  35. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +46 -7
  36. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +5 -2
  37. airflow/providers/google/cloud/triggers/cloud_sql.py +102 -0
  38. airflow/providers/google/cloud/triggers/kubernetes_engine.py +28 -6
  39. airflow/providers/google/cloud/utils/bigquery.py +17 -0
  40. airflow/providers/google/get_provider_info.py +7 -2
  41. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +4 -0
  42. airflow/providers/google/suite/transfers/local_to_drive.py +28 -26
  43. apache_airflow_providers_google-10.3.0rc1.dist-info/METADATA +289 -0
  44. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/RECORD +49 -48
  45. apache_airflow_providers_google-10.2.0rc1.dist-info/METADATA +0 -1824
  46. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/LICENSE +0 -0
  47. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/NOTICE +0 -0
  48. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/WHEEL +0 -0
  49. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/entry_points.txt +0 -0
  50. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/top_level.txt +0 -0
airflow/providers/google/cloud/hooks/functions.py
@@ -32,8 +32,7 @@ TIME_TO_SLEEP_IN_SECONDS = 1
 
 
 class CloudFunctionsHook(GoogleBaseHook):
-    """
-    Hook for the Google Cloud Functions APIs.
+    """Google Cloud Functions APIs.
 
     All the methods in the hook where project_id is used must be called with
     keyword arguments rather than positional.
@@ -61,19 +60,17 @@ class CloudFunctionsHook(GoogleBaseHook):
 
     @staticmethod
     def _full_location(project_id: str, location: str) -> str:
-        """
-        Retrieve full location of the function in the form of
-        ``projects/<GCP_PROJECT_ID>/locations/<GCP_LOCATION>``.
+        """Retrieve full location of the function.
 
-        :param project_id: The Google Cloud Project project_id where the function belongs.
+        :param project_id: Google Cloud Project ID where the function belongs.
         :param location: The location where the function is created.
-        :return:
+        :return: The full location, in the form of
+            ``projects/<GCP_PROJECT_ID>/locations/<GCP_LOCATION>``.
         """
         return f"projects/{project_id}/locations/{location}"
 
     def get_conn(self) -> build:
-        """
-        Retrieves the connection to Cloud Functions.
+        """Retrieve the connection to Cloud Functions.
 
         :return: Google Cloud Function services object.
         """
@@ -85,74 +82,72 @@ class CloudFunctionsHook(GoogleBaseHook):
         return self._conn
 
     def get_function(self, name: str) -> dict:
-        """
-        Returns the Cloud Function with the given name.
+        """Get the Cloud Function with given name.
 
         :param name: Name of the function.
         :return: A Cloud Functions object representing the function.
         """
-        # fmt: off
-        return self.get_conn().projects().locations().functions().get(
-            name=name).execute(num_retries=self.num_retries)
-        # fmt: on
+        operation = self.get_conn().projects().locations().functions().get(name=name)
+        return operation.execute(num_retries=self.num_retries)
 
     @GoogleBaseHook.fallback_to_default_project_id
     def create_new_function(self, location: str, body: dict, project_id: str) -> None:
-        """
-        Creates a new function in Cloud Function in the location specified in the body.
+        """Create a new function at the location specified in the body.
 
         :param location: The location of the function.
         :param body: The body required by the Cloud Functions insert API.
-        :param project_id: Optional, Google Cloud Project project_id where the function belongs.
-            If set to None or missing, the default project_id from the Google Cloud connection is used.
-        :return: None
-        """
-        # fmt: off
-        response = self.get_conn().projects().locations().functions().create(
-            location=self._full_location(project_id, location),
-            body=body
-        ).execute(num_retries=self.num_retries)
-        # fmt: on
+        :param project_id: Google Cloud Project ID where the function belongs.
+            If set to None or missing, the default project ID from the Google
+            Cloud connection is used.
+        """
+        operation = (
+            self.get_conn()
+            .projects()
+            .locations()
+            .functions()
+            .create(location=self._full_location(project_id, location), body=body)
+        )
+        response = operation.execute(num_retries=self.num_retries)
         operation_name = response["name"]
         self._wait_for_operation_to_complete(operation_name=operation_name)
 
     def update_function(self, name: str, body: dict, update_mask: list[str]) -> None:
-        """
-        Updates Cloud Functions according to the specified update mask.
+        """Update Cloud Functions according to the specified update mask.
 
         :param name: The name of the function.
         :param body: The body required by the cloud function patch API.
         :param update_mask: The update mask - array of fields that should be patched.
-        :return: None
         """
-        # fmt: off
-        response = self.get_conn().projects().locations().functions().patch(
-            updateMask=",".join(update_mask),
-            name=name,
-            body=body
-        ).execute(num_retries=self.num_retries)
-        # fmt: on
+        operation = (
+            self.get_conn()
+            .projects()
+            .locations()
+            .functions()
+            .patch(updateMask=",".join(update_mask), name=name, body=body)
+        )
+        response = operation.execute(num_retries=self.num_retries)
         operation_name = response["name"]
         self._wait_for_operation_to_complete(operation_name=operation_name)
 
     @GoogleBaseHook.fallback_to_default_project_id
     def upload_function_zip(self, location: str, zip_path: str, project_id: str) -> str:
-        """
-        Uploads zip file with sources.
+        """Upload ZIP file with sources.
 
         :param location: The location where the function is created.
         :param zip_path: The path of the valid .zip file to upload.
-        :param project_id: Optional, Google Cloud Project project_id where the function belongs.
-            If set to None or missing, the default project_id from the Google Cloud connection is used.
+        :param project_id: Google Cloud Project ID where the function belongs.
+            If set to None or missing, the default project ID from the Google
+            Cloud connection is used.
         :return: The upload URL that was returned by generateUploadUrl method.
         """
-        # fmt: off
-
-        response = \
-            self.get_conn().projects().locations().functions().generateUploadUrl(
-                parent=self._full_location(project_id, location)
-            ).execute(num_retries=self.num_retries)
-        # fmt: on
+        operation = (
+            self.get_conn()
+            .projects()
+            .locations()
+            .functions()
+            .generateUploadUrl(parent=self._full_location(project_id, location))
+        )
+        response = operation.execute(num_retries=self.num_retries)
 
         upload_url = response.get("uploadUrl")
         with open(zip_path, "rb") as file:
@@ -161,7 +156,6 @@ class CloudFunctionsHook(GoogleBaseHook):
                 data=file,
                 # Those two headers needs to be specified according to:
                 # https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions/generateUploadUrl
-                # nopep8
                 headers={
                     "Content-type": "application/zip",
                     "x-goog-content-length-range": "0,104857600",
@@ -170,16 +164,12 @@ class CloudFunctionsHook(GoogleBaseHook):
         return upload_url
 
     def delete_function(self, name: str) -> None:
-        """
-        Deletes the specified Cloud Function.
+        """Delete the specified Cloud Function.
 
         :param name: The name of the function.
-        :return: None
         """
-        # fmt: off
-        response = self.get_conn().projects().locations().functions().delete(
-            name=name).execute(num_retries=self.num_retries)
-        # fmt: on
+        operation = self.get_conn().projects().locations().functions().delete(name=name)
+        response = operation.execute(num_retries=self.num_retries)
         operation_name = response["name"]
         self._wait_for_operation_to_complete(operation_name=operation_name)
 
@@ -191,32 +181,29 @@ class CloudFunctionsHook(GoogleBaseHook):
         location: str,
         project_id: str = PROVIDE_PROJECT_ID,
     ) -> dict:
-        """
-        Synchronously invokes a deployed Cloud Function. To be used for testing
-        purposes as very limited traffic is allowed.
+        """Invoke a deployed Cloud Function.
+
+        This is done synchronously and should only be used for testing purposes,
+        as very limited traffic is allowed.
 
         :param function_id: ID of the function to be called
         :param input_data: Input to be passed to the function
         :param location: The location where the function is located.
-        :param project_id: Optional, Google Cloud Project project_id where the function belongs.
-            If set to None or missing, the default project_id from the Google Cloud connection is used.
-        :return: None
+        :param project_id: Google Cloud Project ID where the function belongs.
+            If set to None or missing, the default project ID from the Google
+            Cloud connection is used.
         """
         name = f"projects/{project_id}/locations/{location}/functions/{function_id}"
-        # fmt: off
-        response = self.get_conn().projects().locations().functions().call(
-            name=name,
-            body=input_data
-        ).execute(num_retries=self.num_retries)
-        # fmt: on
+        operation = self.get_conn().projects().locations().functions().call(name=name, body=input_data)
+        response = operation.execute(num_retries=self.num_retries)
         if "error" in response:
             raise AirflowException(response["error"])
         return response
 
     def _wait_for_operation_to_complete(self, operation_name: str) -> dict:
-        """
-        Waits for the named operation to complete - checks status of the
-        asynchronous call.
+        """Wait for the named operation to complete.
+
+        This is used to check the status of an asynchronous call.
 
         :param operation_name: The name of the operation.
         :return: The response returned by the operation.
@@ -224,11 +211,8 @@ class CloudFunctionsHook(GoogleBaseHook):
         """
         service = self.get_conn()
         while True:
-            # fmt: off
-            operation_response = service.operations().get(
-                name=operation_name,
-            ).execute(num_retries=self.num_retries)
-            # fmt: on
+            operation = service.operations().get(name=operation_name)
+            operation_response = operation.execute(num_retries=self.num_retries)
             if operation_response.get("done"):
                 response = operation_response.get("response")
                 error = operation_response.get("error")
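Taken together, the functions.py changes replace the "# fmt: off/on" call chains with intermediate request objects without changing behaviour. A hedged usage sketch, assuming the hook's constructor keeps its api_version and gcp_conn_id parameters; the connection id and resource name below are illustrative:

    from airflow.providers.google.cloud.hooks.functions import CloudFunctionsHook

    hook = CloudFunctionsHook(api_version="v1", gcp_conn_id="google_cloud_default")
    function = hook.get_function(
        name="projects/my-project/locations/europe-west1/functions/my-function"
    )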

airflow/providers/google/cloud/hooks/gcs.py
@@ -24,6 +24,7 @@ import json
 import os
 import shutil
 import time
+import warnings
 from contextlib import contextmanager
 from datetime import datetime
 from functools import partial
@@ -44,7 +45,7 @@ from google.cloud.exceptions import GoogleCloudError
 from google.cloud.storage.retry import DEFAULT_RETRY
 from requests import Session
 
-from airflow.exceptions import AirflowException
+from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.utils.helpers import normalize_directory_path
 from airflow.providers.google.common.consts import CLIENT_INFO
 from airflow.providers.google.common.hooks.base_google import GoogleBaseAsyncHook, GoogleBaseHook
@@ -709,6 +710,7 @@ class GCSHook(GoogleBaseHook):
         max_results: int | None = None,
         prefix: str | List[str] | None = None,
         delimiter: str | None = None,
+        match_glob: str | None = None,
     ):
         """
         List all objects from the bucket with the given a single prefix or multiple prefixes.
@@ -717,9 +719,19 @@ class GCSHook(GoogleBaseHook):
         :param versions: if true, list all versions of the objects
         :param max_results: max count of items to return in a single page of responses
         :param prefix: string or list of strings which filter objects whose name begin with it/them
-        :param delimiter: filters objects based on the delimiter (for e.g '.csv')
+        :param delimiter: (Deprecated) filters objects based on the delimiter (for e.g '.csv')
+        :param match_glob: (Optional) filters objects based on the glob pattern given by the string
+            (e.g, ``'**/*/.json'``).
         :return: a stream of object names matching the filtering criteria
         """
+        if delimiter and delimiter != "/":
+            warnings.warn(
+                "Usage of 'delimiter' param is deprecated, please use 'match_glob' instead",
+                AirflowProviderDeprecationWarning,
+                stacklevel=2,
+            )
+        if match_glob and delimiter and delimiter != "/":
+            raise AirflowException("'match_glob' param cannot be used with 'delimiter' that differs than '/'")
         objects = []
         if isinstance(prefix, list):
             for prefix_item in prefix:
@@ -730,6 +742,7 @@ class GCSHook(GoogleBaseHook):
                         max_results=max_results,
                         prefix=prefix_item,
                         delimiter=delimiter,
+                        match_glob=match_glob,
                     )
                 )
         else:
@@ -740,6 +753,7 @@ class GCSHook(GoogleBaseHook):
                     max_results=max_results,
                     prefix=prefix,
                     delimiter=delimiter,
+                    match_glob=match_glob,
                 )
             )
         return objects
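The net effect on GCSHook.list() is that glob filtering replaces the deprecated delimiter filtering. A minimal sketch, assuming the default GCS connection; the bucket name, prefix, and glob are illustrative:

    from airflow.providers.google.cloud.hooks.gcs import GCSHook

    hook = GCSHook()
    # Old style, now emits AirflowProviderDeprecationWarning:
    #   hook.list(bucket_name="my-bucket", delimiter=".csv")
    # New style:
    csv_objects = hook.list(bucket_name="my-bucket", prefix="data/", match_glob="**/*.csv")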
@@ -751,6 +765,7 @@ class GCSHook(GoogleBaseHook):
         max_results: int | None = None,
         prefix: str | None = None,
         delimiter: str | None = None,
+        match_glob: str | None = None,
     ) -> List:
         """
         List all objects from the bucket with the give string prefix in name.
@@ -759,7 +774,9 @@ class GCSHook(GoogleBaseHook):
         :param versions: if true, list all versions of the objects
         :param max_results: max count of items to return in a single page of responses
         :param prefix: string which filters objects whose name begin with it
-        :param delimiter: filters objects based on the delimiter (for e.g '.csv')
+        :param delimiter: (Deprecated) filters objects based on the delimiter (for e.g '.csv')
+        :param match_glob: (Optional) filters objects based on the glob pattern given by the string
+            (e.g, ``'**/*/.json'``).
         :return: a stream of object names matching the filtering criteria
         """
         client = self.get_conn()
@@ -768,13 +785,25 @@ class GCSHook(GoogleBaseHook):
         ids = []
         page_token = None
         while True:
-            blobs = bucket.list_blobs(
-                max_results=max_results,
-                page_token=page_token,
-                prefix=prefix,
-                delimiter=delimiter,
-                versions=versions,
-            )
+            if match_glob:
+                blobs = self._list_blobs_with_match_glob(
+                    bucket=bucket,
+                    client=client,
+                    match_glob=match_glob,
+                    max_results=max_results,
+                    page_token=page_token,
+                    path=bucket.path + "/o",
+                    prefix=prefix,
+                    versions=versions,
+                )
+            else:
+                blobs = bucket.list_blobs(
+                    max_results=max_results,
+                    page_token=page_token,
+                    prefix=prefix,
+                    delimiter=delimiter,
+                    versions=versions,
+                )
 
             blob_names = []
             for blob in blobs:
@@ -792,6 +821,52 @@ class GCSHook(GoogleBaseHook):
                 break
         return ids
 
+    @staticmethod
+    def _list_blobs_with_match_glob(
+        bucket,
+        client,
+        path: str,
+        max_results: int | None = None,
+        page_token: str | None = None,
+        match_glob: str | None = None,
+        prefix: str | None = None,
+        versions: bool | None = None,
+    ) -> Any:
+        """
+        List blobs when match_glob param is given.
+        This method is a patched version of google.cloud.storage Client.list_blobs().
+        It is used as a temporary workaround to support "match_glob" param,
+        as it isn't officially supported by GCS Python client.
+        (follow `issue #1035<https://github.com/googleapis/python-storage/issues/1035>`__).
+        """
+        from google.api_core import page_iterator
+        from google.cloud.storage.bucket import _blobs_page_start, _item_to_blob
+
+        extra_params: Any = {}
+        if prefix is not None:
+            extra_params["prefix"] = prefix
+        if match_glob is not None:
+            extra_params["matchGlob"] = match_glob
+        if versions is not None:
+            extra_params["versions"] = versions
+        api_request = functools.partial(
+            client._connection.api_request, timeout=DEFAULT_TIMEOUT, retry=DEFAULT_RETRY
+        )
+
+        blobs: Any = page_iterator.HTTPIterator(
+            client=client,
+            api_request=api_request,
+            path=path,
+            item_to_value=_item_to_blob,
+            page_token=page_token,
+            max_results=max_results,
+            extra_params=extra_params,
+            page_start=_blobs_page_start,
+        )
+        blobs.prefixes = set()
+        blobs.bucket = bucket
+        return blobs
+
     def list_by_timespan(
         self,
         bucket_name: str,
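Conceptually, the patched iterator above pages through the standard JSON API "objects.list" endpoint and simply forwards matchGlob as an extra query parameter; the bucket and filter values below are illustrative:

    # GET https://storage.googleapis.com/storage/v1/b/my-bucket/o
    #     ?prefix=data/&matchGlob=**/*.json&pageToken=...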
@@ -801,6 +876,7 @@ class GCSHook(GoogleBaseHook):
         max_results: int | None = None,
         prefix: str | None = None,
         delimiter: str | None = None,
+        match_glob: str | None = None,
     ) -> List[str]:
         """
         List all objects from the bucket with the give string prefix in name that were
@@ -813,7 +889,9 @@ class GCSHook(GoogleBaseHook):
         :param max_results: max count of items to return in a single page of responses
         :param prefix: prefix string which filters objects whose name begin with
             this prefix
-        :param delimiter: filters objects based on the delimiter (for e.g '.csv')
+        :param delimiter: (Deprecated) filters objects based on the delimiter (for e.g '.csv')
+        :param match_glob: (Optional) filters objects based on the glob pattern given by the string
+            (e.g, ``'**/*/.json'``).
         :return: a stream of object names matching the filtering criteria
         """
         client = self.get_conn()
@@ -823,13 +901,25 @@ class GCSHook(GoogleBaseHook):
         page_token = None
 
         while True:
-            blobs = bucket.list_blobs(
-                max_results=max_results,
-                page_token=page_token,
-                prefix=prefix,
-                delimiter=delimiter,
-                versions=versions,
-            )
+            if match_glob:
+                blobs = self._list_blobs_with_match_glob(
+                    bucket=bucket,
+                    client=client,
+                    match_glob=match_glob,
+                    max_results=max_results,
+                    page_token=page_token,
+                    path=bucket.path + "/o",
+                    prefix=prefix,
+                    versions=versions,
+                )
+            else:
+                blobs = bucket.list_blobs(
+                    max_results=max_results,
+                    page_token=page_token,
+                    prefix=prefix,
+                    delimiter=delimiter,
+                    versions=versions,
+                )
 
             blob_names = []
             for blob in blobs:
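Finally, list_by_timespan gains the same match_glob filter. A hedged sketch, assuming the method keeps its existing timespan_start/timespan_end parameters (not shown in the hunks above); bucket name and glob are illustrative:

    from datetime import datetime, timedelta, timezone

    from airflow.providers.google.cloud.hooks.gcs import GCSHook

    hook = GCSHook()
    end = datetime.now(tz=timezone.utc)
    recent_json = hook.list_by_timespan(
        bucket_name="my-bucket",
        timespan_start=end - timedelta(hours=1),
        timespan_end=end,
        match_glob="**/*.json",
    )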