apache-airflow-providers-google 16.0.0a1__py3-none-any.whl → 16.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +43 -5
  3. airflow/providers/google/ads/operators/ads.py +1 -1
  4. airflow/providers/google/ads/transfers/ads_to_gcs.py +1 -1
  5. airflow/providers/google/cloud/hooks/bigquery.py +63 -77
  6. airflow/providers/google/cloud/hooks/cloud_sql.py +8 -4
  7. airflow/providers/google/cloud/hooks/datacatalog.py +9 -1
  8. airflow/providers/google/cloud/hooks/dataflow.py +2 -2
  9. airflow/providers/google/cloud/hooks/dataplex.py +1 -1
  10. airflow/providers/google/cloud/hooks/dataprep.py +4 -1
  11. airflow/providers/google/cloud/hooks/gcs.py +5 -5
  12. airflow/providers/google/cloud/hooks/looker.py +10 -1
  13. airflow/providers/google/cloud/hooks/mlengine.py +2 -1
  14. airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
  15. airflow/providers/google/cloud/hooks/spanner.py +2 -2
  16. airflow/providers/google/cloud/hooks/translate.py +1 -1
  17. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +0 -36
  18. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +307 -7
  19. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +44 -80
  20. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +11 -2
  21. airflow/providers/google/cloud/hooks/vision.py +2 -2
  22. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  23. airflow/providers/google/cloud/links/base.py +75 -11
  24. airflow/providers/google/cloud/links/bigquery.py +0 -47
  25. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  26. airflow/providers/google/cloud/links/bigtable.py +0 -48
  27. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  28. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  29. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  30. airflow/providers/google/cloud/links/cloud_run.py +27 -0
  31. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  32. airflow/providers/google/cloud/links/cloud_storage_transfer.py +16 -43
  33. airflow/providers/google/cloud/links/cloud_tasks.py +6 -25
  34. airflow/providers/google/cloud/links/compute.py +0 -58
  35. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  36. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  37. airflow/providers/google/cloud/links/dataflow.py +0 -34
  38. airflow/providers/google/cloud/links/dataform.py +0 -64
  39. airflow/providers/google/cloud/links/datafusion.py +1 -96
  40. airflow/providers/google/cloud/links/dataplex.py +0 -154
  41. airflow/providers/google/cloud/links/dataprep.py +0 -24
  42. airflow/providers/google/cloud/links/dataproc.py +14 -90
  43. airflow/providers/google/cloud/links/datastore.py +0 -31
  44. airflow/providers/google/cloud/links/kubernetes_engine.py +5 -59
  45. airflow/providers/google/cloud/links/life_sciences.py +0 -19
  46. airflow/providers/google/cloud/links/managed_kafka.py +0 -70
  47. airflow/providers/google/cloud/links/mlengine.py +0 -70
  48. airflow/providers/google/cloud/links/pubsub.py +0 -32
  49. airflow/providers/google/cloud/links/spanner.py +0 -33
  50. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  51. airflow/providers/google/cloud/links/translate.py +16 -186
  52. airflow/providers/google/cloud/links/vertex_ai.py +8 -224
  53. airflow/providers/google/cloud/links/workflows.py +0 -52
  54. airflow/providers/google/cloud/log/gcs_task_handler.py +4 -4
  55. airflow/providers/google/cloud/operators/alloy_db.py +69 -54
  56. airflow/providers/google/cloud/operators/automl.py +16 -14
  57. airflow/providers/google/cloud/operators/bigquery.py +49 -25
  58. airflow/providers/google/cloud/operators/bigquery_dts.py +2 -4
  59. airflow/providers/google/cloud/operators/bigtable.py +35 -6
  60. airflow/providers/google/cloud/operators/cloud_base.py +21 -1
  61. airflow/providers/google/cloud/operators/cloud_build.py +74 -31
  62. airflow/providers/google/cloud/operators/cloud_composer.py +34 -35
  63. airflow/providers/google/cloud/operators/cloud_memorystore.py +68 -42
  64. airflow/providers/google/cloud/operators/cloud_run.py +9 -1
  65. airflow/providers/google/cloud/operators/cloud_sql.py +11 -15
  66. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +0 -2
  67. airflow/providers/google/cloud/operators/compute.py +7 -39
  68. airflow/providers/google/cloud/operators/datacatalog.py +156 -20
  69. airflow/providers/google/cloud/operators/dataflow.py +37 -14
  70. airflow/providers/google/cloud/operators/dataform.py +14 -4
  71. airflow/providers/google/cloud/operators/datafusion.py +4 -12
  72. airflow/providers/google/cloud/operators/dataplex.py +180 -96
  73. airflow/providers/google/cloud/operators/dataprep.py +0 -4
  74. airflow/providers/google/cloud/operators/dataproc.py +10 -16
  75. airflow/providers/google/cloud/operators/dataproc_metastore.py +95 -87
  76. airflow/providers/google/cloud/operators/datastore.py +21 -5
  77. airflow/providers/google/cloud/operators/dlp.py +3 -26
  78. airflow/providers/google/cloud/operators/functions.py +15 -6
  79. airflow/providers/google/cloud/operators/gcs.py +1 -7
  80. airflow/providers/google/cloud/operators/kubernetes_engine.py +53 -92
  81. airflow/providers/google/cloud/operators/life_sciences.py +0 -1
  82. airflow/providers/google/cloud/operators/managed_kafka.py +106 -51
  83. airflow/providers/google/cloud/operators/mlengine.py +0 -1
  84. airflow/providers/google/cloud/operators/pubsub.py +4 -5
  85. airflow/providers/google/cloud/operators/spanner.py +0 -4
  86. airflow/providers/google/cloud/operators/speech_to_text.py +0 -1
  87. airflow/providers/google/cloud/operators/stackdriver.py +0 -8
  88. airflow/providers/google/cloud/operators/tasks.py +0 -11
  89. airflow/providers/google/cloud/operators/text_to_speech.py +0 -1
  90. airflow/providers/google/cloud/operators/translate.py +37 -13
  91. airflow/providers/google/cloud/operators/translate_speech.py +0 -1
  92. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +31 -18
  93. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +28 -8
  94. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +38 -25
  95. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +69 -7
  96. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +42 -8
  97. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +531 -0
  98. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +93 -117
  99. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +10 -8
  100. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +56 -10
  101. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +29 -6
  102. airflow/providers/google/cloud/operators/vertex_ai/ray.py +9 -6
  103. airflow/providers/google/cloud/operators/workflows.py +1 -9
  104. airflow/providers/google/cloud/sensors/bigquery.py +1 -1
  105. airflow/providers/google/cloud/sensors/bigquery_dts.py +6 -1
  106. airflow/providers/google/cloud/sensors/bigtable.py +15 -3
  107. airflow/providers/google/cloud/sensors/cloud_composer.py +6 -1
  108. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +6 -1
  109. airflow/providers/google/cloud/sensors/dataflow.py +3 -3
  110. airflow/providers/google/cloud/sensors/dataform.py +6 -1
  111. airflow/providers/google/cloud/sensors/datafusion.py +6 -1
  112. airflow/providers/google/cloud/sensors/dataplex.py +6 -1
  113. airflow/providers/google/cloud/sensors/dataprep.py +6 -1
  114. airflow/providers/google/cloud/sensors/dataproc.py +6 -1
  115. airflow/providers/google/cloud/sensors/dataproc_metastore.py +6 -1
  116. airflow/providers/google/cloud/sensors/gcs.py +9 -3
  117. airflow/providers/google/cloud/sensors/looker.py +6 -1
  118. airflow/providers/google/cloud/sensors/pubsub.py +8 -3
  119. airflow/providers/google/cloud/sensors/tasks.py +6 -1
  120. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +6 -1
  121. airflow/providers/google/cloud/sensors/workflows.py +6 -1
  122. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +1 -1
  123. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +1 -1
  124. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +10 -7
  125. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +1 -2
  126. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +0 -1
  127. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -1
  128. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  129. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +1 -1
  130. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +2 -2
  131. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +1 -2
  132. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +1 -1
  133. airflow/providers/google/cloud/transfers/gcs_to_local.py +1 -1
  134. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +1 -1
  135. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +5 -1
  136. airflow/providers/google/cloud/transfers/gdrive_to_local.py +1 -1
  137. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  138. airflow/providers/google/cloud/transfers/local_to_gcs.py +1 -1
  139. airflow/providers/google/cloud/transfers/s3_to_gcs.py +11 -5
  140. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +1 -1
  141. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +1 -1
  142. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +2 -2
  143. airflow/providers/google/cloud/transfers/sql_to_gcs.py +1 -1
  144. airflow/providers/google/cloud/triggers/bigquery.py +32 -5
  145. airflow/providers/google/cloud/triggers/dataproc.py +62 -10
  146. airflow/providers/google/cloud/utils/field_validator.py +1 -2
  147. airflow/providers/google/common/auth_backend/google_openid.py +2 -1
  148. airflow/providers/google/common/deprecated.py +2 -1
  149. airflow/providers/google/common/hooks/base_google.py +7 -3
  150. airflow/providers/google/common/links/storage.py +0 -22
  151. airflow/providers/google/firebase/operators/firestore.py +1 -1
  152. airflow/providers/google/get_provider_info.py +14 -16
  153. airflow/providers/google/leveldb/hooks/leveldb.py +30 -1
  154. airflow/providers/google/leveldb/operators/leveldb.py +1 -1
  155. airflow/providers/google/marketing_platform/links/analytics_admin.py +3 -6
  156. airflow/providers/google/marketing_platform/operators/analytics_admin.py +0 -1
  157. airflow/providers/google/marketing_platform/operators/campaign_manager.py +4 -4
  158. airflow/providers/google/marketing_platform/operators/display_video.py +6 -6
  159. airflow/providers/google/marketing_platform/operators/search_ads.py +1 -1
  160. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +6 -1
  161. airflow/providers/google/marketing_platform/sensors/display_video.py +6 -1
  162. airflow/providers/google/suite/operators/sheets.py +3 -3
  163. airflow/providers/google/suite/sensors/drive.py +6 -1
  164. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +1 -1
  165. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  166. airflow/providers/google/suite/transfers/local_to_drive.py +1 -1
  167. airflow/providers/google/version_compat.py +28 -0
  168. {apache_airflow_providers_google-16.0.0a1.dist-info → apache_airflow_providers_google-16.1.0.dist-info}/METADATA +35 -35
  169. {apache_airflow_providers_google-16.0.0a1.dist-info → apache_airflow_providers_google-16.1.0.dist-info}/RECORD +171 -170
  170. airflow/providers/google/cloud/links/automl.py +0 -193
  171. {apache_airflow_providers_google-16.0.0a1.dist-info → apache_airflow_providers_google-16.1.0.dist-info}/WHEEL +0 -0
  172. {apache_airflow_providers_google-16.0.0a1.dist-info → apache_airflow_providers_google-16.1.0.dist-info}/entry_points.txt +0 -0
@@ -19,8 +19,8 @@ from __future__ import annotations

 from collections.abc import Sequence
 from typing import TYPE_CHECKING

-from airflow.models import BaseOperator
 from airflow.providers.google.suite.hooks.drive import GoogleDriveHook
+from airflow.providers.google.version_compat import BaseOperator

 if TYPE_CHECKING:
     from airflow.utils.context import Context
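This `airflow.models.BaseOperator` → `airflow.providers.google.version_compat.BaseOperator` import swap repeats across most operator and transfer modules in this release. The shim itself (`airflow/providers/google/version_compat.py`, entry 167 in the list above, +28 lines) is not shown in this diff; a plausible minimal sketch, assuming it follows the usual provider pattern of re-exporting from `airflow.sdk` on Airflow 3 and falling back to the legacy import paths on Airflow 2:

```python
# Hypothetical sketch of airflow/providers/google/version_compat.py; the real
# 28-line module is not shown in this diff.
from __future__ import annotations

from packaging.version import Version

from airflow import __version__ as AIRFLOW_VERSION

# True when running under Airflow 3.x (pre-release suffixes ignored).
AIRFLOW_V_3_0_PLUS = Version(Version(AIRFLOW_VERSION).base_version) >= Version("3.0.0")

if AIRFLOW_V_3_0_PLUS:
    # Airflow 3 exposes the public task API under airflow.sdk.
    from airflow.sdk import BaseHook, BaseOperator
else:
    # Airflow 2 fallback locations.
    from airflow.hooks.base import BaseHook  # type: ignore[no-redef]
    from airflow.models import BaseOperator  # type: ignore[no-redef]

__all__ = ["AIRFLOW_V_3_0_PLUS", "BaseHook", "BaseOperator"]
```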
@@ -0,0 +1,193 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""This module contains an operator to move data from an HTTP endpoint to GCS."""
+
+from __future__ import annotations
+
+from functools import cached_property
+from typing import TYPE_CHECKING, Any
+
+from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.version_compat import BaseOperator
+from airflow.providers.http.hooks.http import HttpHook
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from requests.auth import AuthBase
+
+    from airflow.utils.context import Context
+
+
+class HttpToGCSOperator(BaseOperator):
+    """
+    Calls an endpoint on an HTTP system to execute an action and store the result in GCS.
+
+    :param http_conn_id: The :ref:`http connection<howto/connection:http>` to run
+        the operator against
+    :param endpoint: The relative part of the full url. (templated)
+    :param method: The HTTP method to use, default = "GET"
+    :param data: The data to pass. POST-data in POST/PUT and params
+        in the URL for a GET request. (templated)
+    :param headers: The HTTP headers to be added to the GET request
+    :param response_check: A check against the 'requests' response object.
+        The callable takes the response object as the first positional argument
+        and optionally any number of keyword arguments available in the context dictionary.
+        It should return True for 'pass' and False otherwise.
+    :param response_filter: A function allowing you to manipulate the response
+        text. e.g. response_filter=lambda response: json.loads(response.text).
+        The callable takes the response object as the first positional argument
+        and optionally any number of keyword arguments available in the context dictionary.
+    :param extra_options: Extra options for the 'requests' library, see the
+        'requests' documentation (options to modify timeout, ssl, etc.)
+    :param log_response: Log the response (default: False)
+    :param auth_type: The auth type for the service
+    :param tcp_keep_alive: Enable TCP Keep Alive for the connection.
+    :param tcp_keep_alive_idle: The TCP Keep Alive Idle parameter (corresponds to ``socket.TCP_KEEPIDLE``).
+    :param tcp_keep_alive_count: The TCP Keep Alive count parameter (corresponds to ``socket.TCP_KEEPCNT``)
+    :param tcp_keep_alive_interval: The TCP Keep Alive interval parameter (corresponds to
+        ``socket.TCP_KEEPINTVL``)
+    :param gcp_conn_id: The connection ID to use when fetching connection info.
+    :param impersonation_chain: Optional service account to impersonate using short-term credentials,
+        or chained list of accounts required to get the access_token of the last account in the list,
+        which will be impersonated in the request. If set as a string,
+        the account must grant the originating account the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity,
+        with the first account from the list granting this role to the originating account.
+    :param bucket_name: The bucket to upload to.
+    :param object_name: The object name to set when uploading the file.
+    :param mime_type: The file mime type set when uploading the file.
+    :param gzip: Option to compress local file or file data for upload
+    :param encoding: bytes encoding for file data if provided as string
+    :param chunk_size: Blob chunk size.
+    :param timeout: Request timeout in seconds.
+    :param num_max_attempts: Number of attempts to try to upload the file.
+    :param metadata: The metadata to be uploaded with the file.
+    :param cache_control: Cache-Control metadata field.
+    :param user_project: The identifier of the Google Cloud project to bill for the request. Required for Requester Pays buckets.
+    """
+
+    template_fields: Sequence[str] = (
+        "http_conn_id",
+        "endpoint",
+        "data",
+        "headers",
+        "gcp_conn_id",
+        "bucket_name",
+        "object_name",
+    )
+    template_fields_renderers = {"headers": "json", "data": "py"}
+    template_ext: Sequence[str] = ()
+    ui_color = "#f4a460"
+
+    def __init__(
+        self,
+        *,
+        endpoint: str | None = None,
+        method: str = "GET",
+        data: Any = None,
+        headers: dict[str, str] | None = None,
+        extra_options: dict[str, Any] | None = None,
+        http_conn_id: str = "http_default",
+        log_response: bool = False,
+        auth_type: type[AuthBase] | None = None,
+        tcp_keep_alive: bool = True,
+        tcp_keep_alive_idle: int = 120,
+        tcp_keep_alive_count: int = 20,
+        tcp_keep_alive_interval: int = 30,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        bucket_name: str,
+        object_name: str,
+        mime_type: str | None = None,
+        gzip: bool = False,
+        encoding: str | None = None,
+        chunk_size: int | None = None,
+        timeout: int | None = None,
+        num_max_attempts: int = 3,
+        metadata: dict | None = None,
+        cache_control: str | None = None,
+        user_project: str | None = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.http_conn_id = http_conn_id
+        self.method = method
+        self.endpoint = endpoint
+        self.headers = headers or {}
+        self.data = data or {}
+        self.extra_options = extra_options or {}
+        self.log_response = log_response
+        self.auth_type = auth_type
+        self.tcp_keep_alive = tcp_keep_alive
+        self.tcp_keep_alive_idle = tcp_keep_alive_idle
+        self.tcp_keep_alive_count = tcp_keep_alive_count
+        self.tcp_keep_alive_interval = tcp_keep_alive_interval
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.bucket_name = bucket_name
+        self.object_name = object_name
+        self.mime_type = mime_type
+        self.gzip = gzip
+        self.encoding = encoding
+        self.chunk_size = chunk_size
+        self.timeout = timeout
+        self.num_max_attempts = num_max_attempts
+        self.metadata = metadata
+        self.cache_control = cache_control
+        self.user_project = user_project
+
+    @cached_property
+    def http_hook(self) -> HttpHook:
+        """Create and return an HttpHook."""
+        return HttpHook(
+            self.method,
+            http_conn_id=self.http_conn_id,
+            auth_type=self.auth_type,
+            tcp_keep_alive=self.tcp_keep_alive,
+            tcp_keep_alive_idle=self.tcp_keep_alive_idle,
+            tcp_keep_alive_count=self.tcp_keep_alive_count,
+            tcp_keep_alive_interval=self.tcp_keep_alive_interval,
+        )
+
+    @cached_property
+    def gcs_hook(self) -> GCSHook:
+        """Create and return a GCSHook."""
+        return GCSHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
+
+    def execute(self, context: Context):
+        self.log.info("Calling HTTP method")
+        response = self.http_hook.run(
+            endpoint=self.endpoint, data=self.data, headers=self.headers, extra_options=self.extra_options
+        )
+
+        self.log.info("Uploading to GCS")
+        self.gcs_hook.upload(
+            data=response.content,
+            bucket_name=self.bucket_name,
+            object_name=self.object_name,
+            mime_type=self.mime_type,
+            gzip=self.gzip,
+            encoding=self.encoding or response.encoding,
+            chunk_size=self.chunk_size,
+            timeout=self.timeout,
+            num_max_attempts=self.num_max_attempts,
+            metadata=self.metadata,
+            cache_control=self.cache_control,
+            user_project=self.user_project,
+        )
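Since `HttpToGCSOperator` is new in 16.1.0, a minimal usage sketch may help; the connection IDs, endpoint, and bucket below are placeholders, not values from this release:

```python
# Hypothetical DAG snippet for the new HttpToGCSOperator; endpoint, bucket,
# and connection IDs are placeholders.
from airflow import DAG
from airflow.providers.google.cloud.transfers.http_to_gcs import HttpToGCSOperator

with DAG(dag_id="http_to_gcs_example", schedule=None) as dag:
    fetch_report = HttpToGCSOperator(
        task_id="fetch_report",
        http_conn_id="http_default",  # HTTP system to call
        endpoint="api/v1/report.csv",  # relative URL, templated
        method="GET",
        gcp_conn_id="google_cloud_default",
        bucket_name="my-landing-bucket",  # destination GCS bucket (required)
        object_name="reports/{{ ds }}/report.csv",  # destination object, templated
        mime_type="text/csv",
    )
```

Note from the `execute` method above that the response body (`response.content`) is handed straight to `GCSHook.upload`, so no intermediate local file is written.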
@@ -24,8 +24,8 @@ from collections.abc import Sequence
 from glob import glob
 from typing import TYPE_CHECKING

-from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.version_compat import BaseOperator

 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -181,21 +181,27 @@ class S3ToGCSOperator(S3ListOperator):
                 'The destination Google Cloud Storage path must end with a slash "/" or be empty.'
             )

-    def execute(self, context: Context):
-        self._check_inputs()
+    def _get_files(self, context: Context, gcs_hook: GCSHook) -> list[str]:
         # use the super method to list all the files in an S3 bucket/key
         s3_objects = super().execute(context)

+        if not self.replace:
+            s3_objects = self.exclude_existing_objects(s3_objects=s3_objects, gcs_hook=gcs_hook)
+
+        return s3_objects
+
+    def execute(self, context: Context):
+        self._check_inputs()
         gcs_hook = GCSHook(
             gcp_conn_id=self.gcp_conn_id,
             impersonation_chain=self.google_impersonation_chain,
         )
-        if not self.replace:
-            s3_objects = self.exclude_existing_objects(s3_objects=s3_objects, gcs_hook=gcs_hook)
-
         s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
+
+        s3_objects = self._get_files(context, gcs_hook)
         if not s3_objects:
             self.log.info("In sync, no files needed to be uploaded to Google Cloud Storage")
+
         elif self.deferrable:
             self.transfer_files_async(s3_objects, gcs_hook, s3_hook)
         else:
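The refactor extracts listing plus de-duplication into `_get_files`, so the existing-object filter now runs in both the synchronous and deferrable paths. Caller-facing behavior is unchanged; a typical invocation, with placeholder bucket names and prefix:

```python
# Hypothetical usage of S3ToGCSOperator; buckets and prefix are placeholders.
from airflow.providers.google.cloud.transfers.s3_to_gcs import S3ToGCSOperator

sync_exports = S3ToGCSOperator(
    task_id="sync_exports",
    bucket="my-s3-bucket",  # source S3 bucket
    prefix="exports/",  # only keys under this prefix are listed
    dest_gcs="gs://my-gcs-bucket/exports/",  # must end with "/" (see _check_inputs)
    replace=False,  # objects already in GCS are filtered out in _get_files
)
```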
@@ -21,8 +21,8 @@ import tempfile
 from collections.abc import Sequence
 from typing import TYPE_CHECKING

-from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.version_compat import BaseOperator
 from airflow.providers.salesforce.hooks.salesforce import SalesforceHook

 if TYPE_CHECKING:
@@ -26,8 +26,8 @@ from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING

 from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.version_compat import BaseOperator
 from airflow.providers.sftp.hooks.sftp import SFTPHook

 if TYPE_CHECKING:
@@ -21,9 +21,9 @@ from collections.abc import Sequence
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Any

-from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 from airflow.providers.google.suite.hooks.sheets import GSheetsHook
+from airflow.providers.google.version_compat import BaseOperator

 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -130,5 +130,5 @@ class GoogleSheetsToGCSOperator(BaseOperator):
             gcs_path_to_file = self._upload_data(gcs_hook, sheet_hook, sheet_range, data)
             destination_array.append(gcs_path_to_file)

-        self.xcom_push(context, "destination_objects", destination_array)
+        context["ti"].xcom_push(key="destination_objects", value=destination_array)
         return destination_array
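Operator-level `self.xcom_push` is gone in Airflow 3, so the value is now pushed through the task instance carried in the context; the key and the return value are unchanged. A downstream task can still read it the usual way, e.g. inside another operator's `execute(context)` (the `task_ids` value here is a placeholder):

```python
# Hypothetical downstream read; "sheets_to_gcs" is a placeholder task_id.
destination_objects = context["ti"].xcom_pull(task_ids="sheets_to_gcs", key="destination_objects")
```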
@@ -30,8 +30,8 @@ from typing import TYPE_CHECKING, Any
 import pyarrow as pa
 import pyarrow.parquet as pq

-from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.version_compat import BaseOperator

 if TYPE_CHECKING:
     from airflow.providers.common.compat.openlineage.facet import OutputDataset
@@ -22,10 +22,12 @@ from typing import TYPE_CHECKING, Any, SupportsAbs

 from aiohttp import ClientSession
 from aiohttp.client_exceptions import ClientResponseError
+from asgiref.sync import sync_to_async

 from airflow.exceptions import AirflowException
 from airflow.models.taskinstance import TaskInstance
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryAsyncHook, BigQueryTableAsyncHook
+from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 from airflow.utils.session import provide_session
 from airflow.utils.state import TaskInstanceState
@@ -116,16 +118,41 @@ class BigQueryInsertJobTrigger(BaseTrigger):
         )
         return task_instance

-    def safe_to_cancel(self) -> bool:
+    async def get_task_state(self):
+        from airflow.sdk.execution_time.task_runner import RuntimeTaskInstance
+
+        task_states_response = await sync_to_async(RuntimeTaskInstance.get_task_states)(
+            dag_id=self.task_instance.dag_id,
+            task_ids=[self.task_instance.task_id],
+            run_ids=[self.task_instance.run_id],
+            map_index=self.task_instance.map_index,
+        )
+        try:
+            task_state = task_states_response[self.task_instance.run_id][self.task_instance.task_id]
+        except Exception:
+            raise AirflowException(
+                "TaskInstance with dag_id: %s, task_id: %s, run_id: %s and map_index: %s is not found",
+                self.task_instance.dag_id,
+                self.task_instance.task_id,
+                self.task_instance.run_id,
+                self.task_instance.map_index,
+            )
+        return task_state
+
+    async def safe_to_cancel(self) -> bool:
         """
         Whether it is safe to cancel the external job which is being executed by this trigger.

         This is to avoid the case that `asyncio.CancelledError` is called because the trigger itself is stopped.
         Because in those cases, we should NOT cancel the external job.
         """
-        # Database query is needed to get the latest state of the task instance.
-        task_instance = self.get_task_instance()  # type: ignore[call-arg]
-        return task_instance.state != TaskInstanceState.DEFERRED
+        if AIRFLOW_V_3_0_PLUS:
+            task_state = await self.get_task_state()
+        else:
+            # Database query is needed to get the latest state of the task instance.
+            task_instance = self.get_task_instance()  # type: ignore[call-arg]
+            task_state = task_instance.state
+        return task_state != TaskInstanceState.DEFERRED

     async def run(self) -> AsyncIterator[TriggerEvent]:  # type: ignore[override]
         """Get current job execution status and yields a TriggerEvent."""
@@ -155,7 +182,7 @@ class BigQueryInsertJobTrigger(BaseTrigger):
                 )
                 await asyncio.sleep(self.poll_interval)
         except asyncio.CancelledError:
-            if self.job_id and self.cancel_on_kill and self.safe_to_cancel():
+            if self.job_id and self.cancel_on_kill and await self.safe_to_cancel():
                 self.log.info(
                     "The job is safe to cancel as the airflow TaskInstance is not in deferred state."
                 )
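`safe_to_cancel` becomes a coroutine because, on Airflow 3, the trigger can no longer query the metadatabase directly; it instead asks the Task SDK for the task's state via `RuntimeTaskInstance.get_task_states`, a blocking call wrapped in `asgiref.sync.sync_to_async` so the triggerer's event loop stays responsive. A self-contained illustration of that wrapping pattern, with a stand-in for the blocking call:

```python
# Minimal sketch of the sync_to_async pattern used above; fetch_state is a
# stand-in for a blocking call such as RuntimeTaskInstance.get_task_states.
import asyncio

from asgiref.sync import sync_to_async


def fetch_state() -> str:
    # Blocking work (network/database) that must not run on the event loop.
    return "deferred"


async def safe_to_cancel() -> bool:
    # sync_to_async runs fetch_state in a worker thread and awaits the result.
    state = await sync_to_async(fetch_state)()
    return state != "deferred"


print(asyncio.run(safe_to_cancel()))  # False while the task is deferred
```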
@@ -25,6 +25,7 @@ import time
 from collections.abc import AsyncIterator, Sequence
 from typing import TYPE_CHECKING, Any

+from asgiref.sync import sync_to_async
 from google.api_core.exceptions import NotFound
 from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus

@@ -33,6 +34,7 @@ from airflow.models.taskinstance import TaskInstance
 from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook, DataprocHook
 from airflow.providers.google.cloud.utils.dataproc import DataprocOperationType
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
+from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 from airflow.utils.session import provide_session
 from airflow.utils.state import TaskInstanceState
@@ -141,16 +143,41 @@ class DataprocSubmitTrigger(DataprocBaseTrigger):
         )
         return task_instance

-    def safe_to_cancel(self) -> bool:
+    async def get_task_state(self):
+        from airflow.sdk.execution_time.task_runner import RuntimeTaskInstance
+
+        task_states_response = await sync_to_async(RuntimeTaskInstance.get_task_states)(
+            dag_id=self.task_instance.dag_id,
+            task_ids=[self.task_instance.task_id],
+            run_ids=[self.task_instance.run_id],
+            map_index=self.task_instance.map_index,
+        )
+        try:
+            task_state = task_states_response[self.task_instance.run_id][self.task_instance.task_id]
+        except Exception:
+            raise AirflowException(
+                "TaskInstance with dag_id: %s, task_id: %s, run_id: %s and map_index: %s is not found",
+                self.task_instance.dag_id,
+                self.task_instance.task_id,
+                self.task_instance.run_id,
+                self.task_instance.map_index,
+            )
+        return task_state
+
+    async def safe_to_cancel(self) -> bool:
         """
         Whether it is safe to cancel the external job which is being executed by this trigger.

         This is to avoid the case that `asyncio.CancelledError` is called because the trigger itself is stopped.
         Because in those cases, we should NOT cancel the external job.
         """
-        # Database query is needed to get the latest state of the task instance.
-        task_instance = self.get_task_instance()  # type: ignore[call-arg]
-        return task_instance.state != TaskInstanceState.DEFERRED
+        if AIRFLOW_V_3_0_PLUS:
+            task_state = await self.get_task_state()
+        else:
+            # Database query is needed to get the latest state of the task instance.
+            task_instance = self.get_task_instance()  # type: ignore[call-arg]
+            task_state = task_instance.state
+        return task_state != TaskInstanceState.DEFERRED

     async def run(self):
         try:
@@ -167,7 +194,7 @@ class DataprocSubmitTrigger(DataprocBaseTrigger):
         except asyncio.CancelledError:
             self.log.info("Task got cancelled.")
             try:
-                if self.job_id and self.cancel_on_kill and self.safe_to_cancel():
+                if self.job_id and self.cancel_on_kill and await self.safe_to_cancel():
                     self.log.info(
                         "Cancelling the job as it is safe to do so. Note that the airflow TaskInstance is not"
                         " in deferred state."
@@ -243,16 +270,41 @@ class DataprocClusterTrigger(DataprocBaseTrigger):
         )
         return task_instance

-    def safe_to_cancel(self) -> bool:
+    async def get_task_state(self):
+        from airflow.sdk.execution_time.task_runner import RuntimeTaskInstance
+
+        task_states_response = await sync_to_async(RuntimeTaskInstance.get_task_states)(
+            dag_id=self.task_instance.dag_id,
+            task_ids=[self.task_instance.task_id],
+            run_ids=[self.task_instance.run_id],
+            map_index=self.task_instance.map_index,
+        )
+        try:
+            task_state = task_states_response[self.task_instance.run_id][self.task_instance.task_id]
+        except Exception:
+            raise AirflowException(
+                "TaskInstance with dag_id: %s, task_id: %s, run_id: %s and map_index: %s is not found",
+                self.task_instance.dag_id,
+                self.task_instance.task_id,
+                self.task_instance.run_id,
+                self.task_instance.map_index,
+            )
+        return task_state
+
+    async def safe_to_cancel(self) -> bool:
         """
         Whether it is safe to cancel the external job which is being executed by this trigger.

         This is to avoid the case that `asyncio.CancelledError` is called because the trigger itself is stopped.
         Because in those cases, we should NOT cancel the external job.
         """
-        # Database query is needed to get the latest state of the task instance.
-        task_instance = self.get_task_instance()  # type: ignore[call-arg]
-        return task_instance.state != TaskInstanceState.DEFERRED
+        if AIRFLOW_V_3_0_PLUS:
+            task_state = await self.get_task_state()
+        else:
+            # Database query is needed to get the latest state of the task instance.
+            task_instance = self.get_task_instance()  # type: ignore[call-arg]
+            task_state = task_instance.state
+        return task_state != TaskInstanceState.DEFERRED

     async def run(self) -> AsyncIterator[TriggerEvent]:
         try:
@@ -283,7 +335,7 @@ class DataprocClusterTrigger(DataprocBaseTrigger):
                 await asyncio.sleep(self.polling_interval_seconds)
         except asyncio.CancelledError:
             try:
-                if self.delete_on_error and self.safe_to_cancel():
+                if self.delete_on_error and await self.safe_to_cancel():
                     self.log.info(
                         "Deleting the cluster as it is safe to delete as the airflow TaskInstance is not in "
                         "deferred state."
@@ -134,8 +134,7 @@ Here are the guidelines that you should follow to make validation forward-compatible:
 from __future__ import annotations

 import re
-from collections.abc import Sequence
-from typing import Callable
+from collections.abc import Callable, Sequence

 from airflow.exceptions import AirflowException
 from airflow.utils.log.logging_mixin import LoggingMixin
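This hunk and the following ones apply the same housekeeping change: `typing.Callable` has been a deprecated alias of `collections.abc.Callable` since Python 3.9, and the two are interchangeable in annotations:

```python
# The two spellings are equivalent for annotations; collections.abc is the
# non-deprecated home of Callable since Python 3.9.
from collections.abc import Callable


def apply_twice(fn: Callable[[int], int], x: int) -> int:
    # Same static and runtime meaning as typing.Callable[[int], int].
    return fn(fn(x))


print(apply_twice(lambda n: n + 1, 0))  # 2
```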
@@ -20,8 +20,9 @@
 from __future__ import annotations

 import logging
+from collections.abc import Callable
 from functools import wraps
-from typing import Callable, TypeVar, cast
+from typing import TypeVar, cast

 import google
 import google.auth.transport.requests
@@ -18,8 +18,9 @@ from __future__ import annotations

 import inspect
 import re
+from collections.abc import Callable
 from datetime import date, datetime
-from typing import Any, Callable
+from typing import Any

 from deprecated import deprecated as standard_deprecated
 from deprecated.classic import ClassicAdapter
@@ -26,10 +26,10 @@ import json
 import logging
 import os
 import tempfile
-from collections.abc import Generator, Sequence
+from collections.abc import Callable, Generator, Sequence
 from contextlib import ExitStack, contextmanager
 from subprocess import check_output
-from typing import TYPE_CHECKING, Any, Callable, TypeVar, cast
+from typing import TYPE_CHECKING, Any, TypeVar, cast

 import google.auth
 import google.oauth2.service_account
@@ -50,12 +50,16 @@ from requests import Session

 from airflow import version
 from airflow.exceptions import AirflowException
-from airflow.hooks.base import BaseHook
 from airflow.providers.google.cloud.utils.credentials_provider import (
     _get_scopes,
     _get_target_principal_and_delegates,
     get_credentials_and_project_id,
 )
+
+try:
+    from airflow.sdk import BaseHook
+except ImportError:
+    from airflow.hooks.base import BaseHook  # type: ignore[attr-defined,no-redef]
 from airflow.utils.process_utils import patch_environ

 if TYPE_CHECKING:
@@ -18,18 +18,12 @@

 from __future__ import annotations

-from typing import TYPE_CHECKING
-
 from airflow.providers.google.cloud.links.base import BaseGoogleLink

 BASE_LINK = "https://console.cloud.google.com"
 GCS_STORAGE_LINK = BASE_LINK + "/storage/browser/{uri};tab=objects?project={project_id}"
 GCS_FILE_DETAILS_LINK = BASE_LINK + "/storage/browser/_details/{uri};tab=live_object?project={project_id}"

-if TYPE_CHECKING:
-    from airflow.models import BaseOperator
-    from airflow.utils.context import Context
-

 class StorageLink(BaseGoogleLink):
     """Helper class for constructing GCS Storage link."""
@@ -38,14 +32,6 @@ class StorageLink(BaseGoogleLink):
     key = "storage_conf"
     format_str = GCS_STORAGE_LINK

-    @staticmethod
-    def persist(context: Context, task_instance, uri: str, project_id: str | None):
-        task_instance.xcom_push(
-            context=context,
-            key=StorageLink.key,
-            value={"uri": uri, "project_id": project_id},
-        )
-

 class FileDetailsLink(BaseGoogleLink):
     """Helper class for constructing GCS file details link."""
@@ -53,11 +39,3 @@ class FileDetailsLink(BaseGoogleLink):
     name = "GCS File Details"
     key = "file_details"
     format_str = GCS_FILE_DETAILS_LINK
-
-    @staticmethod
-    def persist(context: Context, task_instance: BaseOperator, uri: str, project_id: str | None):
-        task_instance.xcom_push(
-            context=context,
-            key=FileDetailsLink.key,
-            value={"uri": uri, "project_id": project_id},
-        )
@@ -20,9 +20,9 @@ from collections.abc import Sequence
 from typing import TYPE_CHECKING

 from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.providers.google.firebase.hooks.firestore import CloudFirestoreHook
+from airflow.providers.google.version_compat import BaseOperator

 if TYPE_CHECKING:
     from airflow.utils.context import Context