apache-airflow-providers-google 12.0.0rc2__py3-none-any.whl → 14.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163)
  1. airflow/providers/google/LICENSE +0 -52
  2. airflow/providers/google/__init__.py +1 -1
  3. airflow/providers/google/ads/hooks/ads.py +27 -13
  4. airflow/providers/google/ads/transfers/ads_to_gcs.py +18 -4
  5. airflow/providers/google/assets/bigquery.py +17 -0
  6. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +2 -3
  7. airflow/providers/google/cloud/hooks/alloy_db.py +736 -8
  8. airflow/providers/google/cloud/hooks/automl.py +10 -4
  9. airflow/providers/google/cloud/hooks/bigquery.py +125 -22
  10. airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
  11. airflow/providers/google/cloud/hooks/bigtable.py +2 -3
  12. airflow/providers/google/cloud/hooks/cloud_batch.py +3 -4
  13. airflow/providers/google/cloud/hooks/cloud_build.py +4 -5
  14. airflow/providers/google/cloud/hooks/cloud_composer.py +3 -4
  15. airflow/providers/google/cloud/hooks/cloud_memorystore.py +3 -4
  16. airflow/providers/google/cloud/hooks/cloud_run.py +3 -4
  17. airflow/providers/google/cloud/hooks/cloud_sql.py +7 -3
  18. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +119 -7
  19. airflow/providers/google/cloud/hooks/compute.py +3 -3
  20. airflow/providers/google/cloud/hooks/datacatalog.py +3 -4
  21. airflow/providers/google/cloud/hooks/dataflow.py +12 -12
  22. airflow/providers/google/cloud/hooks/dataform.py +2 -3
  23. airflow/providers/google/cloud/hooks/datafusion.py +2 -2
  24. airflow/providers/google/cloud/hooks/dataplex.py +1032 -11
  25. airflow/providers/google/cloud/hooks/dataproc.py +4 -5
  26. airflow/providers/google/cloud/hooks/dataproc_metastore.py +3 -4
  27. airflow/providers/google/cloud/hooks/dlp.py +3 -4
  28. airflow/providers/google/cloud/hooks/gcs.py +7 -6
  29. airflow/providers/google/cloud/hooks/kms.py +2 -3
  30. airflow/providers/google/cloud/hooks/kubernetes_engine.py +8 -8
  31. airflow/providers/google/cloud/hooks/life_sciences.py +1 -1
  32. airflow/providers/google/cloud/hooks/managed_kafka.py +482 -0
  33. airflow/providers/google/cloud/hooks/natural_language.py +2 -3
  34. airflow/providers/google/cloud/hooks/os_login.py +2 -3
  35. airflow/providers/google/cloud/hooks/pubsub.py +6 -6
  36. airflow/providers/google/cloud/hooks/secret_manager.py +2 -3
  37. airflow/providers/google/cloud/hooks/spanner.py +2 -2
  38. airflow/providers/google/cloud/hooks/speech_to_text.py +2 -3
  39. airflow/providers/google/cloud/hooks/stackdriver.py +4 -4
  40. airflow/providers/google/cloud/hooks/tasks.py +3 -4
  41. airflow/providers/google/cloud/hooks/text_to_speech.py +2 -3
  42. airflow/providers/google/cloud/hooks/translate.py +236 -5
  43. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +9 -4
  44. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -4
  45. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +4 -5
  46. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +3 -4
  47. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -3
  48. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +3 -4
  49. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -181
  50. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -4
  51. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -3
  52. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -4
  53. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -3
  54. airflow/providers/google/cloud/hooks/video_intelligence.py +2 -3
  55. airflow/providers/google/cloud/hooks/vision.py +3 -4
  56. airflow/providers/google/cloud/hooks/workflows.py +2 -3
  57. airflow/providers/google/cloud/links/alloy_db.py +46 -0
  58. airflow/providers/google/cloud/links/bigquery.py +25 -0
  59. airflow/providers/google/cloud/links/dataplex.py +172 -2
  60. airflow/providers/google/cloud/links/kubernetes_engine.py +1 -2
  61. airflow/providers/google/cloud/links/managed_kafka.py +104 -0
  62. airflow/providers/google/cloud/links/translate.py +28 -0
  63. airflow/providers/google/cloud/log/gcs_task_handler.py +3 -3
  64. airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -10
  65. airflow/providers/google/cloud/openlineage/facets.py +67 -0
  66. airflow/providers/google/cloud/openlineage/mixins.py +438 -173
  67. airflow/providers/google/cloud/openlineage/utils.py +394 -61
  68. airflow/providers/google/cloud/operators/alloy_db.py +980 -69
  69. airflow/providers/google/cloud/operators/automl.py +83 -245
  70. airflow/providers/google/cloud/operators/bigquery.py +377 -74
  71. airflow/providers/google/cloud/operators/bigquery_dts.py +126 -13
  72. airflow/providers/google/cloud/operators/bigtable.py +1 -3
  73. airflow/providers/google/cloud/operators/cloud_base.py +1 -2
  74. airflow/providers/google/cloud/operators/cloud_batch.py +2 -4
  75. airflow/providers/google/cloud/operators/cloud_build.py +3 -5
  76. airflow/providers/google/cloud/operators/cloud_composer.py +5 -7
  77. airflow/providers/google/cloud/operators/cloud_memorystore.py +4 -6
  78. airflow/providers/google/cloud/operators/cloud_run.py +6 -5
  79. airflow/providers/google/cloud/operators/cloud_sql.py +20 -8
  80. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +62 -8
  81. airflow/providers/google/cloud/operators/compute.py +3 -4
  82. airflow/providers/google/cloud/operators/datacatalog.py +9 -11
  83. airflow/providers/google/cloud/operators/dataflow.py +1 -112
  84. airflow/providers/google/cloud/operators/dataform.py +3 -5
  85. airflow/providers/google/cloud/operators/datafusion.py +1 -1
  86. airflow/providers/google/cloud/operators/dataplex.py +2046 -7
  87. airflow/providers/google/cloud/operators/dataproc.py +102 -17
  88. airflow/providers/google/cloud/operators/dataproc_metastore.py +7 -9
  89. airflow/providers/google/cloud/operators/dlp.py +17 -19
  90. airflow/providers/google/cloud/operators/gcs.py +14 -17
  91. airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -2
  92. airflow/providers/google/cloud/operators/managed_kafka.py +788 -0
  93. airflow/providers/google/cloud/operators/natural_language.py +3 -5
  94. airflow/providers/google/cloud/operators/pubsub.py +39 -7
  95. airflow/providers/google/cloud/operators/speech_to_text.py +3 -5
  96. airflow/providers/google/cloud/operators/stackdriver.py +3 -5
  97. airflow/providers/google/cloud/operators/tasks.py +4 -6
  98. airflow/providers/google/cloud/operators/text_to_speech.py +2 -4
  99. airflow/providers/google/cloud/operators/translate.py +414 -5
  100. airflow/providers/google/cloud/operators/translate_speech.py +2 -4
  101. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +9 -8
  102. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +4 -6
  103. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +6 -8
  104. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +4 -6
  105. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +4 -6
  106. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +0 -322
  107. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +4 -6
  108. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +4 -6
  109. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +4 -6
  110. airflow/providers/google/cloud/operators/video_intelligence.py +3 -5
  111. airflow/providers/google/cloud/operators/vision.py +4 -6
  112. airflow/providers/google/cloud/operators/workflows.py +5 -7
  113. airflow/providers/google/cloud/secrets/secret_manager.py +1 -2
  114. airflow/providers/google/cloud/sensors/bigquery_dts.py +3 -5
  115. airflow/providers/google/cloud/sensors/bigtable.py +2 -3
  116. airflow/providers/google/cloud/sensors/cloud_composer.py +32 -8
  117. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +39 -1
  118. airflow/providers/google/cloud/sensors/dataplex.py +4 -6
  119. airflow/providers/google/cloud/sensors/dataproc.py +2 -3
  120. airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -2
  121. airflow/providers/google/cloud/sensors/gcs.py +2 -4
  122. airflow/providers/google/cloud/sensors/pubsub.py +2 -3
  123. airflow/providers/google/cloud/sensors/workflows.py +3 -5
  124. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +5 -5
  125. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +10 -12
  126. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +1 -1
  127. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +36 -4
  128. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +27 -2
  129. airflow/providers/google/cloud/transfers/mysql_to_gcs.py +27 -2
  130. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +27 -2
  131. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +34 -5
  132. airflow/providers/google/cloud/transfers/sql_to_gcs.py +15 -0
  133. airflow/providers/google/cloud/transfers/trino_to_gcs.py +25 -2
  134. airflow/providers/google/cloud/triggers/bigquery_dts.py +1 -2
  135. airflow/providers/google/cloud/triggers/cloud_batch.py +1 -2
  136. airflow/providers/google/cloud/triggers/cloud_build.py +1 -2
  137. airflow/providers/google/cloud/triggers/cloud_composer.py +13 -3
  138. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +102 -4
  139. airflow/providers/google/cloud/triggers/dataflow.py +2 -3
  140. airflow/providers/google/cloud/triggers/dataplex.py +1 -2
  141. airflow/providers/google/cloud/triggers/dataproc.py +2 -3
  142. airflow/providers/google/cloud/triggers/kubernetes_engine.py +1 -1
  143. airflow/providers/google/cloud/triggers/pubsub.py +1 -2
  144. airflow/providers/google/cloud/triggers/vertex_ai.py +7 -8
  145. airflow/providers/google/cloud/utils/credentials_provider.py +15 -8
  146. airflow/providers/google/cloud/utils/external_token_supplier.py +1 -0
  147. airflow/providers/google/common/auth_backend/google_openid.py +4 -4
  148. airflow/providers/google/common/consts.py +1 -2
  149. airflow/providers/google/common/hooks/base_google.py +8 -7
  150. airflow/providers/google/get_provider_info.py +186 -134
  151. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -3
  152. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  153. airflow/providers/google/marketing_platform/operators/analytics_admin.py +5 -7
  154. {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0rc1.dist-info}/METADATA +40 -57
  155. {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0rc1.dist-info}/RECORD +157 -159
  156. airflow/providers/google/cloud/example_dags/example_facebook_ads_to_gcs.py +0 -141
  157. airflow/providers/google/cloud/example_dags/example_looker.py +0 -64
  158. airflow/providers/google/cloud/example_dags/example_presto_to_gcs.py +0 -194
  159. airflow/providers/google/cloud/example_dags/example_salesforce_to_gcs.py +0 -129
  160. airflow/providers/google/marketing_platform/example_dags/__init__.py +0 -16
  161. airflow/providers/google/marketing_platform/example_dags/example_display_video.py +0 -213
  162. {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0rc1.dist-info}/WHEEL +0 -0
  163. {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/transfers/postgres_to_gcs.py

@@ -24,6 +24,8 @@ import json
 import time
 import uuid
 from decimal import Decimal
+from functools import cached_property
+from typing import TYPE_CHECKING

 import pendulum
 from slugify import slugify
@@ -31,6 +33,9 @@ from slugify import slugify
 from airflow.providers.google.cloud.transfers.sql_to_gcs import BaseSQLToGCSOperator
 from airflow.providers.postgres.hooks.postgres import PostgresHook

+if TYPE_CHECKING:
+    from airflow.providers.openlineage.extractors import OperatorLineage
+

 class _PostgresServerSideCursorDecorator:
     """
@@ -132,10 +137,13 @@ class PostgresToGCSOperator(BaseSQLToGCSOperator):
         )
         return None

+    @cached_property
+    def db_hook(self) -> PostgresHook:
+        return PostgresHook(postgres_conn_id=self.postgres_conn_id)
+
     def query(self):
         """Query Postgres and returns a cursor to the results."""
-        hook = PostgresHook(postgres_conn_id=self.postgres_conn_id)
-        conn = hook.get_conn()
+        conn = self.db_hook.get_conn()
         cursor = conn.cursor(name=self._unique_name())
         cursor.execute(self.sql, self.parameters)
         if self.use_server_side_cursor:
@@ -180,3 +188,20 @@ class PostgresToGCSOperator(BaseSQLToGCSOperator):
         if isinstance(value, Decimal):
             return float(value)
         return value
+
+    def get_openlineage_facets_on_start(self) -> OperatorLineage | None:
+        from airflow.providers.common.compat.openlineage.facet import SQLJobFacet
+        from airflow.providers.common.compat.openlineage.utils.sql import get_openlineage_facets_with_sql
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        sql_parsing_result = get_openlineage_facets_with_sql(
+            hook=self.db_hook,
+            sql=self.sql,
+            conn_id=self.postgres_conn_id,
+            database=self.db_hook.database,
+        )
+        gcs_output_datasets = self._get_openlineage_output_datasets()
+        if sql_parsing_result:
+            sql_parsing_result.outputs = gcs_output_datasets
+            return sql_parsing_result
+        return OperatorLineage(outputs=gcs_output_datasets, job_facets={"sql": SQLJobFacet(self.sql)})
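Note: the get_openlineage_facets_on_start added above (and its twin on TrinoToGCSOperator further down) parses the SQL for input datasets, then swaps the outputs for the GCS datasets computed by the base class. A minimal sketch of what that yields for a hypothetical task; the connection, table, and bucket names are illustrative, not from this diff:

# Hedged usage sketch -- assumes OpenLineage is enabled and a working
# "postgres_default" connection; all names below are hypothetical.
from airflow.providers.google.cloud.transfers.postgres_to_gcs import PostgresToGCSOperator

upload = PostgresToGCSOperator(
    task_id="orders_to_gcs",
    postgres_conn_id="postgres_default",
    sql="SELECT * FROM public.orders",
    bucket="example-bucket",
    filename="orders/export_{}.json",
)
# On task start the operator would now report, roughly:
#   inputs:  public.orders (parsed from the SQL)
#   outputs: namespace "gs://example-bucket", name "orders"
#            (from _get_openlineage_output_datasets, shown further down)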
airflow/providers/google/cloud/transfers/sftp_to_gcs.py

@@ -21,6 +21,7 @@ from __future__ import annotations

 import os
 from collections.abc import Sequence
+from functools import cached_property
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING

@@ -109,6 +110,10 @@ class SFTPToGCSOperator(BaseOperator):
         self.impersonation_chain = impersonation_chain
         self.sftp_prefetch = sftp_prefetch

+    @cached_property
+    def sftp_hook(self):
+        return SFTPHook(self.sftp_conn_id)
+
     def execute(self, context: Context):
         self.destination_path = self._set_destination_path(self.destination_path)
         self.destination_bucket = self._set_bucket_name(self.destination_bucket)
@@ -117,8 +122,6 @@ class SFTPToGCSOperator(BaseOperator):
             impersonation_chain=self.impersonation_chain,
         )

-        sftp_hook = SFTPHook(self.sftp_conn_id)
-
         if WILDCARD in self.source_path:
             total_wildcards = self.source_path.count(WILDCARD)
             if total_wildcards > 1:
@@ -130,7 +133,7 @@ class SFTPToGCSOperator(BaseOperator):
             prefix, delimiter = self.source_path.split(WILDCARD, 1)
             base_path = os.path.dirname(prefix)

-            files, _, _ = sftp_hook.get_tree_map(base_path, prefix=prefix, delimiter=delimiter)
+            files, _, _ = self.sftp_hook.get_tree_map(base_path, prefix=prefix, delimiter=delimiter)

             for file in files:
                 destination_path = file.replace(base_path, self.destination_path, 1)
@@ -140,13 +143,13 @@ class SFTPToGCSOperator(BaseOperator):
                 # retain the "/" prefix, if it has.
                 if not self.destination_path:
                     destination_path = destination_path.lstrip("/")
-                self._copy_single_object(gcs_hook, sftp_hook, file, destination_path)
+                self._copy_single_object(gcs_hook, self.sftp_hook, file, destination_path)

         else:
             destination_object = (
                 self.destination_path if self.destination_path else self.source_path.rsplit("/", 1)[1]
             )
-            self._copy_single_object(gcs_hook, sftp_hook, self.source_path, destination_object)
+            self._copy_single_object(gcs_hook, self.sftp_hook, self.source_path, destination_object)

     def _copy_single_object(
         self,
@@ -188,3 +191,29 @@ class SFTPToGCSOperator(BaseOperator):
     def _set_bucket_name(name: str) -> str:
         bucket = name if not name.startswith("gs://") else name[5:]
         return bucket.strip("/")
+
+    def get_openlineage_facets_on_start(self):
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import extract_ds_name_from_gcs_path
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        source_name = extract_ds_name_from_gcs_path(self.source_path.split(WILDCARD, 1)[0])
+        if self.source_path.startswith("/") and source_name != "/":
+            source_name = "/" + source_name
+
+        if WILDCARD not in self.source_path and not self.destination_path:
+            dest_name = self.source_path.rsplit("/", 1)[1]
+        else:
+            dest_name = extract_ds_name_from_gcs_path(f"{self.destination_path}")
+
+        return OperatorLineage(
+            inputs=[
+                Dataset(
+                    namespace=f"file://{self.sftp_hook.remote_host}:{self.sftp_hook.port}",
+                    name=source_name,
+                )
+            ],
+            outputs=[
+                Dataset(namespace="gs://" + self._set_bucket_name(self.destination_bucket), name=dest_name)
+            ],
+        )
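The SFTP-to-GCS lineage above builds dataset names with extract_ds_name_from_gcs_path. A hedged illustration of the resulting shape, assuming that helper reduces a path to its containing directory (as its use above suggests); the connection id, host, and bucket are hypothetical:

# Illustrative only -- operator arguments are hypothetical, and the lineage
# values assume an SFTP connection resolving to sftp.example.com:22.
from airflow.providers.google.cloud.transfers.sftp_to_gcs import SFTPToGCSOperator

op = SFTPToGCSOperator(
    task_id="upload_csvs",
    sftp_conn_id="sftp_default",
    source_path="/data/incoming/*.csv",
    destination_bucket="example-bucket",
    destination_path="exports/",
)
# op.get_openlineage_facets_on_start() would then return roughly:
#   inputs:  Dataset(namespace="file://sftp.example.com:22", name="/data/incoming")
#   outputs: Dataset(namespace="gs://example-bucket", name="exports")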
airflow/providers/google/cloud/transfers/sql_to_gcs.py

@@ -34,6 +34,7 @@ from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook

 if TYPE_CHECKING:
+    from airflow.providers.common.compat.openlineage.facet import OutputDataset
     from airflow.utils.context import Context


@@ -151,6 +152,7 @@ class BaseSQLToGCSOperator(BaseOperator):
         self.partition_columns = partition_columns
         self.write_on_empty = write_on_empty
         self.parquet_row_group_size = parquet_row_group_size
+        self._uploaded_file_names: list[str] = []

     def execute(self, context: Context):
         if self.partition_columns:
@@ -501,3 +503,16 @@ class BaseSQLToGCSOperator(BaseOperator):
             gzip=self.gzip if is_data_file else False,
             metadata=metadata,
         )
+        self._uploaded_file_names.append(object_name)
+
+    def _get_openlineage_output_datasets(self) -> list[OutputDataset]:
+        """Retrieve OpenLineage output datasets."""
+        from airflow.providers.common.compat.openlineage.facet import OutputDataset
+        from airflow.providers.google.cloud.openlineage.utils import extract_ds_name_from_gcs_path
+
+        return [
+            OutputDataset(
+                namespace=f"gs://{self.bucket}",
+                name=extract_ds_name_from_gcs_path(self.filename.split("{}", maxsplit=1)[0]),
+            )
+        ]
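The output dataset name comes from the filename template truncated at its first "{}" chunk placeholder. A hedged walk-through of that derivation, again assuming extract_ds_name_from_gcs_path returns the containing directory; the template is hypothetical:

# Hypothetical filename template, as passed to any BaseSQLToGCSOperator subclass:
filename = "exports/orders/part_{}.json"

# _get_openlineage_output_datasets first drops everything from the chunk
# placeholder onward...
prefix = filename.split("{}", maxsplit=1)[0]  # -> "exports/orders/part_"

# ...then (under the assumption above) extract_ds_name_from_gcs_path reduces the
# dangling file prefix to its directory, so the reported output dataset would be
# roughly OutputDataset(namespace="gs://<bucket>", name="exports/orders").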
airflow/providers/google/cloud/transfers/trino_to_gcs.py

@@ -17,6 +17,7 @@
 # under the License.
 from __future__ import annotations

+from functools import cached_property
 from typing import TYPE_CHECKING, Any

 from airflow.providers.google.cloud.transfers.sql_to_gcs import BaseSQLToGCSOperator
@@ -26,6 +27,8 @@ if TYPE_CHECKING:
     from trino.client import TrinoResult
     from trino.dbapi import Cursor as TrinoCursor

+    from airflow.providers.openlineage.extractors import OperatorLineage
+

 class _TrinoToGCSTrinoCursorAdapter:
     """
@@ -181,10 +184,13 @@ class TrinoToGCSOperator(BaseSQLToGCSOperator):
         super().__init__(**kwargs)
         self.trino_conn_id = trino_conn_id

+    @cached_property
+    def db_hook(self) -> TrinoHook:
+        return TrinoHook(trino_conn_id=self.trino_conn_id)
+
     def query(self):
         """Query trino and returns a cursor to the results."""
-        trino = TrinoHook(trino_conn_id=self.trino_conn_id)
-        conn = trino.get_conn()
+        conn = self.db_hook.get_conn()
         cursor = conn.cursor()
         self.log.info("Executing: %s", self.sql)
         cursor.execute(self.sql)
@@ -207,3 +213,20 @@ class TrinoToGCSOperator(BaseSQLToGCSOperator):
         :param schema_type: BigQuery data type
         """
         return value
+
+    def get_openlineage_facets_on_start(self) -> OperatorLineage | None:
+        from airflow.providers.common.compat.openlineage.facet import SQLJobFacet
+        from airflow.providers.common.compat.openlineage.utils.sql import get_openlineage_facets_with_sql
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        sql_parsing_result = get_openlineage_facets_with_sql(
+            hook=self.db_hook,
+            sql=self.sql,
+            conn_id=self.trino_conn_id,
+            database=None,
+        )
+        gcs_output_datasets = self._get_openlineage_output_datasets()
+        if sql_parsing_result:
+            sql_parsing_result.outputs = gcs_output_datasets
+            return sql_parsing_result
+        return OperatorLineage(outputs=gcs_output_datasets, job_facets={"sql": SQLJobFacet(self.sql)})
airflow/providers/google/cloud/triggers/bigquery_dts.py

@@ -21,10 +21,9 @@ import asyncio
 from collections.abc import AsyncIterator, Sequence
 from typing import Any

-from google.cloud.bigquery_datatransfer_v1 import TransferRun, TransferState
-
 from airflow.providers.google.cloud.hooks.bigquery_dts import AsyncBiqQueryDataTransferServiceHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.bigquery_datatransfer_v1 import TransferRun, TransferState


 class BigQueryDataTransferRunTrigger(BaseTrigger):
airflow/providers/google/cloud/triggers/cloud_batch.py

@@ -20,10 +20,9 @@ import asyncio
 from collections.abc import AsyncIterator, Sequence
 from typing import Any

-from google.cloud.batch_v1 import Job, JobStatus
-
 from airflow.providers.google.cloud.hooks.cloud_batch import CloudBatchAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.batch_v1 import Job, JobStatus

 DEFAULT_BATCH_LOCATION = "us-central1"
airflow/providers/google/cloud/triggers/cloud_build.py

@@ -20,10 +20,9 @@ import asyncio
 from collections.abc import AsyncIterator, Sequence
 from typing import Any

-from google.cloud.devtools.cloudbuild_v1.types import Build
-
 from airflow.providers.google.cloud.hooks.cloud_build import CloudBuildAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.devtools.cloudbuild_v1.types import Build


 class CloudBuildCreateBuildTrigger(BaseTrigger):
airflow/providers/google/cloud/triggers/cloud_composer.py

@@ -25,11 +25,11 @@ from datetime import datetime
 from typing import Any

 from dateutil import parser
-from google.cloud.orchestration.airflow.service_v1.types import ExecuteAirflowCommandResponse

 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_composer import CloudComposerAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.orchestration.airflow.service_v1.types import ExecuteAirflowCommandResponse


 class CloudComposerExecutionTrigger(BaseTrigger):
@@ -169,6 +169,7 @@ class CloudComposerDAGRunTrigger(BaseTrigger):
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         poll_interval: int = 10,
+        composer_airflow_version: int = 2,
     ):
         super().__init__()
         self.project_id = project_id
@@ -181,6 +182,7 @@ class CloudComposerDAGRunTrigger(BaseTrigger):
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
         self.poll_interval = poll_interval
+        self.composer_airflow_version = composer_airflow_version

         self.gcp_hook = CloudComposerAsyncHook(
             gcp_conn_id=self.gcp_conn_id,
@@ -201,18 +203,24 @@ class CloudComposerDAGRunTrigger(BaseTrigger):
                 "gcp_conn_id": self.gcp_conn_id,
                 "impersonation_chain": self.impersonation_chain,
                 "poll_interval": self.poll_interval,
+                "composer_airflow_version": self.composer_airflow_version,
             },
         )

     async def _pull_dag_runs(self) -> list[dict]:
         """Pull the list of dag runs."""
+        cmd_parameters = (
+            ["-d", self.composer_dag_id, "-o", "json"]
+            if self.composer_airflow_version < 3
+            else [self.composer_dag_id, "-o", "json"]
+        )
         dag_runs_cmd = await self.gcp_hook.execute_airflow_command(
             project_id=self.project_id,
             region=self.region,
             environment_id=self.environment_id,
             command="dags",
             subcommand="list-runs",
-            parameters=["-d", self.composer_dag_id, "-o", "json"],
+            parameters=cmd_parameters,
         )
         cmd_result = await self.gcp_hook.wait_command_execution_result(
             project_id=self.project_id,
@@ -232,7 +240,9 @@ class CloudComposerDAGRunTrigger(BaseTrigger):
         for dag_run in dag_runs:
             if (
                 start_date.timestamp()
-                < parser.parse(dag_run["logical_date"]).timestamp()
+                < parser.parse(
+                    dag_run["execution_date" if self.composer_airflow_version < 3 else "logical_date"]
+                ).timestamp()
                 < end_date.timestamp()
             ) and dag_run["state"] not in self.allowed_states:
                 return False
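For reference, the new composer_airflow_version switch above changes both the CLI call and the date field used to match runs. A sketch of the two command shapes, taken directly from the branch in _pull_dag_runs; the dag id is hypothetical:

# Airflow 2 environments (composer_airflow_version < 3): dag id passed via -d,
# runs matched on "execution_date".
airflow2_parameters = ["-d", "example_dag", "-o", "json"]

# Airflow 3 environments: dag id is positional, runs matched on "logical_date".
airflow3_parameters = ["example_dag", "-o", "json"]

# Either list is handed to execute_airflow_command(command="dags",
# subcommand="list-runs", parameters=...).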
airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py

@@ -18,18 +18,18 @@
 from __future__ import annotations

 import asyncio
-from collections.abc import AsyncIterator, Iterable
+from collections.abc import AsyncIterator, Iterable, Sequence
 from typing import Any

-from google.api_core.exceptions import GoogleAPIError
-from google.cloud.storage_transfer_v1.types import TransferOperation
-
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
     CloudDataTransferServiceAsyncHook,
+    GcpTransferOperationStatus,
 )
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.api_core.exceptions import GoogleAPIError
+from google.cloud.storage_transfer_v1.types import TransferOperation


 class CloudStorageTransferServiceCreateJobsTrigger(BaseTrigger):
@@ -132,3 +132,101 @@ class CloudStorageTransferServiceCreateJobsTrigger(BaseTrigger):
             project_id=self.project_id,
             gcp_conn_id=self.gcp_conn_id,
         )
+
+
+class CloudStorageTransferServiceCheckJobStatusTrigger(BaseTrigger):
+    """
+    CloudStorageTransferServiceCheckJobStatusTrigger run on the trigger worker to check Cloud Storage Transfer job.
+
+    :param job_name: The name of the transfer job
+    :param expected_statuses: The expected state of the operation.
+        See:
+        https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferOperations#Status
+    :param project_id: The ID of the project that owns the Transfer Job.
+    :param poke_interval: Polling period in seconds to check for the status
+    :param gcp_conn_id: The connection ID used to connect to Google Cloud.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    """
+
+    def __init__(
+        self,
+        job_name: str,
+        expected_statuses: set[str] | str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
+        poke_interval: float = 10.0,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+    ):
+        super().__init__()
+        self.job_name = job_name
+        self.expected_statuses = expected_statuses
+        self.project_id = project_id
+        self.poke_interval = poke_interval
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        """Serialize CloudStorageTransferServiceCheckJobStatusTrigger arguments and classpath."""
+        return (
+            f"{self.__class__.__module__}.{self.__class__.__qualname__}",
+            {
+                "job_name": self.job_name,
+                "expected_statuses": self.expected_statuses,
+                "project_id": self.project_id,
+                "poke_interval": self.poke_interval,
+                "gcp_conn_id": self.gcp_conn_id,
+                "impersonation_chain": self.impersonation_chain,
+            },
+        )
+
+    def _get_async_hook(self) -> CloudDataTransferServiceAsyncHook:
+        return CloudDataTransferServiceAsyncHook(
+            project_id=self.project_id,
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
+    async def run(self) -> AsyncIterator[TriggerEvent]:
+        """Check the status of the transfer job and yield a TriggerEvent."""
+        hook = self._get_async_hook()
+        expected_statuses = (
+            {GcpTransferOperationStatus.SUCCESS} if not self.expected_statuses else self.expected_statuses
+        )
+
+        try:
+            while True:
+                operations = await hook.list_transfer_operations(
+                    request_filter={
+                        "project_id": self.project_id or hook.project_id,
+                        "job_names": [self.job_name],
+                    }
+                )
+                check = await CloudDataTransferServiceAsyncHook.operations_contain_expected_statuses(
+                    operations=operations,
+                    expected_statuses=expected_statuses,
+                )
+                if check:
+                    yield TriggerEvent(
+                        {
+                            "status": "success",
+                            "message": "Transfer operation completed successfully",
+                            "operations": operations,
+                        }
+                    )
+                    return
+
+                self.log.info(
+                    "Sleeping for %s seconds.",
+                    self.poke_interval,
+                )
+                await asyncio.sleep(self.poke_interval)
+        except Exception as e:
+            self.log.exception("Exception occurred while checking for query completion")
+            yield TriggerEvent({"status": "error", "message": str(e)})
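A deferrable sensor would typically hand its polling loop to this new trigger via self.defer. A minimal hedged sketch: the sensor class and its fields are hypothetical; only the trigger and its event payload come from this diff.

from airflow.providers.google.cloud.triggers.cloud_storage_transfer_service import (
    CloudStorageTransferServiceCheckJobStatusTrigger,
)
from airflow.sensors.base import BaseSensorOperator


class ExampleTransferJobSensor(BaseSensorOperator):
    """Hypothetical sensor deferring to the new trigger."""

    def __init__(self, job_name: str, project_id: str, **kwargs):
        super().__init__(**kwargs)
        self.job_name = job_name
        self.project_id = project_id

    def execute(self, context):
        # Free the worker slot; the triggerer polls the transfer job instead.
        self.defer(
            trigger=CloudStorageTransferServiceCheckJobStatusTrigger(
                job_name=self.job_name,
                project_id=self.project_id,
                poke_interval=10.0,
            ),
            method_name="execute_complete",
        )

    def execute_complete(self, context, event):
        # The trigger yields {"status": "success"/"error", "message": ..., ...}.
        if event["status"] == "error":
            raise RuntimeError(event["message"])
        return event["message"]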
airflow/providers/google/cloud/triggers/dataflow.py

@@ -22,6 +22,8 @@ from collections.abc import Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any

+from airflow.providers.google.cloud.hooks.dataflow import AsyncDataflowHook, DataflowJobStatus
+from airflow.triggers.base import BaseTrigger, TriggerEvent
 from google.cloud.dataflow_v1beta3 import JobState
 from google.cloud.dataflow_v1beta3.types import (
     AutoscalingEvent,
@@ -32,9 +34,6 @@ from google.cloud.dataflow_v1beta3.types import (
     MetricUpdate,
 )

-from airflow.providers.google.cloud.hooks.dataflow import AsyncDataflowHook, DataflowJobStatus
-from airflow.triggers.base import BaseTrigger, TriggerEvent
-
 if TYPE_CHECKING:
     from google.cloud.dataflow_v1beta3.services.messages_v1_beta3.pagers import ListJobMessagesAsyncPager
airflow/providers/google/cloud/triggers/dataplex.py

@@ -22,10 +22,9 @@ from __future__ import annotations
 import asyncio
 from collections.abc import AsyncIterator, Sequence

-from google.cloud.dataplex_v1.types import DataScanJob
-
 from airflow.providers.google.cloud.hooks.dataplex import DataplexAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.dataplex_v1.types import DataScanJob


 class DataplexDataQualityJobTrigger(BaseTrigger):
airflow/providers/google/cloud/triggers/dataproc.py

@@ -25,9 +25,6 @@ import time
 from collections.abc import AsyncIterator, Sequence
 from typing import TYPE_CHECKING, Any

-from google.api_core.exceptions import NotFound
-from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
-
 from airflow.exceptions import AirflowException
 from airflow.models.taskinstance import TaskInstance
 from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook, DataprocHook
@@ -36,6 +33,8 @@ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 from airflow.utils.session import provide_session
 from airflow.utils.state import TaskInstanceState
+from google.api_core.exceptions import NotFound
+from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus

 if TYPE_CHECKING:
     from sqlalchemy.orm.session import Session
airflow/providers/google/cloud/triggers/kubernetes_engine.py

@@ -23,7 +23,6 @@ from collections.abc import AsyncIterator, Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any

-from google.cloud.container_v1.types import Operation
 from packaging.version import parse as parse_version

 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -37,6 +36,7 @@ from airflow.providers.google.cloud.hooks.kubernetes_engine import (
 )
 from airflow.providers_manager import ProvidersManager
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.container_v1.types import Operation

 if TYPE_CHECKING:
     from datetime import datetime
airflow/providers/google/cloud/triggers/pubsub.py

@@ -23,10 +23,9 @@ from collections.abc import AsyncIterator, Sequence
 from functools import cached_property
 from typing import Any

-from google.cloud.pubsub_v1.types import ReceivedMessage
-
 from airflow.providers.google.cloud.hooks.pubsub import PubSubAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.pubsub_v1.types import ReceivedMessage


 class PubsubPullTrigger(BaseTrigger):
airflow/providers/google/cloud/triggers/vertex_ai.py

@@ -20,14 +20,6 @@ from collections.abc import AsyncIterator, Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any

-from google.cloud.aiplatform_v1 import (
-    BatchPredictionJob,
-    HyperparameterTuningJob,
-    JobState,
-    PipelineState,
-    types,
-)
-
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.vertex_ai.batch_prediction_job import BatchPredictionJobAsyncHook
 from airflow.providers.google.cloud.hooks.vertex_ai.custom_job import CustomJobAsyncHook
@@ -36,6 +28,13 @@ from airflow.providers.google.cloud.hooks.vertex_ai.hyperparameter_tuning_job im
 )
 from airflow.providers.google.cloud.hooks.vertex_ai.pipeline_job import PipelineJobAsyncHook
 from airflow.triggers.base import BaseTrigger, TriggerEvent
+from google.cloud.aiplatform_v1 import (
+    BatchPredictionJob,
+    HyperparameterTuningJob,
+    JobState,
+    PipelineState,
+    types,
+)

 if TYPE_CHECKING:
     from proto import Message
airflow/providers/google/cloud/utils/credentials_provider.py

@@ -29,10 +29,6 @@ from urllib.parse import urlencode

 import google.auth
 import google.oauth2.service_account
-from google.auth import impersonated_credentials  # type: ignore[attr-defined]
-from google.auth.credentials import AnonymousCredentials, Credentials
-from google.auth.environment_vars import CREDENTIALS, LEGACY_PROJECT, PROJECT
-
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud._internal_client.secret_manager_client import _SecretManagerClient
 from airflow.providers.google.cloud.utils.external_token_supplier import (
@@ -40,6 +36,9 @@ from airflow.providers.google.cloud.utils.external_token_supplier import (
 )
 from airflow.utils.log.logging_mixin import LoggingMixin
 from airflow.utils.process_utils import patch_environ
+from google.auth import impersonated_credentials  # type: ignore[attr-defined]
+from google.auth.credentials import AnonymousCredentials, Credentials
+from google.auth.environment_vars import CREDENTIALS, LEGACY_PROJECT, PROJECT

 log = logging.getLogger(__name__)

@@ -219,11 +218,19 @@ class _CredentialProvider(LoggingMixin):
         idp_extra_params_dict: dict[str, str] | None = None,
     ) -> None:
         super().__init__()
-        key_options = [key_path, keyfile_dict, credential_config_file, key_secret_name, is_anonymous]
-        if len([x for x in key_options if x]) > 1:
+        key_options_map = {
+            "key_path": key_path,
+            "keyfile_dict": keyfile_dict,
+            "credential_config_file": credential_config_file,
+            "key_secret_name": key_secret_name,
+            "is_anonymous": is_anonymous,
+        }
+        key_options_label_provided = [label for label, credential in key_options_map.items() if credential]
+        if len(key_options_label_provided) > 1:
             raise AirflowException(
-                "The `keyfile_dict`, `key_path`, `credential_config_file`, `is_anonymous` and"
-                " `key_secret_name` fields are all mutually exclusive. Please provide only one value."
+                f"The `keyfile_dict`, `key_path`, `credential_config_file`, `is_anonymous` and"
+                f" `key_secret_name` fields are all mutually exclusive. "
+                f"Received options: {key_options_label_provided}. Please provide only one value."
             )
         self.key_path = key_path
         self.keyfile_dict = keyfile_dict
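The reworked check above now names the conflicting options in the error. A hedged sketch of the failure mode; note that _CredentialProvider is a private helper and the values below are hypothetical:

from airflow.exceptions import AirflowException
from airflow.providers.google.cloud.utils.credentials_provider import _CredentialProvider

try:
    # Two mutually exclusive credential options supplied at once (illustrative values).
    _CredentialProvider(key_path="/keys/sa.json", keyfile_dict={"type": "service_account"})
except AirflowException as err:
    # The message now ends with something like:
    # "Received options: ['key_path', 'keyfile_dict']. Please provide only one value."
    print(err)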
airflow/providers/google/cloud/utils/external_token_supplier.py

@@ -22,6 +22,7 @@ from functools import wraps
 from typing import TYPE_CHECKING, Any

 import requests
+
 from google.auth.exceptions import RefreshError
 from google.auth.identity_pool import SubjectTokenSupplier
airflow/providers/google/common/auth_backend/google_openid.py

@@ -23,17 +23,17 @@ import logging
 from functools import wraps
 from typing import Callable, TypeVar, cast

+from flask import Response, current_app, request as flask_request  # type: ignore
+
 import google
 import google.auth.transport.requests
 import google.oauth2.id_token
-from flask import Response, current_app, request as flask_request  # type: ignore
+from airflow.configuration import conf
+from airflow.providers.google.common.utils.id_token_credentials import get_default_id_token_credentials
 from google.auth import exceptions
 from google.auth.transport.requests import AuthorizedSession
 from google.oauth2 import service_account

-from airflow.configuration import conf
-from airflow.providers.google.common.utils.id_token_credentials import get_default_id_token_credentials
-
 log = logging.getLogger(__name__)

 _GOOGLE_ISSUERS = ("accounts.google.com", "https://accounts.google.com")
airflow/providers/google/common/consts.py

@@ -16,9 +16,8 @@
 # under the License.
 from __future__ import annotations

-from google.api_core.gapic_v1.client_info import ClientInfo
-
 from airflow import version
+from google.api_core.gapic_v1.client_info import ClientInfo

 GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME = "execute_complete"