apache-airflow-providers-google 12.0.0rc2 → 14.0.0 (py3-none-any.whl)

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (163)
  1. airflow/providers/google/LICENSE +0 -52
  2. airflow/providers/google/__init__.py +1 -1
  3. airflow/providers/google/ads/hooks/ads.py +27 -13
  4. airflow/providers/google/ads/transfers/ads_to_gcs.py +18 -4
  5. airflow/providers/google/assets/bigquery.py +17 -0
  6. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +2 -3
  7. airflow/providers/google/cloud/hooks/alloy_db.py +736 -8
  8. airflow/providers/google/cloud/hooks/automl.py +10 -4
  9. airflow/providers/google/cloud/hooks/bigquery.py +125 -22
  10. airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
  11. airflow/providers/google/cloud/hooks/bigtable.py +2 -3
  12. airflow/providers/google/cloud/hooks/cloud_batch.py +3 -4
  13. airflow/providers/google/cloud/hooks/cloud_build.py +4 -5
  14. airflow/providers/google/cloud/hooks/cloud_composer.py +3 -4
  15. airflow/providers/google/cloud/hooks/cloud_memorystore.py +3 -4
  16. airflow/providers/google/cloud/hooks/cloud_run.py +3 -4
  17. airflow/providers/google/cloud/hooks/cloud_sql.py +7 -3
  18. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +119 -7
  19. airflow/providers/google/cloud/hooks/compute.py +3 -3
  20. airflow/providers/google/cloud/hooks/datacatalog.py +3 -4
  21. airflow/providers/google/cloud/hooks/dataflow.py +12 -12
  22. airflow/providers/google/cloud/hooks/dataform.py +2 -3
  23. airflow/providers/google/cloud/hooks/datafusion.py +2 -2
  24. airflow/providers/google/cloud/hooks/dataplex.py +1032 -11
  25. airflow/providers/google/cloud/hooks/dataproc.py +4 -5
  26. airflow/providers/google/cloud/hooks/dataproc_metastore.py +3 -4
  27. airflow/providers/google/cloud/hooks/dlp.py +3 -4
  28. airflow/providers/google/cloud/hooks/gcs.py +7 -6
  29. airflow/providers/google/cloud/hooks/kms.py +2 -3
  30. airflow/providers/google/cloud/hooks/kubernetes_engine.py +8 -8
  31. airflow/providers/google/cloud/hooks/life_sciences.py +1 -1
  32. airflow/providers/google/cloud/hooks/managed_kafka.py +482 -0
  33. airflow/providers/google/cloud/hooks/natural_language.py +2 -3
  34. airflow/providers/google/cloud/hooks/os_login.py +2 -3
  35. airflow/providers/google/cloud/hooks/pubsub.py +6 -6
  36. airflow/providers/google/cloud/hooks/secret_manager.py +2 -3
  37. airflow/providers/google/cloud/hooks/spanner.py +2 -2
  38. airflow/providers/google/cloud/hooks/speech_to_text.py +2 -3
  39. airflow/providers/google/cloud/hooks/stackdriver.py +4 -4
  40. airflow/providers/google/cloud/hooks/tasks.py +3 -4
  41. airflow/providers/google/cloud/hooks/text_to_speech.py +2 -3
  42. airflow/providers/google/cloud/hooks/translate.py +236 -5
  43. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +9 -4
  44. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -4
  45. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +4 -5
  46. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +3 -4
  47. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -3
  48. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +3 -4
  49. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -181
  50. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -4
  51. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -3
  52. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -4
  53. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -3
  54. airflow/providers/google/cloud/hooks/video_intelligence.py +2 -3
  55. airflow/providers/google/cloud/hooks/vision.py +3 -4
  56. airflow/providers/google/cloud/hooks/workflows.py +2 -3
  57. airflow/providers/google/cloud/links/alloy_db.py +46 -0
  58. airflow/providers/google/cloud/links/bigquery.py +25 -0
  59. airflow/providers/google/cloud/links/dataplex.py +172 -2
  60. airflow/providers/google/cloud/links/kubernetes_engine.py +1 -2
  61. airflow/providers/google/cloud/links/managed_kafka.py +104 -0
  62. airflow/providers/google/cloud/links/translate.py +28 -0
  63. airflow/providers/google/cloud/log/gcs_task_handler.py +3 -3
  64. airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -10
  65. airflow/providers/google/cloud/openlineage/facets.py +67 -0
  66. airflow/providers/google/cloud/openlineage/mixins.py +438 -173
  67. airflow/providers/google/cloud/openlineage/utils.py +394 -61
  68. airflow/providers/google/cloud/operators/alloy_db.py +980 -69
  69. airflow/providers/google/cloud/operators/automl.py +83 -245
  70. airflow/providers/google/cloud/operators/bigquery.py +377 -74
  71. airflow/providers/google/cloud/operators/bigquery_dts.py +126 -13
  72. airflow/providers/google/cloud/operators/bigtable.py +1 -3
  73. airflow/providers/google/cloud/operators/cloud_base.py +1 -2
  74. airflow/providers/google/cloud/operators/cloud_batch.py +2 -4
  75. airflow/providers/google/cloud/operators/cloud_build.py +3 -5
  76. airflow/providers/google/cloud/operators/cloud_composer.py +5 -7
  77. airflow/providers/google/cloud/operators/cloud_memorystore.py +4 -6
  78. airflow/providers/google/cloud/operators/cloud_run.py +6 -5
  79. airflow/providers/google/cloud/operators/cloud_sql.py +20 -8
  80. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +62 -8
  81. airflow/providers/google/cloud/operators/compute.py +3 -4
  82. airflow/providers/google/cloud/operators/datacatalog.py +9 -11
  83. airflow/providers/google/cloud/operators/dataflow.py +1 -112
  84. airflow/providers/google/cloud/operators/dataform.py +3 -5
  85. airflow/providers/google/cloud/operators/datafusion.py +1 -1
  86. airflow/providers/google/cloud/operators/dataplex.py +2046 -7
  87. airflow/providers/google/cloud/operators/dataproc.py +102 -17
  88. airflow/providers/google/cloud/operators/dataproc_metastore.py +7 -9
  89. airflow/providers/google/cloud/operators/dlp.py +17 -19
  90. airflow/providers/google/cloud/operators/gcs.py +14 -17
  91. airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -2
  92. airflow/providers/google/cloud/operators/managed_kafka.py +788 -0
  93. airflow/providers/google/cloud/operators/natural_language.py +3 -5
  94. airflow/providers/google/cloud/operators/pubsub.py +39 -7
  95. airflow/providers/google/cloud/operators/speech_to_text.py +3 -5
  96. airflow/providers/google/cloud/operators/stackdriver.py +3 -5
  97. airflow/providers/google/cloud/operators/tasks.py +4 -6
  98. airflow/providers/google/cloud/operators/text_to_speech.py +2 -4
  99. airflow/providers/google/cloud/operators/translate.py +414 -5
  100. airflow/providers/google/cloud/operators/translate_speech.py +2 -4
  101. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +9 -8
  102. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +4 -6
  103. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +6 -8
  104. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +4 -6
  105. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +4 -6
  106. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +0 -322
  107. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +4 -6
  108. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +4 -6
  109. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +4 -6
  110. airflow/providers/google/cloud/operators/video_intelligence.py +3 -5
  111. airflow/providers/google/cloud/operators/vision.py +4 -6
  112. airflow/providers/google/cloud/operators/workflows.py +5 -7
  113. airflow/providers/google/cloud/secrets/secret_manager.py +1 -2
  114. airflow/providers/google/cloud/sensors/bigquery_dts.py +3 -5
  115. airflow/providers/google/cloud/sensors/bigtable.py +2 -3
  116. airflow/providers/google/cloud/sensors/cloud_composer.py +32 -8
  117. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +39 -1
  118. airflow/providers/google/cloud/sensors/dataplex.py +4 -6
  119. airflow/providers/google/cloud/sensors/dataproc.py +2 -3
  120. airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -2
  121. airflow/providers/google/cloud/sensors/gcs.py +2 -4
  122. airflow/providers/google/cloud/sensors/pubsub.py +2 -3
  123. airflow/providers/google/cloud/sensors/workflows.py +3 -5
  124. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +5 -5
  125. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +10 -12
  126. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +1 -1
  127. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +36 -4
  128. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +27 -2
  129. airflow/providers/google/cloud/transfers/mysql_to_gcs.py +27 -2
  130. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +27 -2
  131. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +34 -5
  132. airflow/providers/google/cloud/transfers/sql_to_gcs.py +15 -0
  133. airflow/providers/google/cloud/transfers/trino_to_gcs.py +25 -2
  134. airflow/providers/google/cloud/triggers/bigquery_dts.py +1 -2
  135. airflow/providers/google/cloud/triggers/cloud_batch.py +1 -2
  136. airflow/providers/google/cloud/triggers/cloud_build.py +1 -2
  137. airflow/providers/google/cloud/triggers/cloud_composer.py +13 -3
  138. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +102 -4
  139. airflow/providers/google/cloud/triggers/dataflow.py +2 -3
  140. airflow/providers/google/cloud/triggers/dataplex.py +1 -2
  141. airflow/providers/google/cloud/triggers/dataproc.py +2 -3
  142. airflow/providers/google/cloud/triggers/kubernetes_engine.py +1 -1
  143. airflow/providers/google/cloud/triggers/pubsub.py +1 -2
  144. airflow/providers/google/cloud/triggers/vertex_ai.py +7 -8
  145. airflow/providers/google/cloud/utils/credentials_provider.py +15 -8
  146. airflow/providers/google/cloud/utils/external_token_supplier.py +1 -0
  147. airflow/providers/google/common/auth_backend/google_openid.py +4 -4
  148. airflow/providers/google/common/consts.py +1 -2
  149. airflow/providers/google/common/hooks/base_google.py +8 -7
  150. airflow/providers/google/get_provider_info.py +186 -134
  151. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -3
  152. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  153. airflow/providers/google/marketing_platform/operators/analytics_admin.py +5 -7
  154. {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0.dist-info}/METADATA +41 -58
  155. {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0.dist-info}/RECORD +157 -159
  156. airflow/providers/google/cloud/example_dags/example_facebook_ads_to_gcs.py +0 -141
  157. airflow/providers/google/cloud/example_dags/example_looker.py +0 -64
  158. airflow/providers/google/cloud/example_dags/example_presto_to_gcs.py +0 -194
  159. airflow/providers/google/cloud/example_dags/example_salesforce_to_gcs.py +0 -129
  160. airflow/providers/google/marketing_platform/example_dags/__init__.py +0 -16
  161. airflow/providers/google/marketing_platform/example_dags/example_display_video.py +0 -213
  162. {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0.dist-info}/WHEEL +0 -0
  163. {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-14.0.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/workflows.py
@@ -23,11 +23,6 @@ import uuid
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from google.api_core.exceptions import AlreadyExists
-from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.cloud.workflows.executions_v1beta import Execution
-from google.cloud.workflows_v1beta import Workflow
-
 from airflow.providers.google.cloud.hooks.workflows import WorkflowsHook
 from airflow.providers.google.cloud.links.workflows import (
     WorkflowsExecutionLink,
@@ -36,13 +31,16 @@ from airflow.providers.google.cloud.links.workflows import (
 )
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
+from google.api_core.exceptions import AlreadyExists
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.cloud.workflows.executions_v1beta import Execution
+from google.cloud.workflows_v1beta import Workflow
 
 if TYPE_CHECKING:
+    from airflow.utils.context import Context
     from google.api_core.retry import Retry
     from google.protobuf.field_mask_pb2 import FieldMask
 
-    from airflow.utils.context import Context
-
 from airflow.utils.hashlib_wrapper import md5
 
airflow/providers/google/cloud/secrets/secret_manager.py
@@ -21,8 +21,6 @@ from __future__ import annotations
 import logging
 from collections.abc import Sequence
 
-from google.auth.exceptions import DefaultCredentialsError
-
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud._internal_client.secret_manager_client import _SecretManagerClient
 from airflow.providers.google.cloud.utils.credentials_provider import (
@@ -32,6 +30,7 @@ from airflow.providers.google.cloud.utils.credentials_provider import (
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.secrets import BaseSecretsBackend
 from airflow.utils.log.logging_mixin import LoggingMixin
+from google.auth.exceptions import DefaultCredentialsError
 
 log = logging.getLogger(__name__)
 
airflow/providers/google/cloud/sensors/bigquery_dts.py
@@ -22,18 +22,16 @@ from __future__ import annotations
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.cloud.bigquery_datatransfer_v1 import TransferState
-
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.bigquery_dts import BiqQueryDataTransferServiceHook
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.cloud.bigquery_datatransfer_v1 import TransferState
 
 if TYPE_CHECKING:
-    from google.api_core.retry import Retry
-
     from airflow.utils.context import Context
+    from google.api_core.retry import Retry
 
 
 class BigQueryDataTransferServiceTransferRunSensor(BaseSensorOperator):
airflow/providers/google/cloud/sensors/bigtable.py
@@ -23,14 +23,13 @@ from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
 import google.api_core.exceptions
-from google.cloud.bigtable import enums
-from google.cloud.bigtable.table import ClusterState
-
 from airflow.providers.google.cloud.hooks.bigtable import BigtableHook
 from airflow.providers.google.cloud.links.bigtable import BigtableTablesLink
 from airflow.providers.google.cloud.operators.bigtable import BigtableValidationMixin
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
+from google.cloud.bigtable import enums
+from google.cloud.bigtable.table import ClusterState
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/google/cloud/sensors/cloud_composer.py
@@ -22,10 +22,10 @@ from __future__ import annotations
 import json
 from collections.abc import Iterable, Sequence
 from datetime import datetime, timedelta
+from functools import cached_property
 from typing import TYPE_CHECKING
 
 from dateutil import parser
-from google.cloud.orchestration.airflow.service_v1.types import ExecuteAirflowCommandResponse
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
@@ -34,6 +34,7 @@ from airflow.providers.google.cloud.triggers.cloud_composer import CloudComposer
 from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME
 from airflow.sensors.base import BaseSensorOperator
 from airflow.utils.state import TaskInstanceState
+from google.cloud.orchestration.airflow.service_v1.types import Environment, ExecuteAirflowCommandResponse
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -135,19 +136,20 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
 
     def _pull_dag_runs(self) -> list[dict]:
         """Pull the list of dag runs."""
-        hook = CloudComposerHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
+        cmd_parameters = (
+            ["-d", self.composer_dag_id, "-o", "json"]
+            if self._composer_airflow_version < 3
+            else [self.composer_dag_id, "-o", "json"]
         )
-        dag_runs_cmd = hook.execute_airflow_command(
+        dag_runs_cmd = self.hook.execute_airflow_command(
             project_id=self.project_id,
             region=self.region,
             environment_id=self.environment_id,
             command="dags",
             subcommand="list-runs",
-            parameters=["-d", self.composer_dag_id, "-o", "json"],
+            parameters=cmd_parameters,
         )
-        cmd_result = hook.wait_command_execution_result(
+        cmd_result = self.hook.wait_command_execution_result(
             project_id=self.project_id,
             region=self.region,
             environment_id=self.environment_id,
@@ -165,13 +167,27 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
         for dag_run in dag_runs:
             if (
                 start_date.timestamp()
-                < parser.parse(dag_run["logical_date"]).timestamp()
+                < parser.parse(
+                    dag_run["execution_date" if self._composer_airflow_version < 3 else "logical_date"]
+                ).timestamp()
                 < end_date.timestamp()
             ) and dag_run["state"] not in self.allowed_states:
                 return False
         return True
 
+    def _get_composer_airflow_version(self) -> int:
+        """Return Composer Airflow version."""
+        environment_obj = self.hook.get_environment(
+            project_id=self.project_id,
+            region=self.region,
+            environment_id=self.environment_id,
+        )
+        environment_config = Environment.to_dict(environment_obj)
+        image_version = environment_config["config"]["software_config"]["image_version"]
+        return int(image_version.split("airflow-")[1].split(".")[0])
+
     def execute(self, context: Context) -> None:
+        self._composer_airflow_version = self._get_composer_airflow_version()
         if self.deferrable:
             start_date, end_date = self._get_logical_dates(context)
             self.defer(
@@ -186,6 +202,7 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
                     gcp_conn_id=self.gcp_conn_id,
                     impersonation_chain=self.impersonation_chain,
                     poll_interval=self.poll_interval,
+                    composer_airflow_version=self._composer_airflow_version,
                 ),
                 method_name=GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME,
             )
@@ -195,3 +212,10 @@ class CloudComposerDAGRunSensor(BaseSensorOperator):
         if event and event["status"] == "error":
            raise AirflowException(event["message"])
         self.log.info("DAG %s has executed successfully.", self.composer_dag_id)
+
+    @cached_property
+    def hook(self) -> CloudComposerHook:
+        return CloudComposerHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
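
Note: the version probe added above keys off the Composer image version string. A minimal sketch of the parsing, assuming the usual "composer-X.Y.Z-airflow-A.B.C" form of Composer image versions (the value below is hypothetical):

    # Sketch of _get_composer_airflow_version's parsing logic.
    image_version = "composer-2.9.7-airflow-2.9.3"  # hypothetical image version
    major = int(image_version.split("airflow-")[1].split(".")[0])
    assert major == 2  # Airflow 2: keep "-d <dag_id>" and read "execution_date"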
airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py
@@ -20,8 +20,10 @@
 from __future__ import annotations
 
 from collections.abc import Sequence
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
+from airflow.configuration import conf
+from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
     COUNTERS,
     METADATA,
@@ -29,6 +31,9 @@ from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import
     CloudDataTransferServiceHook,
 )
 from airflow.providers.google.cloud.links.cloud_storage_transfer import CloudStorageTransferJobLink
+from airflow.providers.google.cloud.triggers.cloud_storage_transfer_service import (
+    CloudStorageTransferServiceCheckJobStatusTrigger,
+)
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
 
@@ -60,6 +65,7 @@ class CloudDataTransferServiceJobStatusSensor(BaseSensorOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
+    :param deferrable: Run sensor in deferrable mode
     """
 
     # [START gcp_transfer_job_sensor_template_fields]
@@ -78,6 +84,7 @@ class CloudDataTransferServiceJobStatusSensor(BaseSensorOperator):
         project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -88,6 +95,7 @@ class CloudDataTransferServiceJobStatusSensor(BaseSensorOperator):
         self.project_id = project_id
         self.gcp_cloud_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
+        self.deferrable = deferrable
 
     def poke(self, context: Context) -> bool:
         hook = CloudDataTransferServiceHook(
@@ -117,3 +125,33 @@ class CloudDataTransferServiceJobStatusSensor(BaseSensorOperator):
         )
 
         return check
+
+    def execute(self, context: Context) -> None:
+        """Run on the worker and defer using the triggers if deferrable is set to True."""
+        if not self.deferrable:
+            super().execute(context)
+        elif not self.poke(context=context):
+            self.defer(
+                timeout=self.execution_timeout,
+                trigger=CloudStorageTransferServiceCheckJobStatusTrigger(
+                    job_name=self.job_name,
+                    expected_statuses=self.expected_statuses,
+                    project_id=self.project_id,
+                    poke_interval=self.poke_interval,
+                    gcp_conn_id=self.gcp_cloud_conn_id,
+                    impersonation_chain=self.impersonation_chain,
+                ),
+                method_name="execute_complete",
+            )
+
+    def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
+        """
+        Act as a callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
+        """
+        if event["status"] == "error":
+            raise AirflowException(event["message"])
+
+        self.xcom_push(key="sensed_operations", value=event["operations"], context=context)
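
For orientation, a hedged usage sketch of the new deferrable mode; the task id, job name, and project below are hypothetical, and only deferrable=True is new in this release. When set, a failed first poke defers to CloudStorageTransferServiceCheckJobStatusTrigger on the triggerer instead of re-poking on a worker:

    from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
        GcpTransferOperationStatus,
    )
    from airflow.providers.google.cloud.sensors.cloud_storage_transfer_service import (
        CloudDataTransferServiceJobStatusSensor,
    )

    wait_for_transfer = CloudDataTransferServiceJobStatusSensor(
        task_id="wait_for_transfer",
        job_name="transferJobs/123456789",  # hypothetical job name
        project_id="my-gcp-project",  # hypothetical project
        expected_statuses={GcpTransferOperationStatus.SUCCESS},
        deferrable=True,
    )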
airflow/providers/google/cloud/sensors/dataplex.py
@@ -23,13 +23,8 @@ from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from google.api_core.retry import Retry
-
     from airflow.utils.context import Context
-
-from google.api_core.exceptions import GoogleAPICallError
-from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.cloud.dataplex_v1.types import DataScanJob
+    from google.api_core.retry import Retry
 
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.dataplex import (
@@ -38,6 +33,9 @@ from airflow.providers.google.cloud.hooks.dataplex import (
     DataplexHook,
 )
 from airflow.sensors.base import BaseSensorOperator
+from google.api_core.exceptions import GoogleAPICallError
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.cloud.dataplex_v1.types import DataScanJob
 
 
 class TaskState:
airflow/providers/google/cloud/sensors/dataproc.py
@@ -23,13 +23,12 @@ import time
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from google.api_core.exceptions import ServerError
-from google.cloud.dataproc_v1.types import Batch, JobStatus
-
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.dataproc import DataprocHook
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
+from google.api_core.exceptions import ServerError
+from google.cloud.dataproc_v1.types import Batch, JobStatus
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/google/cloud/sensors/dataproc_metastore.py
@@ -26,9 +26,8 @@ from airflow.providers.google.cloud.hooks.gcs import parse_json_from_gcs
 from airflow.sensors.base import BaseSensorOperator
 
 if TYPE_CHECKING:
-    from google.api_core.operation import Operation
-
     from airflow.utils.context import Context
+    from google.api_core.operation import Operation
 
 
 class MetastoreHivePartitionSensor(BaseSensorOperator):
airflow/providers/google/cloud/sensors/gcs.py
@@ -25,8 +25,6 @@ from collections.abc import Sequence
 from datetime import datetime, timedelta
 from typing import TYPE_CHECKING, Any, Callable
 
-from google.cloud.storage.retry import DEFAULT_RETRY
-
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
@@ -37,11 +35,11 @@ from airflow.providers.google.cloud.triggers.gcs import (
     GCSUploadSessionTrigger,
 )
 from airflow.sensors.base import BaseSensorOperator, poke_mode_only
+from google.cloud.storage.retry import DEFAULT_RETRY
 
 if TYPE_CHECKING:
-    from google.api_core.retry import Retry
-
     from airflow.utils.context import Context
+    from google.api_core.retry import Retry
 
 
 class GCSObjectExistenceSensor(BaseSensorOperator):
airflow/providers/google/cloud/sensors/pubsub.py
@@ -23,14 +23,13 @@ from collections.abc import Sequence
 from datetime import timedelta
 from typing import TYPE_CHECKING, Any, Callable
 
-from google.cloud import pubsub_v1
-from google.cloud.pubsub_v1.types import ReceivedMessage
-
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.pubsub import PubSubHook
 from airflow.providers.google.cloud.triggers.pubsub import PubsubPullTrigger
 from airflow.sensors.base import BaseSensorOperator
+from google.cloud import pubsub_v1
+from google.cloud.pubsub_v1.types import ReceivedMessage
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/google/cloud/sensors/workflows.py
@@ -19,18 +19,16 @@ from __future__ import annotations
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.cloud.workflows.executions_v1beta import Execution
-
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.workflows import WorkflowsHook
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.cloud.workflows.executions_v1beta import Execution
 
 if TYPE_CHECKING:
-    from google.api_core.retry import Retry
-
     from airflow.utils.context import Context
+    from google.api_core.retry import Retry
 
 
 class WorkflowExecutionSensor(BaseSensorOperator):
airflow/providers/google/cloud/transfers/bigquery_to_gcs.py
@@ -22,9 +22,6 @@ from __future__ import annotations
 from collections.abc import Sequence
 from typing import TYPE_CHECKING, Any
 
-from google.api_core.exceptions import Conflict
-from google.cloud.bigquery import DEFAULT_RETRY, UnknownJob
-
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.models import BaseOperator
@@ -33,11 +30,12 @@ from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
 from airflow.providers.google.cloud.triggers.bigquery import BigQueryInsertJobTrigger
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils.helpers import merge_dicts
+from google.api_core.exceptions import Conflict
+from google.cloud.bigquery import DEFAULT_RETRY, UnknownJob
 
 if TYPE_CHECKING:
-    from google.api_core.retry import Retry
-
     from airflow.utils.context import Context
+    from google.api_core.retry import Retry
 
 
 class BigQueryToGCSOperator(BaseOperator):
@@ -222,6 +220,8 @@ class BigQueryToGCSOperator(BaseOperator):
 
         try:
             self.log.info("Executing: %s", configuration)
+            if not self.job_id:
+                raise AirflowException("Job ID is None, expected a valid job ID.")
             job: BigQueryJob | UnknownJob = self._submit_job(
                 hook=hook, job_id=self.job_id, configuration=configuration
             )
airflow/providers/google/cloud/transfers/gcs_to_bigquery.py
@@ -23,6 +23,15 @@ import json
 from collections.abc import Sequence
 from typing import TYPE_CHECKING, Any
 
+from airflow.configuration import conf
+from airflow.exceptions import AirflowException
+from airflow.models import BaseOperator
+from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
+from airflow.providers.google.cloud.hooks.gcs import GCSHook
+from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
+from airflow.providers.google.cloud.triggers.bigquery import BigQueryInsertJobTrigger
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
+from airflow.utils.helpers import merge_dicts
 from google.api_core.exceptions import BadRequest, Conflict
 from google.cloud.bigquery import (
     DEFAULT_RETRY,
@@ -36,20 +45,9 @@ from google.cloud.bigquery import (
 )
 from google.cloud.bigquery.table import EncryptionConfiguration, Table, TableReference
 
-from airflow.configuration import conf
-from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
-from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
-from airflow.providers.google.cloud.hooks.gcs import GCSHook
-from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
-from airflow.providers.google.cloud.triggers.bigquery import BigQueryInsertJobTrigger
-from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
-from airflow.utils.helpers import merge_dicts
-
 if TYPE_CHECKING:
-    from google.api_core.retry import Retry
-
     from airflow.utils.context import Context
+    from google.api_core.retry import Retry
 
 ALLOWED_FORMATS = [
     "CSV",
airflow/providers/google/cloud/transfers/gcs_to_gcs.py
@@ -304,7 +304,7 @@ class GCSToGCSOperator(BaseOperator):
             dest_object.replace(self.destination_object, prefix, 1) for dest_object in destination_objects
         ]
 
-        objects = set(objects) - set(existing_objects)
+        objects = list(set(objects) - set(existing_objects))
         if objects:
             self.log.info("%s files are going to be synced: %s.", len(objects), objects)
         else:
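
The one-line change above converts the set difference back to a list. Presumably the surrounding code expects a sequence (a set would still satisfy the len() and truthiness uses shown here), so the wrapper restores the original type while keeping the deduplication:

    # Illustration only: deduplicate against existing objects, return a list.
    objects = ["a.csv", "b.csv"]
    existing_objects = ["b.csv"]
    objects = list(set(objects) - set(existing_objects))
    assert objects == ["a.csv"]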
airflow/providers/google/cloud/transfers/gcs_to_sftp.py
@@ -21,6 +21,7 @@ from __future__ import annotations
 
 import os
 from collections.abc import Sequence
+from functools import cached_property
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING
 
@@ -129,14 +130,16 @@ class GCSToSFTPOperator(BaseOperator):
         self.impersonation_chain = impersonation_chain
         self.sftp_dirs = None
 
+    @cached_property
+    def sftp_hook(self):
+        return SFTPHook(self.sftp_conn_id)
+
     def execute(self, context: Context):
         gcs_hook = GCSHook(
             gcp_conn_id=self.gcp_conn_id,
             impersonation_chain=self.impersonation_chain,
         )
 
-        sftp_hook = SFTPHook(self.sftp_conn_id)
-
         if WILDCARD in self.source_object:
             total_wildcards = self.source_object.count(WILDCARD)
             if total_wildcards > 1:
@@ -155,12 +158,12 @@ class GCSToSFTPOperator(BaseOperator):
 
             for source_object in objects:
                 destination_path = self._resolve_destination_path(source_object, prefix=prefix_dirname)
-                self._copy_single_object(gcs_hook, sftp_hook, source_object, destination_path)
+                self._copy_single_object(gcs_hook, self.sftp_hook, source_object, destination_path)
 
             self.log.info("Done. Uploaded '%d' files to %s", len(objects), self.destination_path)
         else:
             destination_path = self._resolve_destination_path(self.source_object)
-            self._copy_single_object(gcs_hook, sftp_hook, self.source_object, destination_path)
+            self._copy_single_object(gcs_hook, self.sftp_hook, self.source_object, destination_path)
             self.log.info("Done. Uploaded '%s' file to %s", self.source_object, destination_path)
 
     def _resolve_destination_path(self, source_object: str, prefix: str | None = None) -> str:
@@ -200,3 +203,32 @@ class GCSToSFTPOperator(BaseOperator):
         if self.move_object:
             self.log.info("Executing delete of gs://%s/%s", self.source_bucket, source_object)
             gcs_hook.delete(self.source_bucket, source_object)
+
+    def get_openlineage_facets_on_start(self):
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import extract_ds_name_from_gcs_path
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        source_name = extract_ds_name_from_gcs_path(f"{self.source_object}")
+        dest_name = f"{self.destination_path}"
+        if self.keep_directory_structure:
+            dest_name = os.path.join(dest_name, source_name if source_name != "/" else "")
+        elif WILDCARD not in self.source_object:
+            dest_name = os.path.join(dest_name, os.path.basename(self.source_object))
+
+        dest_name = dest_name.rstrip("/") if dest_name != "/" else "/"
+
+        return OperatorLineage(
+            inputs=[
+                Dataset(
+                    namespace=f"gs://{self.source_bucket}",
+                    name=source_name,
+                )
+            ],
+            outputs=[
+                Dataset(
+                    namespace=f"file://{self.sftp_hook.remote_host}:{self.sftp_hook.port}",
+                    name=dest_name,
+                )
+            ],
+        )
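
A hedged illustration of the lineage the new get_openlineage_facets_on_start emits for a non-wildcard copy; bucket, host, port, and paths are hypothetical, and extract_ds_name_from_gcs_path is assumed to normalise the object path:

    from airflow.providers.common.compat.openlineage.facet import Dataset
    from airflow.providers.openlineage.extractors import OperatorLineage

    # Roughly the result of copying gs://my-bucket/data/file.csv to /upload
    # on sftp.example.com:22.
    lineage = OperatorLineage(
        inputs=[Dataset(namespace="gs://my-bucket", name="data/file.csv")],
        outputs=[Dataset(namespace="file://sftp.example.com:22", name="/upload/file.csv")],
    )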
airflow/providers/google/cloud/transfers/mssql_to_gcs.py
@@ -22,10 +22,15 @@ from __future__ import annotations
 import datetime
 import decimal
 from collections.abc import Sequence
+from functools import cached_property
+from typing import TYPE_CHECKING
 
 from airflow.providers.google.cloud.transfers.sql_to_gcs import BaseSQLToGCSOperator
 from airflow.providers.microsoft.mssql.hooks.mssql import MsSqlHook
 
+if TYPE_CHECKING:
+    from airflow.providers.openlineage.extractors import OperatorLineage
+
 
 class MSSQLToGCSOperator(BaseSQLToGCSOperator):
     """
@@ -75,14 +80,17 @@ class MSSQLToGCSOperator(BaseSQLToGCSOperator):
         self.mssql_conn_id = mssql_conn_id
         self.bit_fields = bit_fields or []
 
+    @cached_property
+    def db_hook(self) -> MsSqlHook:
+        return MsSqlHook(mssql_conn_id=self.mssql_conn_id)
+
     def query(self):
         """
         Query MSSQL and returns a cursor of results.
 
         :return: mssql cursor
         """
-        mssql = MsSqlHook(mssql_conn_id=self.mssql_conn_id)
-        conn = mssql.get_conn()
+        conn = self.db_hook.get_conn()
         cursor = conn.cursor()
         cursor.execute(self.sql)
         return cursor
@@ -109,3 +117,20 @@ class MSSQLToGCSOperator(BaseSQLToGCSOperator):
         if isinstance(value, (datetime.date, datetime.time)):
             return value.isoformat()
         return value
+
+    def get_openlineage_facets_on_start(self) -> OperatorLineage | None:
+        from airflow.providers.common.compat.openlineage.facet import SQLJobFacet
+        from airflow.providers.common.compat.openlineage.utils.sql import get_openlineage_facets_with_sql
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        sql_parsing_result = get_openlineage_facets_with_sql(
+            hook=self.db_hook,
+            sql=self.sql,
+            conn_id=self.mssql_conn_id,
+            database=None,
+        )
+        gcs_output_datasets = self._get_openlineage_output_datasets()
+        if sql_parsing_result:
+            sql_parsing_result.outputs = gcs_output_datasets
+            return sql_parsing_result
+        return OperatorLineage(outputs=gcs_output_datasets, job_facets={"sql": SQLJobFacet(self.sql)})
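
The same db_hook/get_openlineage_facets_on_start pairing recurs in the MySQL hunk below (and, per the file list, in the Postgres and Trino transfers). The cached_property matters here because query() and the lineage method must see one and the same hook; a self-contained sketch of that guarantee:

    from functools import cached_property

    class Demo:
        created = 0

        @cached_property
        def db_hook(self) -> object:
            type(self).created += 1  # count hook construction
            return object()

    d = Demo()
    assert d.db_hook is d.db_hook  # cached: evaluated once per instance
    assert Demo.created == 1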
airflow/providers/google/cloud/transfers/mysql_to_gcs.py
@@ -22,6 +22,8 @@ from __future__ import annotations
 import base64
 from datetime import date, datetime, time, timedelta
 from decimal import Decimal
+from functools import cached_property
+from typing import TYPE_CHECKING
 
 try:
     from MySQLdb.constants import FIELD_TYPE
@@ -37,6 +39,9 @@ except ImportError:
 from airflow.providers.google.cloud.transfers.sql_to_gcs import BaseSQLToGCSOperator
 from airflow.providers.mysql.hooks.mysql import MySqlHook
 
+if TYPE_CHECKING:
+    from airflow.providers.openlineage.extractors import OperatorLineage
+
 
 class MySQLToGCSOperator(BaseSQLToGCSOperator):
     """
@@ -77,10 +82,13 @@ class MySQLToGCSOperator(BaseSQLToGCSOperator):
         self.mysql_conn_id = mysql_conn_id
         self.ensure_utc = ensure_utc
 
+    @cached_property
+    def db_hook(self) -> MySqlHook:
+        return MySqlHook(mysql_conn_id=self.mysql_conn_id)
+
     def query(self):
         """Query mysql and returns a cursor to the results."""
-        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
-        conn = mysql.get_conn()
+        conn = self.db_hook.get_conn()
         cursor = conn.cursor()
         if self.ensure_utc:
             # Ensure TIMESTAMP results are in UTC
@@ -140,3 +148,20 @@ class MySQLToGCSOperator(BaseSQLToGCSOperator):
         else:
             value = base64.standard_b64encode(value).decode("ascii")
         return value
+
+    def get_openlineage_facets_on_start(self) -> OperatorLineage | None:
+        from airflow.providers.common.compat.openlineage.facet import SQLJobFacet
+        from airflow.providers.common.compat.openlineage.utils.sql import get_openlineage_facets_with_sql
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        sql_parsing_result = get_openlineage_facets_with_sql(
+            hook=self.db_hook,
+            sql=self.sql,
+            conn_id=self.mysql_conn_id,
+            database=None,
+        )
+        gcs_output_datasets = self._get_openlineage_output_datasets()
+        if sql_parsing_result:
+            sql_parsing_result.outputs = gcs_output_datasets
+            return sql_parsing_result
+        return OperatorLineage(outputs=gcs_output_datasets, job_facets={"sql": SQLJobFacet(self.sql)})