apache-airflow-providers-google 12.0.0rc1__py3-none-any.whl → 13.0.0__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.
Files changed (163)
  1. airflow/providers/google/LICENSE +0 -52
  2. airflow/providers/google/__init__.py +1 -1
  3. airflow/providers/google/ads/hooks/ads.py +27 -13
  4. airflow/providers/google/ads/transfers/ads_to_gcs.py +18 -4
  5. airflow/providers/google/assets/bigquery.py +17 -0
  6. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +2 -3
  7. airflow/providers/google/cloud/hooks/alloy_db.py +736 -8
  8. airflow/providers/google/cloud/hooks/automl.py +10 -4
  9. airflow/providers/google/cloud/hooks/bigquery.py +125 -22
  10. airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
  11. airflow/providers/google/cloud/hooks/bigtable.py +2 -3
  12. airflow/providers/google/cloud/hooks/cloud_batch.py +3 -4
  13. airflow/providers/google/cloud/hooks/cloud_build.py +4 -5
  14. airflow/providers/google/cloud/hooks/cloud_composer.py +3 -4
  15. airflow/providers/google/cloud/hooks/cloud_memorystore.py +3 -4
  16. airflow/providers/google/cloud/hooks/cloud_run.py +3 -4
  17. airflow/providers/google/cloud/hooks/cloud_sql.py +7 -3
  18. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +119 -7
  19. airflow/providers/google/cloud/hooks/compute.py +3 -3
  20. airflow/providers/google/cloud/hooks/datacatalog.py +3 -4
  21. airflow/providers/google/cloud/hooks/dataflow.py +12 -12
  22. airflow/providers/google/cloud/hooks/dataform.py +2 -3
  23. airflow/providers/google/cloud/hooks/datafusion.py +2 -2
  24. airflow/providers/google/cloud/hooks/dataplex.py +1032 -11
  25. airflow/providers/google/cloud/hooks/dataproc.py +4 -5
  26. airflow/providers/google/cloud/hooks/dataproc_metastore.py +3 -4
  27. airflow/providers/google/cloud/hooks/dlp.py +3 -4
  28. airflow/providers/google/cloud/hooks/gcs.py +7 -6
  29. airflow/providers/google/cloud/hooks/kms.py +2 -3
  30. airflow/providers/google/cloud/hooks/kubernetes_engine.py +8 -8
  31. airflow/providers/google/cloud/hooks/life_sciences.py +1 -1
  32. airflow/providers/google/cloud/hooks/managed_kafka.py +482 -0
  33. airflow/providers/google/cloud/hooks/natural_language.py +2 -3
  34. airflow/providers/google/cloud/hooks/os_login.py +2 -3
  35. airflow/providers/google/cloud/hooks/pubsub.py +6 -6
  36. airflow/providers/google/cloud/hooks/secret_manager.py +2 -3
  37. airflow/providers/google/cloud/hooks/spanner.py +2 -2
  38. airflow/providers/google/cloud/hooks/speech_to_text.py +2 -3
  39. airflow/providers/google/cloud/hooks/stackdriver.py +4 -4
  40. airflow/providers/google/cloud/hooks/tasks.py +3 -4
  41. airflow/providers/google/cloud/hooks/text_to_speech.py +2 -3
  42. airflow/providers/google/cloud/hooks/translate.py +236 -5
  43. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +9 -4
  44. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -4
  45. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +4 -5
  46. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +3 -4
  47. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -3
  48. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +3 -4
  49. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -181
  50. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -4
  51. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -3
  52. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -4
  53. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -3
  54. airflow/providers/google/cloud/hooks/video_intelligence.py +2 -3
  55. airflow/providers/google/cloud/hooks/vision.py +3 -4
  56. airflow/providers/google/cloud/hooks/workflows.py +2 -3
  57. airflow/providers/google/cloud/links/alloy_db.py +46 -0
  58. airflow/providers/google/cloud/links/bigquery.py +25 -0
  59. airflow/providers/google/cloud/links/dataplex.py +172 -2
  60. airflow/providers/google/cloud/links/kubernetes_engine.py +1 -2
  61. airflow/providers/google/cloud/links/managed_kafka.py +104 -0
  62. airflow/providers/google/cloud/links/translate.py +28 -0
  63. airflow/providers/google/cloud/log/gcs_task_handler.py +3 -3
  64. airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -10
  65. airflow/providers/google/cloud/openlineage/facets.py +67 -0
  66. airflow/providers/google/cloud/openlineage/mixins.py +438 -173
  67. airflow/providers/google/cloud/openlineage/utils.py +394 -61
  68. airflow/providers/google/cloud/operators/alloy_db.py +980 -69
  69. airflow/providers/google/cloud/operators/automl.py +83 -245
  70. airflow/providers/google/cloud/operators/bigquery.py +377 -74
  71. airflow/providers/google/cloud/operators/bigquery_dts.py +126 -13
  72. airflow/providers/google/cloud/operators/bigtable.py +1 -3
  73. airflow/providers/google/cloud/operators/cloud_base.py +1 -2
  74. airflow/providers/google/cloud/operators/cloud_batch.py +2 -4
  75. airflow/providers/google/cloud/operators/cloud_build.py +3 -5
  76. airflow/providers/google/cloud/operators/cloud_composer.py +5 -7
  77. airflow/providers/google/cloud/operators/cloud_memorystore.py +4 -6
  78. airflow/providers/google/cloud/operators/cloud_run.py +6 -5
  79. airflow/providers/google/cloud/operators/cloud_sql.py +20 -8
  80. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +62 -8
  81. airflow/providers/google/cloud/operators/compute.py +3 -4
  82. airflow/providers/google/cloud/operators/datacatalog.py +9 -11
  83. airflow/providers/google/cloud/operators/dataflow.py +1 -112
  84. airflow/providers/google/cloud/operators/dataform.py +3 -5
  85. airflow/providers/google/cloud/operators/datafusion.py +1 -1
  86. airflow/providers/google/cloud/operators/dataplex.py +2046 -7
  87. airflow/providers/google/cloud/operators/dataproc.py +102 -17
  88. airflow/providers/google/cloud/operators/dataproc_metastore.py +7 -9
  89. airflow/providers/google/cloud/operators/dlp.py +17 -19
  90. airflow/providers/google/cloud/operators/gcs.py +14 -17
  91. airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -2
  92. airflow/providers/google/cloud/operators/managed_kafka.py +788 -0
  93. airflow/providers/google/cloud/operators/natural_language.py +3 -5
  94. airflow/providers/google/cloud/operators/pubsub.py +39 -7
  95. airflow/providers/google/cloud/operators/speech_to_text.py +3 -5
  96. airflow/providers/google/cloud/operators/stackdriver.py +3 -5
  97. airflow/providers/google/cloud/operators/tasks.py +4 -6
  98. airflow/providers/google/cloud/operators/text_to_speech.py +2 -4
  99. airflow/providers/google/cloud/operators/translate.py +414 -5
  100. airflow/providers/google/cloud/operators/translate_speech.py +2 -4
  101. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +9 -8
  102. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +4 -6
  103. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +6 -8
  104. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +4 -6
  105. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +4 -6
  106. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +0 -322
  107. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +4 -6
  108. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +4 -6
  109. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +4 -6
  110. airflow/providers/google/cloud/operators/video_intelligence.py +3 -5
  111. airflow/providers/google/cloud/operators/vision.py +4 -6
  112. airflow/providers/google/cloud/operators/workflows.py +5 -7
  113. airflow/providers/google/cloud/secrets/secret_manager.py +1 -2
  114. airflow/providers/google/cloud/sensors/bigquery_dts.py +3 -5
  115. airflow/providers/google/cloud/sensors/bigtable.py +2 -3
  116. airflow/providers/google/cloud/sensors/cloud_composer.py +32 -8
  117. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +39 -1
  118. airflow/providers/google/cloud/sensors/dataplex.py +4 -6
  119. airflow/providers/google/cloud/sensors/dataproc.py +2 -3
  120. airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -2
  121. airflow/providers/google/cloud/sensors/gcs.py +2 -4
  122. airflow/providers/google/cloud/sensors/pubsub.py +2 -3
  123. airflow/providers/google/cloud/sensors/workflows.py +3 -5
  124. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +5 -5
  125. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +10 -12
  126. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +1 -1
  127. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +36 -4
  128. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +27 -2
  129. airflow/providers/google/cloud/transfers/mysql_to_gcs.py +27 -2
  130. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +27 -2
  131. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +34 -5
  132. airflow/providers/google/cloud/transfers/sql_to_gcs.py +15 -0
  133. airflow/providers/google/cloud/transfers/trino_to_gcs.py +25 -2
  134. airflow/providers/google/cloud/triggers/bigquery_dts.py +1 -2
  135. airflow/providers/google/cloud/triggers/cloud_batch.py +1 -2
  136. airflow/providers/google/cloud/triggers/cloud_build.py +1 -2
  137. airflow/providers/google/cloud/triggers/cloud_composer.py +13 -3
  138. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +102 -4
  139. airflow/providers/google/cloud/triggers/dataflow.py +2 -3
  140. airflow/providers/google/cloud/triggers/dataplex.py +1 -2
  141. airflow/providers/google/cloud/triggers/dataproc.py +2 -3
  142. airflow/providers/google/cloud/triggers/kubernetes_engine.py +1 -1
  143. airflow/providers/google/cloud/triggers/pubsub.py +1 -2
  144. airflow/providers/google/cloud/triggers/vertex_ai.py +7 -8
  145. airflow/providers/google/cloud/utils/credentials_provider.py +15 -8
  146. airflow/providers/google/cloud/utils/external_token_supplier.py +1 -0
  147. airflow/providers/google/common/auth_backend/google_openid.py +4 -4
  148. airflow/providers/google/common/consts.py +1 -2
  149. airflow/providers/google/common/hooks/base_google.py +8 -7
  150. airflow/providers/google/get_provider_info.py +186 -134
  151. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -3
  152. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  153. airflow/providers/google/marketing_platform/operators/analytics_admin.py +5 -7
  154. {apache_airflow_providers_google-12.0.0rc1.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/METADATA +41 -58
  155. {apache_airflow_providers_google-12.0.0rc1.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/RECORD +157 -159
  156. airflow/providers/google/cloud/example_dags/example_facebook_ads_to_gcs.py +0 -141
  157. airflow/providers/google/cloud/example_dags/example_looker.py +0 -64
  158. airflow/providers/google/cloud/example_dags/example_presto_to_gcs.py +0 -194
  159. airflow/providers/google/cloud/example_dags/example_salesforce_to_gcs.py +0 -129
  160. airflow/providers/google/marketing_platform/example_dags/__init__.py +0 -16
  161. airflow/providers/google/marketing_platform/example_dags/example_display_video.py +0 -213
  162. {apache_airflow_providers_google-12.0.0rc1.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/WHEEL +0 -0
  163. {apache_airflow_providers_google-12.0.0rc1.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/dataproc.py

@@ -30,11 +30,6 @@ from enum import Enum
 from functools import cached_property
 from typing import TYPE_CHECKING, Any
 
-from google.api_core.exceptions import AlreadyExists, NotFound
-from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.api_core.retry import Retry, exponential_sleep_generator
-from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
-
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.hooks.dataproc import (
@@ -54,9 +49,6 @@ from airflow.providers.google.cloud.links.dataproc import (
     DataprocWorkflowLink,
     DataprocWorkflowTemplateLink,
 )
-from airflow.providers.google.cloud.openlineage.utils import (
-    inject_openlineage_properties_into_dataproc_job,
-)
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.dataproc import (
     DataprocBatchTrigger,
@@ -69,16 +61,19 @@ from airflow.providers.google.cloud.utils.dataproc import DataprocOperationType
 from airflow.providers.google.common.deprecated import deprecated
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils import timezone
+from google.api_core.exceptions import AlreadyExists, NotFound
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.api_core.retry import Retry, exponential_sleep_generator
+from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
 
 if TYPE_CHECKING:
+    from airflow.utils.context import Context
     from google.api_core import operation
     from google.api_core.retry_async import AsyncRetry
     from google.protobuf.duration_pb2 import Duration
     from google.protobuf.field_mask_pb2 import FieldMask
     from google.type.interval_pb2 import Interval
 
-    from airflow.utils.context import Context
-
 
 class PreemptibilityType(Enum):
     """Contains possible Type values of Preemptibility applicable for every secondary worker of Cluster."""
@@ -1824,6 +1819,12 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         polling_interval_seconds: int = 10,
         cancel_on_kill: bool = True,
+        openlineage_inject_parent_job_info: bool = conf.getboolean(
+            "openlineage", "spark_inject_parent_job_info", fallback=False
+        ),
+        openlineage_inject_transport_info: bool = conf.getboolean(
+            "openlineage", "spark_inject_transport_info", fallback=False
+        ),
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -1843,11 +1844,17 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
         self.polling_interval_seconds = polling_interval_seconds
         self.cancel_on_kill = cancel_on_kill
         self.operation_name: str | None = None
+        self.openlineage_inject_parent_job_info = openlineage_inject_parent_job_info
+        self.openlineage_inject_transport_info = openlineage_inject_transport_info
 
     def execute(self, context: Context):
         self.log.info("Instantiating Inline Template")
         hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
         project_id = self.project_id or hook.project_id
+        if self.openlineage_inject_parent_job_info or self.openlineage_inject_transport_info:
+            self.log.info("Automatic injection of OpenLineage information into Spark properties is enabled.")
+            self._inject_openlineage_properties_into_dataproc_workflow_template(context)
+
         operation = hook.instantiate_inline_workflow_template(
             template=self.template,
             project_id=project_id,
@@ -1903,6 +1910,25 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
         hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
         hook.get_operations_client(region=self.region).cancel_operation(name=self.operation_name)
 
+    def _inject_openlineage_properties_into_dataproc_workflow_template(self, context: Context) -> None:
+        try:
+            from airflow.providers.google.cloud.openlineage.utils import (
+                inject_openlineage_properties_into_dataproc_workflow_template,
+            )
+
+            self.template = inject_openlineage_properties_into_dataproc_workflow_template(
+                template=self.template,
+                context=context,
+                inject_parent_job_info=self.openlineage_inject_parent_job_info,
+                inject_transport_info=self.openlineage_inject_transport_info,
+            )
+        except Exception as e:
+            self.log.warning(
+                "An error occurred while trying to inject OpenLineage information. "
+                "Dataproc template has not been modified by OpenLineage.",
+                exc_info=e,
+            )
+
 
 class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
     """
@@ -1968,6 +1994,9 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         openlineage_inject_parent_job_info: bool = conf.getboolean(
             "openlineage", "spark_inject_parent_job_info", fallback=False
         ),
+        openlineage_inject_transport_info: bool = conf.getboolean(
+            "openlineage", "spark_inject_transport_info", fallback=False
+        ),
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -1990,15 +2019,15 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         self.job_id: str | None = None
         self.wait_timeout = wait_timeout
         self.openlineage_inject_parent_job_info = openlineage_inject_parent_job_info
+        self.openlineage_inject_transport_info = openlineage_inject_transport_info
 
     def execute(self, context: Context):
         self.log.info("Submitting job")
         self.hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
-        if self.openlineage_inject_parent_job_info:
+        if self.openlineage_inject_parent_job_info or self.openlineage_inject_transport_info:
             self.log.info("Automatic injection of OpenLineage information into Spark properties is enabled.")
-            self.job = inject_openlineage_properties_into_dataproc_job(
-                job=self.job, context=context, inject_parent_job_info=self.openlineage_inject_parent_job_info
-            )
+            self._inject_openlineage_properties_into_dataproc_job(context)
+
         job_object = self.hook.submit_job(
             project_id=self.project_id,
             region=self.region,
@@ -2072,6 +2101,25 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         if self.job_id and self.cancel_on_kill:
             self.hook.cancel_job(job_id=self.job_id, project_id=self.project_id, region=self.region)
 
+    def _inject_openlineage_properties_into_dataproc_job(self, context: Context) -> None:
+        try:
+            from airflow.providers.google.cloud.openlineage.utils import (
+                inject_openlineage_properties_into_dataproc_job,
+            )
+
+            self.job = inject_openlineage_properties_into_dataproc_job(
+                job=self.job,
+                context=context,
+                inject_parent_job_info=self.openlineage_inject_parent_job_info,
+                inject_transport_info=self.openlineage_inject_transport_info,
+            )
+        except Exception as e:
+            self.log.warning(
+                "An error occurred while trying to inject OpenLineage information. "
+                "Dataproc job has not been modified by OpenLineage.",
+                exc_info=e,
+            )
+
 
 class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
     """
@@ -2425,6 +2473,12 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
         asynchronous: bool = False,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         polling_interval_seconds: int = 5,
+        openlineage_inject_parent_job_info: bool = conf.getboolean(
+            "openlineage", "spark_inject_parent_job_info", fallback=False
+        ),
+        openlineage_inject_transport_info: bool = conf.getboolean(
+            "openlineage", "spark_inject_transport_info", fallback=False
+        ),
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -2446,6 +2500,8 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
         self.asynchronous = asynchronous
         self.deferrable = deferrable
         self.polling_interval_seconds = polling_interval_seconds
+        self.openlineage_inject_parent_job_info = openlineage_inject_parent_job_info
+        self.openlineage_inject_transport_info = openlineage_inject_transport_info
 
     def execute(self, context: Context):
         if self.asynchronous and self.deferrable:
@@ -2468,6 +2524,10 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
         else:
             self.log.info("Starting batch. The batch ID will be generated since it was not provided.")
 
+        if self.openlineage_inject_parent_job_info or self.openlineage_inject_transport_info:
+            self.log.info("Automatic injection of OpenLineage information into Spark properties is enabled.")
+            self._inject_openlineage_properties_into_dataproc_batch(context)
+
         try:
             self.operation = self.hook.create_batch(
                 region=self.region,
@@ -2483,7 +2543,10 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
             self.log.info("Batch with given id already exists.")
             self.log.info("Attaching to the job %s if it is still running.", batch_id)
         else:
-            batch_id = self.operation.metadata.batch.split("/")[-1]
+            if self.operation and self.operation.metadata:
+                batch_id = self.operation.metadata.batch.split("/")[-1]
+            else:
+                raise AirflowException("Operation metadata is not available.")
             self.log.info("The batch %s was created.", batch_id)
 
         DataprocBatchLink.persist(
@@ -2609,8 +2672,11 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
             self.log.info("Batch with given id already exists.")
             self.log.info("Attaching to the job %s if it is still running.", self.batch_id)
         else:
-            batch_id = self.operation.metadata.batch.split("/")[-1]
-            self.log.info("The batch %s was created.", batch_id)
+            if self.operation and self.operation.metadata:
+                batch_id = self.operation.metadata.batch.split("/")[-1]
+                self.log.info("The batch %s was created.", batch_id)
+            else:
+                raise AirflowException("Operation metadata is not available.")
 
         self.log.info("Waiting for the completion of batch job %s", batch_id)
         batch = self.hook.wait_for_batch(
@@ -2623,6 +2689,25 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
         )
         return batch, batch_id
 
+    def _inject_openlineage_properties_into_dataproc_batch(self, context: Context) -> None:
+        try:
+            from airflow.providers.google.cloud.openlineage.utils import (
+                inject_openlineage_properties_into_dataproc_batch,
+            )
+
+            self.batch = inject_openlineage_properties_into_dataproc_batch(
+                batch=self.batch,
+                context=context,
+                inject_parent_job_info=self.openlineage_inject_parent_job_info,
+                inject_transport_info=self.openlineage_inject_transport_info,
+            )
+        except Exception as e:
+            self.log.warning(
+                "An error occurred while trying to inject OpenLineage information. "
+                "Dataproc batch has not been modified by OpenLineage.",
+                exc_info=e,
+            )
+
 
 class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
     """
airflow/providers/google/cloud/operators/dataproc_metastore.py

@@ -23,25 +23,23 @@ import time
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from google.api_core.exceptions import AlreadyExists
-from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.api_core.retry import Retry, exponential_sleep_generator
-from google.cloud.metastore_v1 import MetadataExport, MetadataManagementActivity
-from google.cloud.metastore_v1.types import Backup, MetadataImport, Service
-from google.cloud.metastore_v1.types.metastore import DatabaseDumpSpec, Restore
-
 from airflow.exceptions import AirflowException
 from airflow.models import BaseOperator, BaseOperatorLink
 from airflow.models.xcom import XCom
 from airflow.providers.google.cloud.hooks.dataproc_metastore import DataprocMetastoreHook
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.common.links.storage import StorageLink
+from google.api_core.exceptions import AlreadyExists
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.api_core.retry import Retry, exponential_sleep_generator
+from google.cloud.metastore_v1 import MetadataExport, MetadataManagementActivity
+from google.cloud.metastore_v1.types import Backup, MetadataImport, Service
+from google.cloud.metastore_v1.types.metastore import DatabaseDumpSpec, Restore
 
 if TYPE_CHECKING:
-    from google.protobuf.field_mask_pb2 import FieldMask
-
     from airflow.models.taskinstancekey import TaskInstanceKey
     from airflow.utils.context import Context
+    from google.protobuf.field_mask_pb2 import FieldMask
 
 
 BASE_LINK = "https://console.cloud.google.com"
airflow/providers/google/cloud/operators/dlp.py

@@ -22,6 +22,22 @@ from __future__ import annotations
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
+from airflow.providers.google.cloud.hooks.dlp import CloudDLPHook
+from airflow.providers.google.cloud.links.data_loss_prevention import (
+    CloudDLPDeidentifyTemplateDetailsLink,
+    CloudDLPDeidentifyTemplatesListLink,
+    CloudDLPInfoTypeDetailsLink,
+    CloudDLPInfoTypesListLink,
+    CloudDLPInspectTemplateDetailsLink,
+    CloudDLPInspectTemplatesListLink,
+    CloudDLPJobDetailsLink,
+    CloudDLPJobsListLink,
+    CloudDLPJobTriggerDetailsLink,
+    CloudDLPJobTriggersListLink,
+    CloudDLPPossibleInfoTypesListLink,
+)
+from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from google.api_core.exceptions import AlreadyExists, InvalidArgument, NotFound
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
 from google.cloud.dlp_v2.types import (
@@ -45,29 +61,11 @@ from google.cloud.dlp_v2.types import (
     StoredInfoTypeConfig,
 )
 
-from airflow.providers.google.cloud.hooks.dlp import CloudDLPHook
-from airflow.providers.google.cloud.links.data_loss_prevention import (
-    CloudDLPDeidentifyTemplateDetailsLink,
-    CloudDLPDeidentifyTemplatesListLink,
-    CloudDLPInfoTypeDetailsLink,
-    CloudDLPInfoTypesListLink,
-    CloudDLPInspectTemplateDetailsLink,
-    CloudDLPInspectTemplatesListLink,
-    CloudDLPJobDetailsLink,
-    CloudDLPJobsListLink,
-    CloudDLPJobTriggerDetailsLink,
-    CloudDLPJobTriggersListLink,
-    CloudDLPPossibleInfoTypesListLink,
-)
-from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
-from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
-
 if TYPE_CHECKING:
+    from airflow.utils.context import Context
     from google.api_core.retry import Retry
     from google.protobuf.field_mask_pb2 import FieldMask
 
-    from airflow.utils.context import Context
-
 
 class CloudDLPCancelDLPJobOperator(GoogleCloudBaseOperator):
     """
airflow/providers/google/cloud/operators/gcs.py

@@ -35,14 +35,13 @@ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 if TYPE_CHECKING:
     from airflow.utils.context import Context
 
-from google.api_core.exceptions import Conflict
-from google.cloud.exceptions import GoogleCloudError
-
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.common.links.storage import FileDetailsLink, StorageLink
 from airflow.utils import timezone
+from google.api_core.exceptions import Conflict
+from google.cloud.exceptions import GoogleCloudError
 
 
 class GCSCreateBucketOperator(GoogleCloudBaseOperator):
@@ -787,22 +786,20 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
 
     def execute(self, context: Context) -> list[str]:
         # Define intervals and prefixes.
-        try:
-            orig_start = context["data_interval_start"]
-            orig_end = context["data_interval_end"]
-        except KeyError:
-            orig_start = pendulum.instance(context["logical_date"])
-            next_dagrun = context["dag"].next_dagrun_info(last_automated_dagrun=None, restricted=False)
-            if next_dagrun and next_dagrun.data_interval and next_dagrun.data_interval.end:
-                orig_end = next_dagrun.data_interval.end
-            else:
-                orig_end = None
+        orig_start = context["data_interval_start"]
+        orig_end = context["data_interval_end"]
+
+        if orig_start is None or orig_end is None:
+            raise RuntimeError("`data_interval_start` & `data_interval_end` must not be None")
+
+        if not isinstance(orig_start, pendulum.DateTime):
+            orig_start = pendulum.instance(orig_start)
+
+        if not isinstance(orig_end, pendulum.DateTime):
+            orig_end = pendulum.instance(orig_end)
 
         timespan_start = orig_start
-        if orig_end is None:  # Only possible in Airflow before 2.2.
-            self.log.warning("No following schedule found, setting timespan end to max %s", orig_end)
-            timespan_end = pendulum.instance(datetime.datetime.max)
-        elif orig_start >= orig_end:  # Airflow 2.2 sets start == end for non-perodic schedules.
+        if orig_start >= orig_end:  # Airflow 2.2 sets start == end for non-perodic schedules.
             self.log.warning("DAG schedule not periodic, setting timespan end to max %s", orig_end)
             timespan_end = pendulum.instance(datetime.datetime.max)
         else:
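The execute() rewrite above drops the pre-Airflow-2.2 fallback that derived the interval end from next_dagrun_info: the operator now fails fast with a RuntimeError when either bound of the data interval is missing, then normalizes both bounds to pendulum.DateTime. A minimal sketch of that coercion step, with a hypothetical timestamp:

import datetime

import pendulum

# Plain datetimes coming out of the task context are coerced to
# pendulum.DateTime before the start/end comparison.
orig_start = datetime.datetime(2025, 1, 1, tzinfo=datetime.timezone.utc)
if not isinstance(orig_start, pendulum.DateTime):
    orig_start = pendulum.instance(orig_start)
assert isinstance(orig_start, pendulum.DateTime)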
airflow/providers/google/cloud/operators/kubernetes_engine.py

@@ -24,7 +24,6 @@ from collections.abc import Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any
 
-from google.api_core.exceptions import AlreadyExists
 from kubernetes.client import V1JobList, models as k8s
 from packaging.version import parse as parse_version
 
@@ -61,6 +60,7 @@ from airflow.providers.google.common.deprecated import deprecated
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.providers_manager import ProvidersManager
 from airflow.utils.timezone import utcnow
+from google.api_core.exceptions import AlreadyExists
 
 try:
     from airflow.providers.cncf.kubernetes.operators.job import KubernetesDeleteJobOperator
@@ -73,11 +73,11 @@ except ImportError:
     )
 
 if TYPE_CHECKING:
-    from google.cloud.container_v1.types import Cluster
     from kubernetes.client.models import V1Job
     from pendulum import DateTime
 
     from airflow.utils.context import Context
+    from google.cloud.container_v1.types import Cluster
 
 KUBE_CONFIG_ENV_VAR = "KUBECONFIG"