apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (257) hide show
  1. airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
  2. airflow/providers/google/__init__.py +3 -3
  3. airflow/providers/google/ads/hooks/ads.py +39 -6
  4. airflow/providers/google/ads/operators/ads.py +2 -2
  5. airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
  6. airflow/providers/google/assets/gcs.py +1 -11
  7. airflow/providers/google/cloud/bundles/__init__.py +16 -0
  8. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  9. airflow/providers/google/cloud/hooks/alloy_db.py +1 -1
  10. airflow/providers/google/cloud/hooks/bigquery.py +176 -293
  11. airflow/providers/google/cloud/hooks/cloud_batch.py +1 -1
  12. airflow/providers/google/cloud/hooks/cloud_build.py +1 -1
  13. airflow/providers/google/cloud/hooks/cloud_composer.py +288 -15
  14. airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
  15. airflow/providers/google/cloud/hooks/cloud_memorystore.py +1 -1
  16. airflow/providers/google/cloud/hooks/cloud_run.py +18 -10
  17. airflow/providers/google/cloud/hooks/cloud_sql.py +102 -23
  18. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +29 -7
  19. airflow/providers/google/cloud/hooks/compute.py +1 -1
  20. airflow/providers/google/cloud/hooks/compute_ssh.py +6 -2
  21. airflow/providers/google/cloud/hooks/datacatalog.py +10 -1
  22. airflow/providers/google/cloud/hooks/dataflow.py +72 -95
  23. airflow/providers/google/cloud/hooks/dataform.py +1 -1
  24. airflow/providers/google/cloud/hooks/datafusion.py +21 -19
  25. airflow/providers/google/cloud/hooks/dataplex.py +2 -2
  26. airflow/providers/google/cloud/hooks/dataprep.py +1 -1
  27. airflow/providers/google/cloud/hooks/dataproc.py +73 -72
  28. airflow/providers/google/cloud/hooks/dataproc_metastore.py +1 -1
  29. airflow/providers/google/cloud/hooks/dlp.py +1 -1
  30. airflow/providers/google/cloud/hooks/functions.py +1 -1
  31. airflow/providers/google/cloud/hooks/gcs.py +112 -15
  32. airflow/providers/google/cloud/hooks/gdm.py +1 -1
  33. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  34. airflow/providers/google/cloud/hooks/kubernetes_engine.py +3 -3
  35. airflow/providers/google/cloud/hooks/looker.py +6 -2
  36. airflow/providers/google/cloud/hooks/managed_kafka.py +1 -1
  37. airflow/providers/google/cloud/hooks/mlengine.py +4 -3
  38. airflow/providers/google/cloud/hooks/pubsub.py +3 -0
  39. airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
  40. airflow/providers/google/cloud/hooks/spanner.py +74 -9
  41. airflow/providers/google/cloud/hooks/stackdriver.py +11 -9
  42. airflow/providers/google/cloud/hooks/tasks.py +1 -1
  43. airflow/providers/google/cloud/hooks/translate.py +2 -2
  44. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +2 -210
  45. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -3
  46. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +28 -2
  47. airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
  48. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +308 -8
  49. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
  50. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -1
  51. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +1 -1
  52. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +1 -1
  53. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
  54. airflow/providers/google/cloud/hooks/vision.py +3 -3
  55. airflow/providers/google/cloud/hooks/workflows.py +1 -1
  56. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  57. airflow/providers/google/cloud/links/base.py +77 -13
  58. airflow/providers/google/cloud/links/bigquery.py +0 -47
  59. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  60. airflow/providers/google/cloud/links/bigtable.py +0 -48
  61. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  62. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  63. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  64. airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
  65. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  66. airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
  67. airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
  68. airflow/providers/google/cloud/links/compute.py +0 -58
  69. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  70. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  71. airflow/providers/google/cloud/links/dataflow.py +0 -34
  72. airflow/providers/google/cloud/links/dataform.py +0 -64
  73. airflow/providers/google/cloud/links/datafusion.py +1 -96
  74. airflow/providers/google/cloud/links/dataplex.py +0 -154
  75. airflow/providers/google/cloud/links/dataprep.py +0 -24
  76. airflow/providers/google/cloud/links/dataproc.py +11 -95
  77. airflow/providers/google/cloud/links/datastore.py +0 -31
  78. airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
  79. airflow/providers/google/cloud/links/managed_kafka.py +0 -70
  80. airflow/providers/google/cloud/links/mlengine.py +0 -70
  81. airflow/providers/google/cloud/links/pubsub.py +0 -32
  82. airflow/providers/google/cloud/links/spanner.py +0 -33
  83. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  84. airflow/providers/google/cloud/links/translate.py +17 -187
  85. airflow/providers/google/cloud/links/vertex_ai.py +28 -195
  86. airflow/providers/google/cloud/links/workflows.py +0 -52
  87. airflow/providers/google/cloud/log/gcs_task_handler.py +58 -22
  88. airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
  89. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  90. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  91. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  92. airflow/providers/google/cloud/openlineage/facets.py +102 -1
  93. airflow/providers/google/cloud/openlineage/mixins.py +10 -8
  94. airflow/providers/google/cloud/openlineage/utils.py +15 -1
  95. airflow/providers/google/cloud/operators/alloy_db.py +71 -56
  96. airflow/providers/google/cloud/operators/bigquery.py +73 -636
  97. airflow/providers/google/cloud/operators/bigquery_dts.py +4 -6
  98. airflow/providers/google/cloud/operators/bigtable.py +37 -8
  99. airflow/providers/google/cloud/operators/cloud_base.py +21 -1
  100. airflow/providers/google/cloud/operators/cloud_batch.py +3 -3
  101. airflow/providers/google/cloud/operators/cloud_build.py +76 -33
  102. airflow/providers/google/cloud/operators/cloud_composer.py +129 -41
  103. airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
  104. airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
  105. airflow/providers/google/cloud/operators/cloud_run.py +24 -6
  106. airflow/providers/google/cloud/operators/cloud_sql.py +8 -17
  107. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +93 -12
  108. airflow/providers/google/cloud/operators/compute.py +9 -41
  109. airflow/providers/google/cloud/operators/datacatalog.py +157 -21
  110. airflow/providers/google/cloud/operators/dataflow.py +40 -16
  111. airflow/providers/google/cloud/operators/dataform.py +15 -5
  112. airflow/providers/google/cloud/operators/datafusion.py +42 -21
  113. airflow/providers/google/cloud/operators/dataplex.py +194 -110
  114. airflow/providers/google/cloud/operators/dataprep.py +1 -5
  115. airflow/providers/google/cloud/operators/dataproc.py +80 -36
  116. airflow/providers/google/cloud/operators/dataproc_metastore.py +97 -89
  117. airflow/providers/google/cloud/operators/datastore.py +23 -7
  118. airflow/providers/google/cloud/operators/dlp.py +6 -29
  119. airflow/providers/google/cloud/operators/functions.py +17 -8
  120. airflow/providers/google/cloud/operators/gcs.py +12 -9
  121. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  122. airflow/providers/google/cloud/operators/kubernetes_engine.py +62 -100
  123. airflow/providers/google/cloud/operators/looker.py +2 -2
  124. airflow/providers/google/cloud/operators/managed_kafka.py +108 -53
  125. airflow/providers/google/cloud/operators/natural_language.py +1 -1
  126. airflow/providers/google/cloud/operators/pubsub.py +68 -15
  127. airflow/providers/google/cloud/operators/spanner.py +26 -13
  128. airflow/providers/google/cloud/operators/speech_to_text.py +2 -3
  129. airflow/providers/google/cloud/operators/stackdriver.py +1 -9
  130. airflow/providers/google/cloud/operators/tasks.py +1 -12
  131. airflow/providers/google/cloud/operators/text_to_speech.py +2 -3
  132. airflow/providers/google/cloud/operators/translate.py +41 -17
  133. airflow/providers/google/cloud/operators/translate_speech.py +2 -3
  134. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
  135. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +30 -10
  136. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +55 -27
  137. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
  138. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
  139. airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
  140. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
  141. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -115
  142. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +12 -10
  143. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
  144. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +31 -8
  145. airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
  146. airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
  147. airflow/providers/google/cloud/operators/vision.py +2 -2
  148. airflow/providers/google/cloud/operators/workflows.py +18 -15
  149. airflow/providers/google/cloud/secrets/secret_manager.py +3 -2
  150. airflow/providers/google/cloud/sensors/bigquery.py +3 -3
  151. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -3
  152. airflow/providers/google/cloud/sensors/bigtable.py +11 -4
  153. airflow/providers/google/cloud/sensors/cloud_composer.py +533 -30
  154. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -3
  155. airflow/providers/google/cloud/sensors/dataflow.py +26 -10
  156. airflow/providers/google/cloud/sensors/dataform.py +2 -3
  157. airflow/providers/google/cloud/sensors/datafusion.py +4 -5
  158. airflow/providers/google/cloud/sensors/dataplex.py +2 -3
  159. airflow/providers/google/cloud/sensors/dataprep.py +2 -2
  160. airflow/providers/google/cloud/sensors/dataproc.py +2 -3
  161. airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -3
  162. airflow/providers/google/cloud/sensors/gcs.py +4 -5
  163. airflow/providers/google/cloud/sensors/looker.py +2 -3
  164. airflow/providers/google/cloud/sensors/pubsub.py +4 -5
  165. airflow/providers/google/cloud/sensors/tasks.py +2 -2
  166. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -3
  167. airflow/providers/google/cloud/sensors/workflows.py +2 -3
  168. airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
  169. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
  170. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +4 -3
  171. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
  172. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +10 -5
  173. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
  174. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  175. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
  176. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
  177. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  178. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +3 -3
  179. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
  180. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +21 -13
  181. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +4 -3
  182. airflow/providers/google/cloud/transfers/gcs_to_local.py +6 -4
  183. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +11 -5
  184. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
  185. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
  186. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  187. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
  188. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  189. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
  190. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
  191. airflow/providers/google/cloud/transfers/s3_to_gcs.py +13 -7
  192. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
  193. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +14 -5
  194. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
  195. airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
  196. airflow/providers/google/cloud/triggers/bigquery.py +76 -35
  197. airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
  198. airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
  199. airflow/providers/google/cloud/triggers/cloud_run.py +3 -3
  200. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +92 -2
  201. airflow/providers/google/cloud/triggers/dataflow.py +122 -0
  202. airflow/providers/google/cloud/triggers/datafusion.py +1 -1
  203. airflow/providers/google/cloud/triggers/dataplex.py +14 -2
  204. airflow/providers/google/cloud/triggers/dataproc.py +123 -53
  205. airflow/providers/google/cloud/triggers/kubernetes_engine.py +47 -28
  206. airflow/providers/google/cloud/triggers/mlengine.py +1 -1
  207. airflow/providers/google/cloud/triggers/pubsub.py +15 -19
  208. airflow/providers/google/cloud/triggers/vertex_ai.py +1 -1
  209. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  210. airflow/providers/google/cloud/utils/credentials_provider.py +2 -2
  211. airflow/providers/google/cloud/utils/field_sanitizer.py +1 -1
  212. airflow/providers/google/cloud/utils/field_validator.py +2 -3
  213. airflow/providers/google/common/auth_backend/google_openid.py +4 -4
  214. airflow/providers/google/common/deprecated.py +2 -1
  215. airflow/providers/google/common/hooks/base_google.py +27 -9
  216. airflow/providers/google/common/hooks/operation_helpers.py +1 -1
  217. airflow/providers/google/common/links/storage.py +0 -22
  218. airflow/providers/google/common/utils/get_secret.py +31 -0
  219. airflow/providers/google/common/utils/id_token_credentials.py +3 -4
  220. airflow/providers/google/firebase/hooks/firestore.py +1 -1
  221. airflow/providers/google/firebase/operators/firestore.py +3 -3
  222. airflow/providers/google/get_provider_info.py +56 -52
  223. airflow/providers/google/go_module_utils.py +35 -3
  224. airflow/providers/google/leveldb/hooks/leveldb.py +27 -2
  225. airflow/providers/google/leveldb/operators/leveldb.py +2 -2
  226. airflow/providers/google/marketing_platform/hooks/campaign_manager.py +1 -1
  227. airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
  228. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  229. airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
  230. airflow/providers/google/marketing_platform/operators/analytics_admin.py +2 -3
  231. airflow/providers/google/marketing_platform/operators/campaign_manager.py +6 -6
  232. airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
  233. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
  234. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
  235. airflow/providers/google/marketing_platform/sensors/display_video.py +3 -64
  236. airflow/providers/google/suite/hooks/calendar.py +2 -2
  237. airflow/providers/google/suite/hooks/sheets.py +16 -2
  238. airflow/providers/google/suite/operators/sheets.py +8 -3
  239. airflow/providers/google/suite/sensors/drive.py +2 -2
  240. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +3 -3
  241. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  242. airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
  243. airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
  244. airflow/providers/google/version_compat.py +15 -1
  245. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/METADATA +90 -46
  246. apache_airflow_providers_google-19.3.0.dist-info/RECORD +331 -0
  247. apache_airflow_providers_google-19.3.0.dist-info/licenses/NOTICE +5 -0
  248. airflow/providers/google/cloud/hooks/automl.py +0 -673
  249. airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
  250. airflow/providers/google/cloud/links/automl.py +0 -193
  251. airflow/providers/google/cloud/operators/automl.py +0 -1362
  252. airflow/providers/google/cloud/operators/life_sciences.py +0 -119
  253. airflow/providers/google/cloud/operators/mlengine.py +0 -112
  254. apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
  255. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/WHEEL +0 -0
  256. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/entry_points.txt +0 -0
  257. {airflow/providers/google → apache_airflow_providers_google-19.3.0.dist-info/licenses}/LICENSE +0 -0
@@ -28,7 +28,7 @@ from typing import TYPE_CHECKING, Any
28
28
  from googleapiclient.errors import HttpError
29
29
 
30
30
  from airflow.configuration import conf
31
- from airflow.exceptions import AirflowException
31
+ from airflow.providers.common.compat.sdk import AirflowException
32
32
  from airflow.providers.google.cloud.hooks.dataflow import (
33
33
  DEFAULT_DATAFLOW_LOCATION,
34
34
  DataflowHook,
@@ -43,7 +43,7 @@ from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_MET
43
43
  from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
44
44
 
45
45
  if TYPE_CHECKING:
46
- from airflow.utils.context import Context
46
+ from airflow.providers.common.compat.sdk import Context
47
47
 
48
48
 
49
49
  class CheckJobRunning(Enum):
@@ -383,7 +383,12 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
383
383
  def execute(self, context: Context):
384
384
  def set_current_job(current_job):
385
385
  self.job = current_job
386
- DataflowJobLink.persist(self, context, self.project_id, self.location, self.job.get("id"))
386
+ DataflowJobLink.persist(
387
+ context=context,
388
+ project_id=self.project_id,
389
+ region=self.location,
390
+ job_id=self.job.get("id"),
391
+ )
387
392
 
388
393
  options = self.dataflow_default_options
389
394
  options.update(self.options)
@@ -404,7 +409,7 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
404
409
  append_job_name=self.append_job_name,
405
410
  )
406
411
  job_id = self.hook.extract_job_id(self.job)
407
- self.xcom_push(context, key="job_id", value=job_id)
412
+ context["task_instance"].xcom_push(key="job_id", value=job_id)
408
413
  return job_id
409
414
 
410
415
  self.job = self.hook.launch_job_with_template(
@@ -418,7 +423,9 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
418
423
  environment=self.environment,
419
424
  )
420
425
  job_id = self.hook.extract_job_id(self.job)
421
- DataflowJobLink.persist(self, context, self.project_id, self.location, job_id)
426
+ DataflowJobLink.persist(
427
+ context=context, project_id=self.project_id, region=self.location, job_id=job_id
428
+ )
422
429
  self.defer(
423
430
  trigger=TemplateJobStartTrigger(
424
431
  project_id=self.project_id,
@@ -439,7 +446,7 @@ class DataflowTemplatedJobStartOperator(GoogleCloudBaseOperator):
439
446
  raise AirflowException(event["message"])
440
447
 
441
448
  job_id = event["job_id"]
442
- self.xcom_push(context, key="job_id", value=job_id)
449
+ context["task_instance"].xcom_push(key="job_id", value=job_id)
443
450
  self.log.info("Task %s completed with response %s", self.task_id, event["message"])
444
451
  return job_id
445
452
 
@@ -576,6 +583,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
576
583
  def hook(self) -> DataflowHook:
577
584
  hook = DataflowHook(
578
585
  gcp_conn_id=self.gcp_conn_id,
586
+ poll_sleep=self.poll_sleep,
579
587
  drain_pipeline=self.drain_pipeline,
580
588
  cancel_timeout=self.cancel_timeout,
581
589
  wait_until_finished=self.wait_until_finished,
@@ -590,7 +598,9 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
590
598
 
591
599
  def set_current_job(current_job):
592
600
  self.job = current_job
593
- DataflowJobLink.persist(self, context, self.project_id, self.location, self.job.get("id"))
601
+ DataflowJobLink.persist(
602
+ context=context, project_id=self.project_id, region=self.location, job_id=self.job.get("id")
603
+ )
594
604
 
595
605
  if not self.deferrable:
596
606
  self.job = self.hook.start_flex_template(
@@ -600,7 +610,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
600
610
  on_new_job_callback=set_current_job,
601
611
  )
602
612
  job_id = self.hook.extract_job_id(self.job)
603
- self.xcom_push(context, key="job_id", value=job_id)
613
+ context["task_instance"].xcom_push(key="job_id", value=job_id)
604
614
  return self.job
605
615
 
606
616
  self.job = self.hook.launch_job_with_flex_template(
@@ -609,7 +619,9 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
609
619
  project_id=self.project_id,
610
620
  )
611
621
  job_id = self.hook.extract_job_id(self.job)
612
- DataflowJobLink.persist(self, context, self.project_id, self.location, job_id)
622
+ DataflowJobLink.persist(
623
+ context=context, project_id=self.project_id, region=self.location, job_id=job_id
624
+ )
613
625
  self.defer(
614
626
  trigger=TemplateJobStartTrigger(
615
627
  project_id=self.project_id,
@@ -639,7 +651,7 @@ class DataflowStartFlexTemplateOperator(GoogleCloudBaseOperator):
639
651
 
640
652
  job_id = event["job_id"]
641
653
  self.log.info("Task %s completed with response %s", job_id, event["message"])
642
- self.xcom_push(context, key="job_id", value=job_id)
654
+ context["task_instance"].xcom_push(key="job_id", value=job_id)
643
655
  job = self.hook.get_job(job_id=job_id, project_id=self.project_id, location=self.location)
644
656
  return job
645
657
 
@@ -764,7 +776,9 @@ class DataflowStartYamlJobOperator(GoogleCloudBaseOperator):
764
776
  location=self.region,
765
777
  )
766
778
 
767
- DataflowJobLink.persist(self, context, self.project_id, self.region, self.job_id)
779
+ DataflowJobLink.persist(
780
+ context=context, project_id=self.project_id, region=self.region, job_id=self.job_id
781
+ )
768
782
 
769
783
  if self.deferrable:
770
784
  self.defer(
@@ -794,7 +808,7 @@ class DataflowStartYamlJobOperator(GoogleCloudBaseOperator):
794
808
  raise AirflowException(event["message"])
795
809
  job = event["job"]
796
810
  self.log.info("Job %s completed with response %s", job["id"], event["message"])
797
- self.xcom_push(context, key="job_id", value=job["id"])
811
+ context["task_instance"].xcom_push(key="job_id", value=job["id"])
798
812
 
799
813
  return job
800
814
 
@@ -971,6 +985,14 @@ class DataflowCreatePipelineOperator(GoogleCloudBaseOperator):
971
985
 
972
986
  self.pipeline_name = self.body["name"].split("/")[-1] if self.body else None
973
987
 
988
+ @property
989
+ def extra_links_params(self) -> dict[str, Any]:
990
+ return {
991
+ "project_id": self.project_id,
992
+ "location": self.location,
993
+ "pipeline_name": self.pipeline_name,
994
+ }
995
+
974
996
  def execute(self, context: Context):
975
997
  if self.body is None:
976
998
  raise AirflowException(
@@ -1003,8 +1025,8 @@ class DataflowCreatePipelineOperator(GoogleCloudBaseOperator):
1003
1025
  pipeline_name=self.pipeline_name,
1004
1026
  location=self.location,
1005
1027
  )
1006
- DataflowPipelineLink.persist(self, context, self.project_id, self.location, self.pipeline_name)
1007
- self.xcom_push(context, key="pipeline_name", value=self.pipeline_name)
1028
+ DataflowPipelineLink.persist(context=context)
1029
+ context["task_instance"].xcom_push(key="pipeline_name", value=self.pipeline_name)
1008
1030
  if self.pipeline:
1009
1031
  if "error" in self.pipeline:
1010
1032
  raise AirflowException(self.pipeline.get("error").get("message"))
@@ -1075,8 +1097,10 @@ class DataflowRunPipelineOperator(GoogleCloudBaseOperator):
1075
1097
  location=self.location,
1076
1098
  )["job"]
1077
1099
  job_id = self.dataflow_hook.extract_job_id(self.job)
1078
- self.xcom_push(context, key="job_id", value=job_id)
1079
- DataflowJobLink.persist(self, context, self.project_id, self.location, job_id)
1100
+ context["task_instance"].xcom_push(key="job_id", value=job_id)
1101
+ DataflowJobLink.persist(
1102
+ context=context, project_id=self.project_id, region=self.location, job_id=job_id
1103
+ )
1080
1104
  except HttpError as e:
1081
1105
  if e.resp.status == 404:
1082
1106
  raise AirflowException("Pipeline with given name was not found.")
@@ -28,7 +28,7 @@ from airflow.providers.google.cloud.links.dataform import (
28
28
  if TYPE_CHECKING:
29
29
  from google.api_core.retry import Retry
30
30
 
31
- from airflow.utils.context import Context
31
+ from airflow.providers.common.compat.sdk import Context
32
32
 
33
33
  from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
34
34
  from google.cloud.dataform_v1beta1.types import (
@@ -258,7 +258,6 @@ class DataformCreateWorkflowInvocationOperator(GoogleCloudBaseOperator):
258
258
  )
259
259
  workflow_invocation_id = result.name.split("/")[-1]
260
260
  DataformWorkflowInvocationLink.persist(
261
- operator_instance=self,
262
261
  context=context,
263
262
  project_id=self.project_id,
264
263
  region=self.region,
@@ -347,6 +346,13 @@ class DataformGetWorkflowInvocationOperator(GoogleCloudBaseOperator):
347
346
  timeout=self.timeout,
348
347
  metadata=self.metadata,
349
348
  )
349
+ DataformWorkflowInvocationLink.persist(
350
+ context=context,
351
+ project_id=self.project_id,
352
+ region=self.region,
353
+ repository_id=self.repository_id,
354
+ workflow_invocation_id=self.workflow_invocation_id,
355
+ )
350
356
  return WorkflowInvocation.to_dict(result)
351
357
 
352
358
 
@@ -412,7 +418,6 @@ class DataformQueryWorkflowInvocationActionsOperator(GoogleCloudBaseOperator):
412
418
  impersonation_chain=self.impersonation_chain,
413
419
  )
414
420
  DataformWorkflowInvocationLink.persist(
415
- operator_instance=self,
416
421
  context=context,
417
422
  project_id=self.project_id,
418
423
  region=self.region,
@@ -494,6 +499,13 @@ class DataformCancelWorkflowInvocationOperator(GoogleCloudBaseOperator):
494
499
  gcp_conn_id=self.gcp_conn_id,
495
500
  impersonation_chain=self.impersonation_chain,
496
501
  )
502
+ DataformWorkflowInvocationLink.persist(
503
+ context=context,
504
+ project_id=self.project_id,
505
+ region=self.region,
506
+ repository_id=self.repository_id,
507
+ workflow_invocation_id=self.workflow_invocation_id,
508
+ )
497
509
  hook.cancel_workflow_invocation(
498
510
  project_id=self.project_id,
499
511
  region=self.region,
@@ -576,7 +588,6 @@ class DataformCreateRepositoryOperator(GoogleCloudBaseOperator):
576
588
  )
577
589
 
578
590
  DataformRepositoryLink.persist(
579
- operator_instance=self,
580
591
  context=context,
581
592
  project_id=self.project_id,
582
593
  region=self.region,
@@ -735,7 +746,6 @@ class DataformCreateWorkspaceOperator(GoogleCloudBaseOperator):
735
746
  )
736
747
 
737
748
  DataformWorkspaceLink.persist(
738
- operator_instance=self,
739
749
  context=context,
740
750
  project_id=self.project_id,
741
751
  region=self.region,
@@ -26,7 +26,7 @@ from google.api_core.retry import exponential_sleep_generator
26
26
  from googleapiclient.errors import HttpError
27
27
 
28
28
  from airflow.configuration import conf
29
- from airflow.exceptions import AirflowException
29
+ from airflow.providers.common.compat.sdk import AirflowException
30
30
  from airflow.providers.google.cloud.hooks.datafusion import SUCCESS_STATES, DataFusionHook, PipelineStates
31
31
  from airflow.providers.google.cloud.links.datafusion import (
32
32
  DataFusionInstanceLink,
@@ -40,7 +40,8 @@ from airflow.providers.google.cloud.utils.helpers import resource_path_to_dict
40
40
  from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
41
41
 
42
42
  if TYPE_CHECKING:
43
- from airflow.utils.context import Context
43
+ from airflow.providers.common.compat.sdk import Context
44
+ from airflow.providers.openlineage.extractors import OperatorLineage
44
45
 
45
46
 
46
47
  class CloudDataFusionRestartInstanceOperator(GoogleCloudBaseOperator):
@@ -111,10 +112,9 @@ class CloudDataFusionRestartInstanceOperator(GoogleCloudBaseOperator):
111
112
  project_id = resource_path_to_dict(resource_name=instance["name"])["projects"]
112
113
  DataFusionInstanceLink.persist(
113
114
  context=context,
114
- task_instance=self,
115
115
  project_id=project_id,
116
116
  instance_name=self.instance_name,
117
- location=self.location,
117
+ region=self.location,
118
118
  )
119
119
 
120
120
 
@@ -269,10 +269,9 @@ class CloudDataFusionCreateInstanceOperator(GoogleCloudBaseOperator):
269
269
  project_id = resource_path_to_dict(resource_name=instance["name"])["projects"]
270
270
  DataFusionInstanceLink.persist(
271
271
  context=context,
272
- task_instance=self,
273
272
  project_id=project_id,
274
273
  instance_name=self.instance_name,
275
- location=self.location,
274
+ region=self.location,
276
275
  )
277
276
  return instance
278
277
 
@@ -358,10 +357,9 @@ class CloudDataFusionUpdateInstanceOperator(GoogleCloudBaseOperator):
358
357
  project_id = resource_path_to_dict(resource_name=instance["name"])["projects"]
359
358
  DataFusionInstanceLink.persist(
360
359
  context=context,
361
- task_instance=self,
362
360
  project_id=project_id,
363
361
  instance_name=self.instance_name,
364
- location=self.location,
362
+ region=self.location,
365
363
  )
366
364
 
367
365
 
@@ -429,10 +427,9 @@ class CloudDataFusionGetInstanceOperator(GoogleCloudBaseOperator):
429
427
  project_id = resource_path_to_dict(resource_name=instance["name"])["projects"]
430
428
  DataFusionInstanceLink.persist(
431
429
  context=context,
432
- task_instance=self,
433
430
  project_id=project_id,
434
431
  instance_name=self.instance_name,
435
- location=self.location,
432
+ region=self.location,
436
433
  )
437
434
  return instance
438
435
 
@@ -519,7 +516,6 @@ class CloudDataFusionCreatePipelineOperator(GoogleCloudBaseOperator):
519
516
  )
520
517
  DataFusionPipelineLink.persist(
521
518
  context=context,
522
- task_instance=self,
523
519
  uri=instance["serviceEndpoint"],
524
520
  pipeline_name=self.pipeline_name,
525
521
  namespace=self.namespace,
@@ -693,7 +689,6 @@ class CloudDataFusionListPipelinesOperator(GoogleCloudBaseOperator):
693
689
 
694
690
  DataFusionPipelinesLink.persist(
695
691
  context=context,
696
- task_instance=self,
697
692
  uri=service_endpoint,
698
693
  namespace=self.namespace,
699
694
  )
@@ -783,6 +778,7 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
783
778
  self.pipeline_timeout = pipeline_timeout
784
779
  self.deferrable = deferrable
785
780
  self.poll_interval = poll_interval
781
+ self.pipeline_id: str | None = None
786
782
 
787
783
  if success_states:
788
784
  self.success_states = success_states
@@ -802,18 +798,17 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
802
798
  project_id=self.project_id,
803
799
  )
804
800
  api_url = instance["apiEndpoint"]
805
- pipeline_id = hook.start_pipeline(
801
+ self.pipeline_id = hook.start_pipeline(
806
802
  pipeline_name=self.pipeline_name,
807
803
  pipeline_type=self.pipeline_type,
808
804
  instance_url=api_url,
809
805
  namespace=self.namespace,
810
806
  runtime_args=self.runtime_args,
811
807
  )
812
- self.log.info("Pipeline %s submitted successfully.", pipeline_id)
808
+ self.log.info("Pipeline %s submitted successfully.", self.pipeline_id)
813
809
 
814
810
  DataFusionPipelineLink.persist(
815
811
  context=context,
816
- task_instance=self,
817
812
  uri=instance["serviceEndpoint"],
818
813
  pipeline_name=self.pipeline_name,
819
814
  namespace=self.namespace,
@@ -831,7 +826,7 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
831
826
  namespace=self.namespace,
832
827
  pipeline_name=self.pipeline_name,
833
828
  pipeline_type=self.pipeline_type.value,
834
- pipeline_id=pipeline_id,
829
+ pipeline_id=self.pipeline_id,
835
830
  poll_interval=self.poll_interval,
836
831
  gcp_conn_id=self.gcp_conn_id,
837
832
  impersonation_chain=self.impersonation_chain,
@@ -841,19 +836,21 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
841
836
  else:
842
837
  if not self.asynchronous:
843
838
  # when NOT using asynchronous mode it will just wait for pipeline to finish and print message
844
- self.log.info("Waiting when pipeline %s will be in one of the success states", pipeline_id)
839
+ self.log.info(
840
+ "Waiting when pipeline %s will be in one of the success states", self.pipeline_id
841
+ )
845
842
  hook.wait_for_pipeline_state(
846
843
  success_states=self.success_states,
847
- pipeline_id=pipeline_id,
844
+ pipeline_id=self.pipeline_id,
848
845
  pipeline_name=self.pipeline_name,
849
846
  pipeline_type=self.pipeline_type,
850
847
  namespace=self.namespace,
851
848
  instance_url=api_url,
852
849
  timeout=self.pipeline_timeout,
853
850
  )
854
- self.log.info("Pipeline %s discovered success state.", pipeline_id)
851
+ self.log.info("Pipeline %s discovered success state.", self.pipeline_id)
855
852
  # otherwise, return pipeline_id so that sensor can use it later to check the pipeline state
856
- return pipeline_id
853
+ return self.pipeline_id
857
854
 
858
855
  def execute_complete(self, context: Context, event: dict[str, Any]):
859
856
  """
@@ -870,6 +867,31 @@ class CloudDataFusionStartPipelineOperator(GoogleCloudBaseOperator):
870
867
  )
871
868
  return event["pipeline_id"]
872
869
 
870
+ def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage | None:
871
+ """Build and return OpenLineage facets and datasets for the completed pipeline start."""
872
+ from airflow.providers.common.compat.openlineage.facet import Dataset
873
+ from airflow.providers.google.cloud.openlineage.facets import DataFusionRunFacet
874
+ from airflow.providers.openlineage.extractors import OperatorLineage
875
+
876
+ pipeline_resource = f"{self.project_id}:{self.location}:{self.instance_name}:{self.pipeline_name}"
877
+
878
+ inputs = [Dataset(namespace="datafusion", name=pipeline_resource)]
879
+
880
+ if self.pipeline_id:
881
+ output_name = f"{pipeline_resource}:{self.pipeline_id}"
882
+ else:
883
+ output_name = f"{pipeline_resource}:unknown"
884
+ outputs = [Dataset(namespace="datafusion", name=output_name)]
885
+
886
+ run_facets = {
887
+ "dataFusionRun": DataFusionRunFacet(
888
+ runId=self.pipeline_id,
889
+ runtimeArgs=self.runtime_args,
890
+ )
891
+ }
892
+
893
+ return OperatorLineage(inputs=inputs, outputs=outputs, run_facets=run_facets, job_facets={})
894
+
873
895
 
874
896
  class CloudDataFusionStopPipelineOperator(GoogleCloudBaseOperator):
875
897
  """
@@ -943,7 +965,6 @@ class CloudDataFusionStopPipelineOperator(GoogleCloudBaseOperator):
943
965
 
944
966
  DataFusionPipelineLink.persist(
945
967
  context=context,
946
- task_instance=self,
947
968
  uri=instance["serviceEndpoint"],
948
969
  pipeline_name=self.pipeline_name,
949
970
  namespace=self.namespace,