apache-airflow-providers-google 14.0.0__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (278)
  1. airflow/providers/google/3rd-party-licenses/LICENSES.txt +14 -0
  2. airflow/providers/google/3rd-party-licenses/NOTICE +5 -0
  3. airflow/providers/google/__init__.py +3 -3
  4. airflow/providers/google/_vendor/__init__.py +0 -0
  5. airflow/providers/google/_vendor/json_merge_patch.py +91 -0
  6. airflow/providers/google/ads/hooks/ads.py +52 -43
  7. airflow/providers/google/ads/operators/ads.py +2 -2
  8. airflow/providers/google/ads/transfers/ads_to_gcs.py +3 -19
  9. airflow/providers/google/assets/gcs.py +1 -11
  10. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +3 -2
  11. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  12. airflow/providers/google/cloud/hooks/alloy_db.py +2 -3
  13. airflow/providers/google/cloud/hooks/bigquery.py +195 -318
  14. airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
  15. airflow/providers/google/cloud/hooks/bigtable.py +3 -2
  16. airflow/providers/google/cloud/hooks/cloud_batch.py +8 -9
  17. airflow/providers/google/cloud/hooks/cloud_build.py +6 -65
  18. airflow/providers/google/cloud/hooks/cloud_composer.py +292 -24
  19. airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
  20. airflow/providers/google/cloud/hooks/cloud_memorystore.py +4 -3
  21. airflow/providers/google/cloud/hooks/cloud_run.py +20 -11
  22. airflow/providers/google/cloud/hooks/cloud_sql.py +136 -64
  23. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +35 -15
  24. airflow/providers/google/cloud/hooks/compute.py +7 -6
  25. airflow/providers/google/cloud/hooks/compute_ssh.py +7 -4
  26. airflow/providers/google/cloud/hooks/datacatalog.py +12 -3
  27. airflow/providers/google/cloud/hooks/dataflow.py +87 -242
  28. airflow/providers/google/cloud/hooks/dataform.py +9 -14
  29. airflow/providers/google/cloud/hooks/datafusion.py +7 -9
  30. airflow/providers/google/cloud/hooks/dataplex.py +13 -12
  31. airflow/providers/google/cloud/hooks/dataprep.py +2 -2
  32. airflow/providers/google/cloud/hooks/dataproc.py +76 -74
  33. airflow/providers/google/cloud/hooks/dataproc_metastore.py +4 -3
  34. airflow/providers/google/cloud/hooks/dlp.py +5 -4
  35. airflow/providers/google/cloud/hooks/gcs.py +144 -33
  36. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  37. airflow/providers/google/cloud/hooks/kms.py +3 -2
  38. airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -17
  39. airflow/providers/google/cloud/hooks/looker.py +6 -1
  40. airflow/providers/google/cloud/hooks/managed_kafka.py +227 -3
  41. airflow/providers/google/cloud/hooks/mlengine.py +7 -8
  42. airflow/providers/google/cloud/hooks/natural_language.py +3 -2
  43. airflow/providers/google/cloud/hooks/os_login.py +3 -2
  44. airflow/providers/google/cloud/hooks/pubsub.py +6 -6
  45. airflow/providers/google/cloud/hooks/secret_manager.py +105 -12
  46. airflow/providers/google/cloud/hooks/spanner.py +75 -10
  47. airflow/providers/google/cloud/hooks/speech_to_text.py +3 -2
  48. airflow/providers/google/cloud/hooks/stackdriver.py +18 -18
  49. airflow/providers/google/cloud/hooks/tasks.py +4 -3
  50. airflow/providers/google/cloud/hooks/text_to_speech.py +3 -2
  51. airflow/providers/google/cloud/hooks/translate.py +8 -17
  52. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -222
  53. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +9 -15
  54. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +33 -283
  55. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +5 -12
  56. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +6 -12
  57. airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
  58. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +311 -10
  59. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
  60. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +7 -13
  61. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +8 -12
  62. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +6 -12
  63. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +3 -2
  64. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
  65. airflow/providers/google/cloud/hooks/video_intelligence.py +3 -2
  66. airflow/providers/google/cloud/hooks/vision.py +7 -7
  67. airflow/providers/google/cloud/hooks/workflows.py +4 -3
  68. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  69. airflow/providers/google/cloud/links/base.py +77 -7
  70. airflow/providers/google/cloud/links/bigquery.py +0 -47
  71. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  72. airflow/providers/google/cloud/links/bigtable.py +0 -48
  73. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  74. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  75. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  76. airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
  77. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  78. airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -46
  79. airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
  80. airflow/providers/google/cloud/links/compute.py +0 -58
  81. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  82. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  83. airflow/providers/google/cloud/links/dataflow.py +0 -34
  84. airflow/providers/google/cloud/links/dataform.py +0 -64
  85. airflow/providers/google/cloud/links/datafusion.py +1 -90
  86. airflow/providers/google/cloud/links/dataplex.py +0 -154
  87. airflow/providers/google/cloud/links/dataprep.py +0 -24
  88. airflow/providers/google/cloud/links/dataproc.py +11 -89
  89. airflow/providers/google/cloud/links/datastore.py +0 -31
  90. airflow/providers/google/cloud/links/kubernetes_engine.py +11 -61
  91. airflow/providers/google/cloud/links/managed_kafka.py +11 -51
  92. airflow/providers/google/cloud/links/mlengine.py +0 -70
  93. airflow/providers/google/cloud/links/pubsub.py +0 -32
  94. airflow/providers/google/cloud/links/spanner.py +0 -33
  95. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  96. airflow/providers/google/cloud/links/translate.py +17 -187
  97. airflow/providers/google/cloud/links/vertex_ai.py +28 -195
  98. airflow/providers/google/cloud/links/workflows.py +0 -52
  99. airflow/providers/google/cloud/log/gcs_task_handler.py +166 -118
  100. airflow/providers/google/cloud/log/stackdriver_task_handler.py +14 -9
  101. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  102. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  103. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  104. airflow/providers/google/cloud/openlineage/facets.py +141 -40
  105. airflow/providers/google/cloud/openlineage/mixins.py +14 -13
  106. airflow/providers/google/cloud/openlineage/utils.py +19 -3
  107. airflow/providers/google/cloud/operators/alloy_db.py +76 -61
  108. airflow/providers/google/cloud/operators/bigquery.py +104 -667
  109. airflow/providers/google/cloud/operators/bigquery_dts.py +12 -12
  110. airflow/providers/google/cloud/operators/bigtable.py +38 -7
  111. airflow/providers/google/cloud/operators/cloud_base.py +22 -1
  112. airflow/providers/google/cloud/operators/cloud_batch.py +18 -18
  113. airflow/providers/google/cloud/operators/cloud_build.py +80 -36
  114. airflow/providers/google/cloud/operators/cloud_composer.py +157 -71
  115. airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
  116. airflow/providers/google/cloud/operators/cloud_memorystore.py +74 -46
  117. airflow/providers/google/cloud/operators/cloud_run.py +39 -20
  118. airflow/providers/google/cloud/operators/cloud_sql.py +46 -61
  119. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -14
  120. airflow/providers/google/cloud/operators/compute.py +18 -50
  121. airflow/providers/google/cloud/operators/datacatalog.py +167 -29
  122. airflow/providers/google/cloud/operators/dataflow.py +38 -15
  123. airflow/providers/google/cloud/operators/dataform.py +19 -7
  124. airflow/providers/google/cloud/operators/datafusion.py +43 -43
  125. airflow/providers/google/cloud/operators/dataplex.py +212 -126
  126. airflow/providers/google/cloud/operators/dataprep.py +1 -5
  127. airflow/providers/google/cloud/operators/dataproc.py +134 -207
  128. airflow/providers/google/cloud/operators/dataproc_metastore.py +102 -84
  129. airflow/providers/google/cloud/operators/datastore.py +22 -6
  130. airflow/providers/google/cloud/operators/dlp.py +24 -45
  131. airflow/providers/google/cloud/operators/functions.py +21 -14
  132. airflow/providers/google/cloud/operators/gcs.py +15 -12
  133. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  134. airflow/providers/google/cloud/operators/kubernetes_engine.py +115 -106
  135. airflow/providers/google/cloud/operators/looker.py +1 -1
  136. airflow/providers/google/cloud/operators/managed_kafka.py +362 -40
  137. airflow/providers/google/cloud/operators/natural_language.py +5 -3
  138. airflow/providers/google/cloud/operators/pubsub.py +69 -21
  139. airflow/providers/google/cloud/operators/spanner.py +53 -45
  140. airflow/providers/google/cloud/operators/speech_to_text.py +5 -4
  141. airflow/providers/google/cloud/operators/stackdriver.py +5 -11
  142. airflow/providers/google/cloud/operators/tasks.py +6 -15
  143. airflow/providers/google/cloud/operators/text_to_speech.py +4 -3
  144. airflow/providers/google/cloud/operators/translate.py +46 -20
  145. airflow/providers/google/cloud/operators/translate_speech.py +4 -3
  146. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +44 -34
  147. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +34 -12
  148. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +62 -53
  149. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +75 -11
  150. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +48 -12
  151. airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
  152. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
  153. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
  154. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +16 -12
  155. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +62 -14
  156. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +35 -10
  157. airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
  158. airflow/providers/google/cloud/operators/video_intelligence.py +5 -3
  159. airflow/providers/google/cloud/operators/vision.py +7 -5
  160. airflow/providers/google/cloud/operators/workflows.py +24 -19
  161. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  162. airflow/providers/google/cloud/sensors/bigquery.py +2 -2
  163. airflow/providers/google/cloud/sensors/bigquery_dts.py +6 -4
  164. airflow/providers/google/cloud/sensors/bigtable.py +14 -6
  165. airflow/providers/google/cloud/sensors/cloud_composer.py +535 -33
  166. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +6 -5
  167. airflow/providers/google/cloud/sensors/dataflow.py +27 -10
  168. airflow/providers/google/cloud/sensors/dataform.py +2 -2
  169. airflow/providers/google/cloud/sensors/datafusion.py +4 -4
  170. airflow/providers/google/cloud/sensors/dataplex.py +7 -5
  171. airflow/providers/google/cloud/sensors/dataprep.py +2 -2
  172. airflow/providers/google/cloud/sensors/dataproc.py +10 -9
  173. airflow/providers/google/cloud/sensors/dataproc_metastore.py +4 -3
  174. airflow/providers/google/cloud/sensors/gcs.py +22 -21
  175. airflow/providers/google/cloud/sensors/looker.py +5 -5
  176. airflow/providers/google/cloud/sensors/pubsub.py +20 -20
  177. airflow/providers/google/cloud/sensors/tasks.py +2 -2
  178. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
  179. airflow/providers/google/cloud/sensors/workflows.py +6 -4
  180. airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
  181. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
  182. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
  183. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
  184. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +14 -13
  185. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
  186. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  187. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
  188. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
  189. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  190. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +18 -22
  191. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -5
  192. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +45 -38
  193. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
  194. airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
  195. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
  196. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
  197. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
  198. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  199. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
  200. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  201. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
  202. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +44 -12
  203. airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
  204. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
  205. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +36 -14
  206. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
  207. airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
  208. airflow/providers/google/cloud/triggers/bigquery.py +75 -34
  209. airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
  210. airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
  211. airflow/providers/google/cloud/triggers/cloud_build.py +3 -2
  212. airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
  213. airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
  214. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +96 -5
  215. airflow/providers/google/cloud/triggers/dataflow.py +125 -2
  216. airflow/providers/google/cloud/triggers/datafusion.py +1 -1
  217. airflow/providers/google/cloud/triggers/dataplex.py +16 -3
  218. airflow/providers/google/cloud/triggers/dataproc.py +124 -53
  219. airflow/providers/google/cloud/triggers/kubernetes_engine.py +46 -28
  220. airflow/providers/google/cloud/triggers/mlengine.py +1 -1
  221. airflow/providers/google/cloud/triggers/pubsub.py +17 -20
  222. airflow/providers/google/cloud/triggers/vertex_ai.py +8 -7
  223. airflow/providers/google/cloud/utils/bigquery.py +5 -7
  224. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  225. airflow/providers/google/cloud/utils/credentials_provider.py +4 -3
  226. airflow/providers/google/cloud/utils/dataform.py +1 -1
  227. airflow/providers/google/cloud/utils/external_token_supplier.py +0 -1
  228. airflow/providers/google/cloud/utils/field_validator.py +1 -2
  229. airflow/providers/google/cloud/utils/validators.py +43 -0
  230. airflow/providers/google/common/auth_backend/google_openid.py +26 -9
  231. airflow/providers/google/common/consts.py +2 -1
  232. airflow/providers/google/common/deprecated.py +2 -1
  233. airflow/providers/google/common/hooks/base_google.py +40 -43
  234. airflow/providers/google/common/hooks/operation_helpers.py +78 -0
  235. airflow/providers/google/common/links/storage.py +0 -22
  236. airflow/providers/google/common/utils/get_secret.py +31 -0
  237. airflow/providers/google/common/utils/id_token_credentials.py +4 -5
  238. airflow/providers/google/firebase/operators/firestore.py +2 -2
  239. airflow/providers/google/get_provider_info.py +61 -216
  240. airflow/providers/google/go_module_utils.py +35 -3
  241. airflow/providers/google/leveldb/hooks/leveldb.py +30 -6
  242. airflow/providers/google/leveldb/operators/leveldb.py +2 -2
  243. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +3 -2
  244. airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
  245. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  246. airflow/providers/google/marketing_platform/links/analytics_admin.py +4 -5
  247. airflow/providers/google/marketing_platform/operators/analytics_admin.py +7 -6
  248. airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
  249. airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
  250. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
  251. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
  252. airflow/providers/google/marketing_platform/sensors/display_video.py +4 -64
  253. airflow/providers/google/suite/hooks/calendar.py +1 -1
  254. airflow/providers/google/suite/hooks/drive.py +2 -2
  255. airflow/providers/google/suite/hooks/sheets.py +15 -1
  256. airflow/providers/google/suite/operators/sheets.py +8 -3
  257. airflow/providers/google/suite/sensors/drive.py +2 -2
  258. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
  259. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  260. airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
  261. airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
  262. airflow/providers/google/version_compat.py +15 -1
  263. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +117 -72
  264. apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
  265. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +1 -1
  266. apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
  267. airflow/providers/google/cloud/example_dags/example_cloud_task.py +0 -54
  268. airflow/providers/google/cloud/hooks/automl.py +0 -679
  269. airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
  270. airflow/providers/google/cloud/links/automl.py +0 -193
  271. airflow/providers/google/cloud/operators/automl.py +0 -1360
  272. airflow/providers/google/cloud/operators/life_sciences.py +0 -119
  273. airflow/providers/google/cloud/operators/mlengine.py +0 -1515
  274. airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
  275. apache_airflow_providers_google-14.0.0.dist-info/RECORD +0 -318
  276. /airflow/providers/google/cloud/{example_dags → bundles}/__init__.py +0 -0
  277. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
  278. {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
@@ -21,25 +21,28 @@ from __future__ import annotations
21
21
 
22
22
  from collections.abc import Sequence
23
23
  from functools import cached_property
24
- from typing import TYPE_CHECKING
24
+ from typing import TYPE_CHECKING, Any
25
+
26
+ from google.api_core.exceptions import AlreadyExists, NotFound
27
+ from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
28
+ from google.cloud.managedkafka_v1 import types
25
29
 
26
30
  from airflow.exceptions import AirflowException
27
31
  from airflow.providers.google.cloud.hooks.managed_kafka import ManagedKafkaHook
28
32
  from airflow.providers.google.cloud.links.managed_kafka import (
29
33
  ApacheKafkaClusterLink,
30
34
  ApacheKafkaClusterListLink,
35
+ ApacheKafkaConsumerGroupLink,
31
36
  ApacheKafkaTopicLink,
32
37
  )
33
38
  from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
34
- from google.api_core.exceptions import AlreadyExists, NotFound
35
- from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
36
- from google.cloud.managedkafka_v1 import types
37
39
 
38
40
  if TYPE_CHECKING:
39
- from airflow.utils.context import Context
40
41
  from google.api_core.retry import Retry
41
42
  from google.protobuf.field_mask_pb2 import FieldMask
42
43
 
44
+ from airflow.providers.common.compat.sdk import Context
45
+
43
46
 
44
47
  class ManagedKafkaBaseOperator(GoogleCloudBaseOperator):
45
48
  """
@@ -147,9 +150,17 @@ class ManagedKafkaCreateClusterOperator(ManagedKafkaBaseOperator):
147
150
  self.cluster_id = cluster_id
148
151
  self.request_id = request_id
149
152
 
153
+ @property
154
+ def extra_links_params(self) -> dict[str, Any]:
155
+ return {
156
+ "location": self.location,
157
+ "cluster_id": self.cluster_id,
158
+ "project_id": self.project_id,
159
+ }
160
+
150
161
  def execute(self, context: Context):
151
162
  self.log.info("Creating an Apache Kafka cluster.")
152
- ApacheKafkaClusterLink.persist(context=context, task_instance=self, cluster_id=self.cluster_id)
163
+ ApacheKafkaClusterLink.persist(context=context)
153
164
  try:
154
165
  operation = self.hook.create_cluster(
155
166
  project_id=self.project_id,
@@ -224,8 +235,14 @@ class ManagedKafkaListClustersOperator(ManagedKafkaBaseOperator):
224
235
  self.filter = filter
225
236
  self.order_by = order_by
226
237
 
238
+ @property
239
+ def extra_links_params(self) -> dict[str, Any]:
240
+ return {
241
+ "project_id": self.project_id,
242
+ }
243
+
227
244
  def execute(self, context: Context):
228
- ApacheKafkaClusterListLink.persist(context=context, task_instance=self)
245
+ ApacheKafkaClusterListLink.persist(context=context)
229
246
  self.log.info("Listing Clusters from location %s.", self.location)
230
247
  try:
231
248
  cluster_list_pager = self.hook.list_clusters(
@@ -239,8 +256,7 @@ class ManagedKafkaListClustersOperator(ManagedKafkaBaseOperator):
239
256
  timeout=self.timeout,
240
257
  metadata=self.metadata,
241
258
  )
242
- self.xcom_push(
243
- context=context,
259
+ context["ti"].xcom_push(
244
260
  key="cluster_page",
245
261
  value=types.ListClustersResponse.to_dict(cluster_list_pager._response),
246
262
  )
@@ -282,12 +298,16 @@ class ManagedKafkaGetClusterOperator(ManagedKafkaBaseOperator):
282
298
  super().__init__(*args, **kwargs)
283
299
  self.cluster_id = cluster_id
284
300
 
301
+ @property
302
+ def extra_links_params(self) -> dict[str, Any]:
303
+ return {
304
+ "location": self.location,
305
+ "cluster_id": self.cluster_id,
306
+ "project_id": self.project_id,
307
+ }
308
+
285
309
  def execute(self, context: Context):
286
- ApacheKafkaClusterLink.persist(
287
- context=context,
288
- task_instance=self,
289
- cluster_id=self.cluster_id,
290
- )
310
+ ApacheKafkaClusterLink.persist(context=context)
291
311
  self.log.info("Getting Cluster: %s", self.cluster_id)
292
312
  try:
293
313
  cluster = self.hook.get_cluster(
@@ -359,12 +379,16 @@ class ManagedKafkaUpdateClusterOperator(ManagedKafkaBaseOperator):
359
379
  self.update_mask = update_mask
360
380
  self.request_id = request_id
361
381
 
382
+ @property
383
+ def extra_links_params(self) -> dict[str, Any]:
384
+ return {
385
+ "location": self.location,
386
+ "cluster_id": self.cluster_id,
387
+ "project_id": self.project_id,
388
+ }
389
+
362
390
  def execute(self, context: Context):
363
- ApacheKafkaClusterLink.persist(
364
- context=context,
365
- task_instance=self,
366
- cluster_id=self.cluster_id,
367
- )
391
+ ApacheKafkaClusterLink.persist(context=context)
368
392
  self.log.info("Updating an Apache Kafka cluster.")
369
393
  try:
370
394
  operation = self.hook.update_cluster(
@@ -494,14 +518,18 @@ class ManagedKafkaCreateTopicOperator(ManagedKafkaBaseOperator):
494
518
  self.topic_id = topic_id
495
519
  self.topic = topic
496
520
 
521
+ @property
522
+ def extra_links_params(self) -> dict[str, Any]:
523
+ return {
524
+ "location": self.location,
525
+ "cluster_id": self.cluster_id,
526
+ "topic_id": self.topic_id,
527
+ "project_id": self.project_id,
528
+ }
529
+
497
530
  def execute(self, context: Context):
498
531
  self.log.info("Creating an Apache Kafka topic.")
499
- ApacheKafkaTopicLink.persist(
500
- context=context,
501
- task_instance=self,
502
- cluster_id=self.cluster_id,
503
- topic_id=self.topic_id,
504
- )
532
+ ApacheKafkaTopicLink.persist(context=context)
505
533
  try:
506
534
  topic_obj = self.hook.create_topic(
507
535
  project_id=self.project_id,
@@ -571,8 +599,16 @@ class ManagedKafkaListTopicsOperator(ManagedKafkaBaseOperator):
571
599
  self.page_size = page_size
572
600
  self.page_token = page_token
573
601
 
602
+ @property
603
+ def extra_links_params(self) -> dict[str, Any]:
604
+ return {
605
+ "location": self.location,
606
+ "cluster_id": self.cluster_id,
607
+ "project_id": self.project_id,
608
+ }
609
+
574
610
  def execute(self, context: Context):
575
- ApacheKafkaClusterLink.persist(context=context, task_instance=self, cluster_id=self.cluster_id)
611
+ ApacheKafkaClusterLink.persist(context=context)
576
612
  self.log.info("Listing Topics for cluster %s.", self.cluster_id)
577
613
  try:
578
614
  topic_list_pager = self.hook.list_topics(
@@ -585,8 +621,7 @@ class ManagedKafkaListTopicsOperator(ManagedKafkaBaseOperator):
585
621
  timeout=self.timeout,
586
622
  metadata=self.metadata,
587
623
  )
588
- self.xcom_push(
589
- context=context,
624
+ context["ti"].xcom_push(
590
625
  key="topic_page",
591
626
  value=types.ListTopicsResponse.to_dict(topic_list_pager._response),
592
627
  )
@@ -633,13 +668,17 @@ class ManagedKafkaGetTopicOperator(ManagedKafkaBaseOperator):
633
668
  self.cluster_id = cluster_id
634
669
  self.topic_id = topic_id
635
670
 
671
+ @property
672
+ def extra_links_params(self) -> dict[str, Any]:
673
+ return {
674
+ "location": self.location,
675
+ "cluster_id": self.cluster_id,
676
+ "topic_id": self.topic_id,
677
+ "project_id": self.project_id,
678
+ }
679
+
636
680
  def execute(self, context: Context):
637
- ApacheKafkaTopicLink.persist(
638
- context=context,
639
- task_instance=self,
640
- cluster_id=self.cluster_id,
641
- topic_id=self.topic_id,
642
- )
681
+ ApacheKafkaTopicLink.persist(context=context)
643
682
  self.log.info("Getting Topic: %s", self.topic_id)
644
683
  try:
645
684
  topic = self.hook.get_topic(
@@ -704,13 +743,17 @@ class ManagedKafkaUpdateTopicOperator(ManagedKafkaBaseOperator):
704
743
  self.topic = topic
705
744
  self.update_mask = update_mask
706
745
 
746
+ @property
747
+ def extra_links_params(self) -> dict[str, Any]:
748
+ return {
749
+ "location": self.location,
750
+ "cluster_id": self.cluster_id,
751
+ "topic_id": self.topic_id,
752
+ "project_id": self.project_id,
753
+ }
754
+
707
755
  def execute(self, context: Context):
708
- ApacheKafkaTopicLink.persist(
709
- context=context,
710
- task_instance=self,
711
- cluster_id=self.cluster_id,
712
- topic_id=self.topic_id,
713
- )
756
+ ApacheKafkaTopicLink.persist(context=context)
714
757
  self.log.info("Updating an Apache Kafka topic.")
715
758
  try:
716
759
  topic_obj = self.hook.update_topic(
@@ -786,3 +829,282 @@ class ManagedKafkaDeleteTopicOperator(ManagedKafkaBaseOperator):
786
829
  except NotFound as not_found_err:
787
830
  self.log.info("The Apache Kafka topic ID %s does not exist.", self.topic_id)
788
831
  raise AirflowException(not_found_err)
832
+
833
+
834
+ class ManagedKafkaListConsumerGroupsOperator(ManagedKafkaBaseOperator):
835
+ """
836
+ List the consumer groups in a given cluster.
837
+
838
+ :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
839
+ :param location: Required. The ID of the Google Cloud region that the service belongs to.
840
+ :param cluster_id: Required. The ID of the cluster whose consumer groups are to be listed.
841
+ :param page_size: Optional. The maximum number of consumer groups to return. The service may return
842
+ fewer than this value. If unset or zero, all consumer groups for the parent are returned.
843
+ :param page_token: Optional. A page token, received from a previous ``ListConsumerGroups`` call.
844
+ Provide this to retrieve the subsequent page. When paginating, all other parameters provided to
845
+ ``ListConsumerGroups`` must match the call that provided the page token.
846
+ :param retry: Designation of what errors, if any, should be retried.
847
+ :param timeout: The timeout for this request.
848
+ :param metadata: Strings which should be sent along with the request as metadata.
849
+ :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
850
+ :param impersonation_chain: Optional service account to impersonate using short-term
851
+ credentials, or chained list of accounts required to get the access_token
852
+ of the last account in the list, which will be impersonated in the request.
853
+ If set as a string, the account must grant the originating account
854
+ the Service Account Token Creator IAM role.
855
+ If set as a sequence, the identities from the list must grant
856
+ Service Account Token Creator IAM role to the directly preceding identity, with first
857
+ account from the list granting this role to the originating account (templated).
858
+ """
859
+
860
+ template_fields: Sequence[str] = tuple({"cluster_id"} | set(ManagedKafkaBaseOperator.template_fields))
861
+ operator_extra_links = (ApacheKafkaClusterLink(),)
862
+
863
+ def __init__(
864
+ self,
865
+ cluster_id: str,
866
+ page_size: int | None = None,
867
+ page_token: str | None = None,
868
+ *args,
869
+ **kwargs,
870
+ ) -> None:
871
+ super().__init__(*args, **kwargs)
872
+ self.cluster_id = cluster_id
873
+ self.page_size = page_size
874
+ self.page_token = page_token
875
+
876
+ @property
877
+ def extra_links_params(self) -> dict[str, Any]:
878
+ return {
879
+ "location": self.location,
880
+ "cluster_id": self.cluster_id,
881
+ "project_id": self.project_id,
882
+ }
883
+
884
+ def execute(self, context: Context):
885
+ ApacheKafkaClusterLink.persist(context=context)
886
+ self.log.info("Listing Consumer Groups for cluster %s.", self.cluster_id)
887
+ try:
888
+ consumer_group_list_pager = self.hook.list_consumer_groups(
889
+ project_id=self.project_id,
890
+ location=self.location,
891
+ cluster_id=self.cluster_id,
892
+ page_size=self.page_size,
893
+ page_token=self.page_token,
894
+ retry=self.retry,
895
+ timeout=self.timeout,
896
+ metadata=self.metadata,
897
+ )
898
+ context["ti"].xcom_push(
899
+ key="consumer_group_page",
900
+ value=types.ListConsumerGroupsResponse.to_dict(consumer_group_list_pager._response),
901
+ )
902
+ except Exception as error:
903
+ raise AirflowException(error)
904
+ return [types.ConsumerGroup.to_dict(consumer_group) for consumer_group in consumer_group_list_pager]
905
+
906
+
907
+ class ManagedKafkaGetConsumerGroupOperator(ManagedKafkaBaseOperator):
908
+ """
909
+ Return the properties of a single consumer group.
910
+
911
+ :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
912
+ :param location: Required. The ID of the Google Cloud region that the service belongs to.
913
+ :param cluster_id: Required. The ID of the cluster whose consumer group is to be returned.
914
+ :param consumer_group_id: Required. The ID of the consumer group whose configuration to return.
915
+ :param retry: Designation of what errors, if any, should be retried.
916
+ :param timeout: The timeout for this request.
917
+ :param metadata: Strings which should be sent along with the request as metadata.
918
+ :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
919
+ :param impersonation_chain: Optional service account to impersonate using short-term
920
+ credentials, or chained list of accounts required to get the access_token
921
+ of the last account in the list, which will be impersonated in the request.
922
+ If set as a string, the account must grant the originating account
923
+ the Service Account Token Creator IAM role.
924
+ If set as a sequence, the identities from the list must grant
925
+ Service Account Token Creator IAM role to the directly preceding identity, with first
926
+ account from the list granting this role to the originating account (templated).
927
+ """
928
+
929
+ template_fields: Sequence[str] = tuple(
930
+ {"cluster_id", "consumer_group_id"} | set(ManagedKafkaBaseOperator.template_fields)
931
+ )
932
+ operator_extra_links = (ApacheKafkaConsumerGroupLink(),)
933
+
934
+ def __init__(
935
+ self,
936
+ cluster_id: str,
937
+ consumer_group_id: str,
938
+ *args,
939
+ **kwargs,
940
+ ) -> None:
941
+ super().__init__(*args, **kwargs)
942
+ self.cluster_id = cluster_id
943
+ self.consumer_group_id = consumer_group_id
944
+
945
+ @property
946
+ def extra_links_params(self) -> dict[str, Any]:
947
+ return {
948
+ "location": self.location,
949
+ "cluster_id": self.cluster_id,
950
+ "consumer_group_id": self.consumer_group_id,
951
+ "project_id": self.project_id,
952
+ }
953
+
954
+ def execute(self, context: Context):
955
+ ApacheKafkaConsumerGroupLink.persist(context=context)
956
+ self.log.info("Getting Consumer Group: %s", self.consumer_group_id)
957
+ try:
958
+ consumer_group = self.hook.get_consumer_group(
959
+ project_id=self.project_id,
960
+ location=self.location,
961
+ cluster_id=self.cluster_id,
962
+ consumer_group_id=self.consumer_group_id,
963
+ retry=self.retry,
964
+ timeout=self.timeout,
965
+ metadata=self.metadata,
966
+ )
967
+ self.log.info(
968
+ "The consumer group %s from cluster %s was retrieved.",
969
+ self.consumer_group_id,
970
+ self.cluster_id,
971
+ )
972
+ return types.ConsumerGroup.to_dict(consumer_group)
973
+ except NotFound as not_found_err:
974
+ self.log.info("The Consumer Group %s does not exist.", self.consumer_group_id)
975
+ raise AirflowException(not_found_err)
976
+
977
+
978
+ class ManagedKafkaUpdateConsumerGroupOperator(ManagedKafkaBaseOperator):
979
+ """
980
+ Update the properties of a single consumer group.
981
+
982
+ :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
983
+ :param location: Required. The ID of the Google Cloud region that the service belongs to.
984
+ :param cluster_id: Required. The ID of the cluster whose consumer group is to be updated.
985
+ :param consumer_group_id: Required. The ID of the consumer group whose configuration to update.
986
+ :param consumer_group: Required. The consumer_group to update. Its ``name`` field must be populated.
987
+ :param update_mask: Required. Field mask is used to specify the fields to be overwritten in the
988
+ ConsumerGroup resource by the update. The fields specified in the update_mask are relative to the
989
+ resource, not the full request. A field will be overwritten if it is in the mask.
990
+ :param retry: Designation of what errors, if any, should be retried.
991
+ :param timeout: The timeout for this request.
992
+ :param metadata: Strings which should be sent along with the request as metadata.
993
+ :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
994
+ :param impersonation_chain: Optional service account to impersonate using short-term
995
+ credentials, or chained list of accounts required to get the access_token
996
+ of the last account in the list, which will be impersonated in the request.
997
+ If set as a string, the account must grant the originating account
998
+ the Service Account Token Creator IAM role.
999
+ If set as a sequence, the identities from the list must grant
1000
+ Service Account Token Creator IAM role to the directly preceding identity, with first
1001
+ account from the list granting this role to the originating account (templated).
1002
+ """
1003
+
1004
+ template_fields: Sequence[str] = tuple(
1005
+ {"cluster_id", "consumer_group_id", "consumer_group", "update_mask"}
1006
+ | set(ManagedKafkaBaseOperator.template_fields)
1007
+ )
1008
+ operator_extra_links = (ApacheKafkaConsumerGroupLink(),)
1009
+
1010
+ def __init__(
1011
+ self,
1012
+ cluster_id: str,
1013
+ consumer_group_id: str,
1014
+ consumer_group: types.Topic | dict,
1015
+ update_mask: FieldMask | dict,
1016
+ *args,
1017
+ **kwargs,
1018
+ ) -> None:
1019
+ super().__init__(*args, **kwargs)
1020
+ self.cluster_id = cluster_id
1021
+ self.consumer_group_id = consumer_group_id
1022
+ self.consumer_group = consumer_group
1023
+ self.update_mask = update_mask
1024
+
1025
+ @property
1026
+ def extra_links_params(self) -> dict[str, Any]:
1027
+ return {
1028
+ "location": self.location,
1029
+ "cluster_id": self.cluster_id,
1030
+ "consumer_group_id": self.consumer_group_id,
1031
+ "project_id": self.project_id,
1032
+ }
1033
+
1034
+ def execute(self, context: Context):
1035
+ ApacheKafkaConsumerGroupLink.persist(context=context)
1036
+ self.log.info("Updating an Apache Kafka consumer group.")
1037
+ try:
1038
+ consumer_group_obj = self.hook.update_consumer_group(
1039
+ project_id=self.project_id,
1040
+ location=self.location,
1041
+ cluster_id=self.cluster_id,
1042
+ consumer_group_id=self.consumer_group_id,
1043
+ consumer_group=self.consumer_group,
1044
+ update_mask=self.update_mask,
1045
+ retry=self.retry,
1046
+ timeout=self.timeout,
1047
+ metadata=self.metadata,
1048
+ )
1049
+ self.log.info("Apache Kafka consumer group %s was updated.", self.consumer_group_id)
1050
+ return types.ConsumerGroup.to_dict(consumer_group_obj)
1051
+ except NotFound as not_found_err:
1052
+ self.log.info("The Consumer Group %s does not exist.", self.consumer_group_id)
1053
+ raise AirflowException(not_found_err)
1054
+ except Exception as error:
1055
+ raise AirflowException(error)
1056
+
1057
+
1058
+ class ManagedKafkaDeleteConsumerGroupOperator(ManagedKafkaBaseOperator):
1059
+ """
1060
+ Delete a single consumer group.
1061
+
1062
+ :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
1063
+ :param location: Required. The ID of the Google Cloud region that the service belongs to.
1064
+ :param cluster_id: Required. The ID of the cluster whose consumer group is to be deleted.
1065
+ :param consumer_group_id: Required. The ID of the consumer group to delete.
1066
+ :param retry: Designation of what errors, if any, should be retried.
1067
+ :param timeout: The timeout for this request.
1068
+ :param metadata: Strings which should be sent along with the request as metadata.
1069
+ :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
1070
+ :param impersonation_chain: Optional service account to impersonate using short-term
1071
+ credentials, or chained list of accounts required to get the access_token
1072
+ of the last account in the list, which will be impersonated in the request.
1073
+ If set as a string, the account must grant the originating account
1074
+ the Service Account Token Creator IAM role.
1075
+ If set as a sequence, the identities from the list must grant
1076
+ Service Account Token Creator IAM role to the directly preceding identity, with first
1077
+ account from the list granting this role to the originating account (templated).
1078
+ """
1079
+
1080
+ template_fields: Sequence[str] = tuple(
1081
+ {"cluster_id", "consumer_group_id"} | set(ManagedKafkaBaseOperator.template_fields)
1082
+ )
1083
+
1084
+ def __init__(
1085
+ self,
1086
+ cluster_id: str,
1087
+ consumer_group_id: str,
1088
+ *args,
1089
+ **kwargs,
1090
+ ) -> None:
1091
+ super().__init__(*args, **kwargs)
1092
+ self.cluster_id = cluster_id
1093
+ self.consumer_group_id = consumer_group_id
1094
+
1095
+ def execute(self, context: Context):
1096
+ try:
1097
+ self.log.info("Deleting Apache Kafka consumer group: %s", self.consumer_group_id)
1098
+ self.hook.delete_consumer_group(
1099
+ project_id=self.project_id,
1100
+ location=self.location,
1101
+ cluster_id=self.cluster_id,
1102
+ consumer_group_id=self.consumer_group_id,
1103
+ retry=self.retry,
1104
+ timeout=self.timeout,
1105
+ metadata=self.metadata,
1106
+ )
1107
+ self.log.info("Apache Kafka consumer group was deleted.")
1108
+ except NotFound as not_found_err:
1109
+ self.log.info("The Apache Kafka consumer group ID %s does not exist.", self.consumer_group_id)
1110
+ raise AirflowException(not_found_err)
@@ -22,16 +22,18 @@ from __future__ import annotations
22
22
  from collections.abc import Sequence
23
23
  from typing import TYPE_CHECKING
24
24
 
25
- from airflow.providers.google.cloud.hooks.natural_language import CloudNaturalLanguageHook
26
- from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
27
25
  from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
28
26
  from google.protobuf.json_format import MessageToDict
29
27
 
28
+ from airflow.providers.google.cloud.hooks.natural_language import CloudNaturalLanguageHook
29
+ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
30
+
30
31
  if TYPE_CHECKING:
31
- from airflow.utils.context import Context
32
32
  from google.api_core.retry import Retry
33
33
  from google.cloud.language_v1.types import Document, EncodingType
34
34
 
35
+ from airflow.providers.common.compat.sdk import Context
36
+
35
37
 
36
38
  MetaData = Sequence[tuple[str, str]]
37
39
 
@@ -25,16 +25,10 @@ This module contains Google PubSub operators.
25
25
 
26
26
  from __future__ import annotations
27
27
 
28
- from collections.abc import Sequence
29
- from typing import TYPE_CHECKING, Any, Callable
28
+ from collections.abc import Callable, Sequence
29
+ from functools import cached_property
30
+ from typing import TYPE_CHECKING, Any
30
31
 
31
- from airflow.exceptions import AirflowException
32
- from airflow.providers.google.cloud.hooks.pubsub import PubSubHook
33
- from airflow.providers.google.cloud.links.pubsub import PubSubSubscriptionLink, PubSubTopicLink
34
- from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
35
- from airflow.providers.google.cloud.triggers.pubsub import PubsubPullTrigger
36
- from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME
37
- from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
38
32
  from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
39
33
  from google.cloud.pubsub_v1.types import (
40
34
  DeadLetterPolicy,
@@ -47,10 +41,21 @@ from google.cloud.pubsub_v1.types import (
47
41
  SchemaSettings,
48
42
  )
49
43
 
44
+ from airflow.configuration import conf
45
+ from airflow.exceptions import AirflowException
46
+ from airflow.providers.google.cloud.hooks.pubsub import PubSubHook
47
+ from airflow.providers.google.cloud.links.pubsub import PubSubSubscriptionLink, PubSubTopicLink
48
+ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
49
+ from airflow.providers.google.cloud.triggers.pubsub import PubsubPullTrigger
50
+ from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME
51
+ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
52
+
50
53
  if TYPE_CHECKING:
51
- from airflow.utils.context import Context
52
54
  from google.api_core.retry import Retry
53
55
 
56
+ from airflow.providers.common.compat.sdk import Context
57
+ from airflow.providers.openlineage.extractors import OperatorLineage
58
+
54
59
 
55
60
  class PubSubCreateTopicOperator(GoogleCloudBaseOperator):
56
61
  """
@@ -180,7 +185,6 @@ class PubSubCreateTopicOperator(GoogleCloudBaseOperator):
180
185
  self.log.info("Created topic %s", self.topic)
181
186
  PubSubTopicLink.persist(
182
187
  context=context,
183
- task_instance=self,
184
188
  topic_id=self.topic,
185
189
  project_id=self.project_id or hook.project_id,
186
190
  )
@@ -357,15 +361,18 @@ class PubSubCreateSubscriptionOperator(GoogleCloudBaseOperator):
357
361
  self.timeout = timeout
358
362
  self.metadata = metadata
359
363
  self.impersonation_chain = impersonation_chain
364
+ self._resolved_subscription_name: str | None = None
360
365
 
361
- def execute(self, context: Context) -> str:
362
- hook = PubSubHook(
366
+ @cached_property
367
+ def pubsub_hook(self):
368
+ return PubSubHook(
363
369
  gcp_conn_id=self.gcp_conn_id,
364
370
  impersonation_chain=self.impersonation_chain,
365
371
  )
366
372
 
373
+ def execute(self, context: Context) -> str:
367
374
  self.log.info("Creating subscription for topic %s", self.topic)
368
- result = hook.create_subscription(
375
+ result = self.pubsub_hook.create_subscription(
369
376
  project_id=self.project_id,
370
377
  topic=self.topic,
371
378
  subscription=self.subscription,
@@ -387,14 +394,34 @@ class PubSubCreateSubscriptionOperator(GoogleCloudBaseOperator):
387
394
  )
388
395
 
389
396
  self.log.info("Created subscription for topic %s", self.topic)
397
+
398
+ # Store resolved subscription for OpenLineage
399
+ self._resolved_subscription_name = self.subscription or result
400
+
390
401
  PubSubSubscriptionLink.persist(
391
402
  context=context,
392
- task_instance=self,
393
- subscription_id=self.subscription or result, # result returns subscription name
394
- project_id=self.project_id or hook.project_id,
403
+ subscription_id=self._resolved_subscription_name, # result returns subscription name
404
+ project_id=self.project_id or self.pubsub_hook.project_id,
395
405
  )
396
406
  return result
397
407
 
408
+ def get_openlineage_facets_on_complete(self, _) -> OperatorLineage:
409
+ from airflow.providers.common.compat.openlineage.facet import Dataset
410
+ from airflow.providers.openlineage.extractors import OperatorLineage
411
+
412
+ topic_project_id = self.project_id or self.pubsub_hook.project_id
413
+ subscription_project_id = self.subscription_project_id or topic_project_id
414
+
415
+ return OperatorLineage(
416
+ inputs=[Dataset(namespace="pubsub", name=f"topic:{topic_project_id}:{self.topic}")],
417
+ outputs=[
418
+ Dataset(
419
+ namespace="pubsub",
420
+ name=f"subscription:{subscription_project_id}:{self._resolved_subscription_name}",
421
+ )
422
+ ],
423
+ )
424
+
398
425
 
399
426
  class PubSubDeleteTopicOperator(GoogleCloudBaseOperator):
400
427
  """
@@ -691,17 +718,28 @@ class PubSubPublishMessageOperator(GoogleCloudBaseOperator):
691
718
  self.enable_message_ordering = enable_message_ordering
692
719
  self.impersonation_chain = impersonation_chain
693
720
 
694
- def execute(self, context: Context) -> None:
695
- hook = PubSubHook(
721
+ @cached_property
722
+ def pubsub_hook(self):
723
+ return PubSubHook(
696
724
  gcp_conn_id=self.gcp_conn_id,
697
725
  impersonation_chain=self.impersonation_chain,
698
726
  enable_message_ordering=self.enable_message_ordering,
699
727
  )
700
728
 
729
+ def execute(self, context: Context) -> None:
701
730
  self.log.info("Publishing to topic %s", self.topic)
702
- hook.publish(project_id=self.project_id, topic=self.topic, messages=self.messages)
731
+ self.pubsub_hook.publish(project_id=self.project_id, topic=self.topic, messages=self.messages)
703
732
  self.log.info("Published to topic %s", self.topic)
704
733
 
734
+ def get_openlineage_facets_on_complete(self, _) -> OperatorLineage:
735
+ from airflow.providers.common.compat.openlineage.facet import Dataset
736
+ from airflow.providers.openlineage.extractors import OperatorLineage
737
+
738
+ project_id = self.project_id or self.pubsub_hook.project_id
739
+ output_dataset = [Dataset(namespace="pubsub", name=f"topic:{project_id}:{self.topic}")]
740
+
741
+ return OperatorLineage(outputs=output_dataset)
742
+
705
743
 
706
744
  class PubSubPullOperator(GoogleCloudBaseOperator):
707
745
  """
@@ -768,7 +806,7 @@ class PubSubPullOperator(GoogleCloudBaseOperator):
768
806
  messages_callback: Callable[[list[ReceivedMessage], Context], Any] | None = None,
769
807
  gcp_conn_id: str = "google_cloud_default",
770
808
  impersonation_chain: str | Sequence[str] | None = None,
771
- deferrable: bool = False,
809
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
772
810
  poll_interval: int = 300,
773
811
  **kwargs,
774
812
  ) -> None:
@@ -852,3 +890,13 @@ class PubSubPullOperator(GoogleCloudBaseOperator):
852
890
  messages_json = [ReceivedMessage.to_dict(m) for m in pulled_messages]
853
891
 
854
892
  return messages_json
893
+
894
+ def get_openlineage_facets_on_complete(self, _) -> OperatorLineage:
895
+ from airflow.providers.common.compat.openlineage.facet import Dataset
896
+ from airflow.providers.openlineage.extractors import OperatorLineage
897
+
898
+ output_dataset = [
899
+ Dataset(namespace="pubsub", name=f"subscription:{self.project_id}:{self.subscription}")
900
+ ]
901
+
902
+ return OperatorLineage(outputs=output_dataset)