apache-airflow-providers-google 12.0.0rc1__py3-none-any.whl → 13.0.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (163)
  1. airflow/providers/google/LICENSE +0 -52
  2. airflow/providers/google/__init__.py +1 -1
  3. airflow/providers/google/ads/hooks/ads.py +27 -13
  4. airflow/providers/google/ads/transfers/ads_to_gcs.py +18 -4
  5. airflow/providers/google/assets/bigquery.py +17 -0
  6. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +2 -3
  7. airflow/providers/google/cloud/hooks/alloy_db.py +736 -8
  8. airflow/providers/google/cloud/hooks/automl.py +10 -4
  9. airflow/providers/google/cloud/hooks/bigquery.py +125 -22
  10. airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
  11. airflow/providers/google/cloud/hooks/bigtable.py +2 -3
  12. airflow/providers/google/cloud/hooks/cloud_batch.py +3 -4
  13. airflow/providers/google/cloud/hooks/cloud_build.py +4 -5
  14. airflow/providers/google/cloud/hooks/cloud_composer.py +3 -4
  15. airflow/providers/google/cloud/hooks/cloud_memorystore.py +3 -4
  16. airflow/providers/google/cloud/hooks/cloud_run.py +3 -4
  17. airflow/providers/google/cloud/hooks/cloud_sql.py +7 -3
  18. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +119 -7
  19. airflow/providers/google/cloud/hooks/compute.py +3 -3
  20. airflow/providers/google/cloud/hooks/datacatalog.py +3 -4
  21. airflow/providers/google/cloud/hooks/dataflow.py +12 -12
  22. airflow/providers/google/cloud/hooks/dataform.py +2 -3
  23. airflow/providers/google/cloud/hooks/datafusion.py +2 -2
  24. airflow/providers/google/cloud/hooks/dataplex.py +1032 -11
  25. airflow/providers/google/cloud/hooks/dataproc.py +4 -5
  26. airflow/providers/google/cloud/hooks/dataproc_metastore.py +3 -4
  27. airflow/providers/google/cloud/hooks/dlp.py +3 -4
  28. airflow/providers/google/cloud/hooks/gcs.py +7 -6
  29. airflow/providers/google/cloud/hooks/kms.py +2 -3
  30. airflow/providers/google/cloud/hooks/kubernetes_engine.py +8 -8
  31. airflow/providers/google/cloud/hooks/life_sciences.py +1 -1
  32. airflow/providers/google/cloud/hooks/managed_kafka.py +482 -0
  33. airflow/providers/google/cloud/hooks/natural_language.py +2 -3
  34. airflow/providers/google/cloud/hooks/os_login.py +2 -3
  35. airflow/providers/google/cloud/hooks/pubsub.py +6 -6
  36. airflow/providers/google/cloud/hooks/secret_manager.py +2 -3
  37. airflow/providers/google/cloud/hooks/spanner.py +2 -2
  38. airflow/providers/google/cloud/hooks/speech_to_text.py +2 -3
  39. airflow/providers/google/cloud/hooks/stackdriver.py +4 -4
  40. airflow/providers/google/cloud/hooks/tasks.py +3 -4
  41. airflow/providers/google/cloud/hooks/text_to_speech.py +2 -3
  42. airflow/providers/google/cloud/hooks/translate.py +236 -5
  43. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +9 -4
  44. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -4
  45. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +4 -5
  46. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +3 -4
  47. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -3
  48. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +3 -4
  49. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -181
  50. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -4
  51. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -3
  52. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -4
  53. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -3
  54. airflow/providers/google/cloud/hooks/video_intelligence.py +2 -3
  55. airflow/providers/google/cloud/hooks/vision.py +3 -4
  56. airflow/providers/google/cloud/hooks/workflows.py +2 -3
  57. airflow/providers/google/cloud/links/alloy_db.py +46 -0
  58. airflow/providers/google/cloud/links/bigquery.py +25 -0
  59. airflow/providers/google/cloud/links/dataplex.py +172 -2
  60. airflow/providers/google/cloud/links/kubernetes_engine.py +1 -2
  61. airflow/providers/google/cloud/links/managed_kafka.py +104 -0
  62. airflow/providers/google/cloud/links/translate.py +28 -0
  63. airflow/providers/google/cloud/log/gcs_task_handler.py +3 -3
  64. airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -10
  65. airflow/providers/google/cloud/openlineage/facets.py +67 -0
  66. airflow/providers/google/cloud/openlineage/mixins.py +438 -173
  67. airflow/providers/google/cloud/openlineage/utils.py +394 -61
  68. airflow/providers/google/cloud/operators/alloy_db.py +980 -69
  69. airflow/providers/google/cloud/operators/automl.py +83 -245
  70. airflow/providers/google/cloud/operators/bigquery.py +377 -74
  71. airflow/providers/google/cloud/operators/bigquery_dts.py +126 -13
  72. airflow/providers/google/cloud/operators/bigtable.py +1 -3
  73. airflow/providers/google/cloud/operators/cloud_base.py +1 -2
  74. airflow/providers/google/cloud/operators/cloud_batch.py +2 -4
  75. airflow/providers/google/cloud/operators/cloud_build.py +3 -5
  76. airflow/providers/google/cloud/operators/cloud_composer.py +5 -7
  77. airflow/providers/google/cloud/operators/cloud_memorystore.py +4 -6
  78. airflow/providers/google/cloud/operators/cloud_run.py +6 -5
  79. airflow/providers/google/cloud/operators/cloud_sql.py +20 -8
  80. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +62 -8
  81. airflow/providers/google/cloud/operators/compute.py +3 -4
  82. airflow/providers/google/cloud/operators/datacatalog.py +9 -11
  83. airflow/providers/google/cloud/operators/dataflow.py +1 -112
  84. airflow/providers/google/cloud/operators/dataform.py +3 -5
  85. airflow/providers/google/cloud/operators/datafusion.py +1 -1
  86. airflow/providers/google/cloud/operators/dataplex.py +2046 -7
  87. airflow/providers/google/cloud/operators/dataproc.py +102 -17
  88. airflow/providers/google/cloud/operators/dataproc_metastore.py +7 -9
  89. airflow/providers/google/cloud/operators/dlp.py +17 -19
  90. airflow/providers/google/cloud/operators/gcs.py +14 -17
  91. airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -2
  92. airflow/providers/google/cloud/operators/managed_kafka.py +788 -0
  93. airflow/providers/google/cloud/operators/natural_language.py +3 -5
  94. airflow/providers/google/cloud/operators/pubsub.py +39 -7
  95. airflow/providers/google/cloud/operators/speech_to_text.py +3 -5
  96. airflow/providers/google/cloud/operators/stackdriver.py +3 -5
  97. airflow/providers/google/cloud/operators/tasks.py +4 -6
  98. airflow/providers/google/cloud/operators/text_to_speech.py +2 -4
  99. airflow/providers/google/cloud/operators/translate.py +414 -5
  100. airflow/providers/google/cloud/operators/translate_speech.py +2 -4
  101. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +9 -8
  102. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +4 -6
  103. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +6 -8
  104. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +4 -6
  105. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +4 -6
  106. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +0 -322
  107. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +4 -6
  108. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +4 -6
  109. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +4 -6
  110. airflow/providers/google/cloud/operators/video_intelligence.py +3 -5
  111. airflow/providers/google/cloud/operators/vision.py +4 -6
  112. airflow/providers/google/cloud/operators/workflows.py +5 -7
  113. airflow/providers/google/cloud/secrets/secret_manager.py +1 -2
  114. airflow/providers/google/cloud/sensors/bigquery_dts.py +3 -5
  115. airflow/providers/google/cloud/sensors/bigtable.py +2 -3
  116. airflow/providers/google/cloud/sensors/cloud_composer.py +32 -8
  117. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +39 -1
  118. airflow/providers/google/cloud/sensors/dataplex.py +4 -6
  119. airflow/providers/google/cloud/sensors/dataproc.py +2 -3
  120. airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -2
  121. airflow/providers/google/cloud/sensors/gcs.py +2 -4
  122. airflow/providers/google/cloud/sensors/pubsub.py +2 -3
  123. airflow/providers/google/cloud/sensors/workflows.py +3 -5
  124. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +5 -5
  125. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +10 -12
  126. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +1 -1
  127. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +36 -4
  128. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +27 -2
  129. airflow/providers/google/cloud/transfers/mysql_to_gcs.py +27 -2
  130. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +27 -2
  131. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +34 -5
  132. airflow/providers/google/cloud/transfers/sql_to_gcs.py +15 -0
  133. airflow/providers/google/cloud/transfers/trino_to_gcs.py +25 -2
  134. airflow/providers/google/cloud/triggers/bigquery_dts.py +1 -2
  135. airflow/providers/google/cloud/triggers/cloud_batch.py +1 -2
  136. airflow/providers/google/cloud/triggers/cloud_build.py +1 -2
  137. airflow/providers/google/cloud/triggers/cloud_composer.py +13 -3
  138. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +102 -4
  139. airflow/providers/google/cloud/triggers/dataflow.py +2 -3
  140. airflow/providers/google/cloud/triggers/dataplex.py +1 -2
  141. airflow/providers/google/cloud/triggers/dataproc.py +2 -3
  142. airflow/providers/google/cloud/triggers/kubernetes_engine.py +1 -1
  143. airflow/providers/google/cloud/triggers/pubsub.py +1 -2
  144. airflow/providers/google/cloud/triggers/vertex_ai.py +7 -8
  145. airflow/providers/google/cloud/utils/credentials_provider.py +15 -8
  146. airflow/providers/google/cloud/utils/external_token_supplier.py +1 -0
  147. airflow/providers/google/common/auth_backend/google_openid.py +4 -4
  148. airflow/providers/google/common/consts.py +1 -2
  149. airflow/providers/google/common/hooks/base_google.py +8 -7
  150. airflow/providers/google/get_provider_info.py +186 -134
  151. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -3
  152. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  153. airflow/providers/google/marketing_platform/operators/analytics_admin.py +5 -7
  154. {apache_airflow_providers_google-12.0.0rc1.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/METADATA +41 -58
  155. {apache_airflow_providers_google-12.0.0rc1.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/RECORD +157 -159
  156. airflow/providers/google/cloud/example_dags/example_facebook_ads_to_gcs.py +0 -141
  157. airflow/providers/google/cloud/example_dags/example_looker.py +0 -64
  158. airflow/providers/google/cloud/example_dags/example_presto_to_gcs.py +0 -194
  159. airflow/providers/google/cloud/example_dags/example_salesforce_to_gcs.py +0 -129
  160. airflow/providers/google/marketing_platform/example_dags/__init__.py +0 -16
  161. airflow/providers/google/marketing_platform/example_dags/example_display_video.py +0 -213
  162. {apache_airflow_providers_google-12.0.0rc1.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/WHEEL +0 -0
  163. {apache_airflow_providers_google-12.0.0rc1.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/entry_points.txt +0 -0
@@ -27,10 +27,6 @@ from collections.abc import Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any, SupportsAbs
 
-from google.api_core.exceptions import Conflict
-from google.cloud.bigquery import DEFAULT_RETRY, CopyJob, ExtractJob, LoadJob, QueryJob, Row
-from google.cloud.bigquery.table import RowIterator
-
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
 from airflow.providers.common.sql.operators.sql import (  # type: ignore[attr-defined] # for _parse_boolean
@@ -43,8 +39,12 @@ from airflow.providers.common.sql.operators.sql import ( # type: ignore[attr-de
 )
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
 from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url
-from airflow.providers.google.cloud.links.bigquery import BigQueryDatasetLink, BigQueryTableLink
-from airflow.providers.google.cloud.openlineage.mixins import _BigQueryOpenLineageMixin
+from airflow.providers.google.cloud.links.bigquery import (
+    BigQueryDatasetLink,
+    BigQueryJobDetailLink,
+    BigQueryTableLink,
+)
+from airflow.providers.google.cloud.openlineage.mixins import _BigQueryInsertJobOperatorOpenLineageMixin
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.bigquery import (
     BigQueryCheckTrigger,
@@ -54,15 +54,19 @@ from airflow.providers.google.cloud.triggers.bigquery import (
     BigQueryValueCheckTrigger,
 )
 from airflow.providers.google.cloud.utils.bigquery import convert_job_id
+from airflow.providers.google.common.deprecated import deprecated
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils.helpers import exactly_one
+from google.api_core.exceptions import Conflict
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.cloud.bigquery import DEFAULT_RETRY, CopyJob, ExtractJob, LoadJob, QueryJob, Row
+from google.cloud.bigquery.table import RowIterator, Table, TableListItem, TableReference
 
 if TYPE_CHECKING:
+    from airflow.utils.context import Context
     from google.api_core.retry import Retry
     from google.cloud.bigquery import UnknownJob
 
-    from airflow.utils.context import Context
-
 
 BIGQUERY_JOB_DETAILS_LINK_FMT = "https://console.cloud.google.com/bigquery?j={job_id}"
 
@@ -1158,6 +1162,185 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator, _BigQueryOperatorsEncrypt
         return event["records"]
 
 
+class BigQueryCreateTableOperator(GoogleCloudBaseOperator):
+    """
+    Creates a new table in the specified BigQuery dataset, optionally with schema.
+
+    The schema to be used for the BigQuery table may be specified in one of
+    two ways. You may either directly pass the schema fields in, or you may
+    point the operator to a Google Cloud Storage object name. The object in
+    Google Cloud Storage must be a JSON file with the schema fields in it.
+    You can also create a table without schema.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:BigQueryCreateTableOperator`
+
+    :param project_id: Optional. The project to create the table into.
+    :param dataset_id: Required. The dataset to create the table into.
+    :param table_id: Required. The Name of the table to be created.
+    :param table_resource: Required. Table resource as described in documentation:
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table
+        If ``table`` is a reference, an empty table is created with the specified ID. The dataset that
+        the table belongs to must already exist.
+    :param if_exists: Optional. What should Airflow do if the table exists. If set to `log`,
+        the TI will be passed to success and an error message will be logged. Set to `ignore` to ignore
+        the error, set to `fail` to fail the TI, and set to `skip` to skip it.
+    :param gcs_schema_object: Optional. Full path to the JSON file containing schema. For
+        example: ``gs://test-bucket/dir1/dir2/employee_schema.json``
+    :param gcp_conn_id: Optional. The connection ID used to connect to Google Cloud and
+        interact with the Bigquery service.
+    :param google_cloud_storage_conn_id: Optional. The connection ID used to connect to Google Cloud
+        and interact with the Google Cloud Storage service.
+    :param location: Optional. The location used for the operation.
+    :param retry: Optional. A retry object used to retry requests. If `None` is specified, requests
+        will not be retried.
+    :param timeout: Optional. The amount of time, in seconds, to wait for the request to complete.
+        Note that if `retry` is specified, the timeout applies to each individual attempt.
+    :param impersonation_chain: Optional. Service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account.
+    """
+
+    template_fields: Sequence[str] = (
+        "dataset_id",
+        "table_id",
+        "table_resource",
+        "project_id",
+        "gcs_schema_object",
+        "impersonation_chain",
+    )
+    template_fields_renderers = {"table_resource": "json"}
+    ui_color = BigQueryUIColors.TABLE.value
+    operator_extra_links = (BigQueryTableLink(),)
+
+    def __init__(
+        self,
+        *,
+        dataset_id: str,
+        table_id: str,
+        table_resource: dict[str, Any] | Table | TableReference | TableListItem,
+        project_id: str = PROVIDE_PROJECT_ID,
+        location: str | None = None,
+        gcs_schema_object: str | None = None,
+        gcp_conn_id: str = "google_cloud_default",
+        google_cloud_storage_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        if_exists: str = "log",
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.project_id = project_id
+        self.location = location
+        self.dataset_id = dataset_id
+        self.table_id = table_id
+        self.table_resource = table_resource
+        self.if_exists = IfExistAction(if_exists)
+        self.gcs_schema_object = gcs_schema_object
+        self.gcp_conn_id = gcp_conn_id
+        self.google_cloud_storage_conn_id = google_cloud_storage_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.retry = retry
+        self.timeout = timeout
+        self._table: Table | None = None
+
+    def execute(self, context: Context) -> None:
+        bq_hook = BigQueryHook(
+            gcp_conn_id=self.gcp_conn_id,
+            location=self.location,
+            impersonation_chain=self.impersonation_chain,
+        )
+        if self.gcs_schema_object:
+            gcs_bucket, gcs_object = _parse_gcs_url(self.gcs_schema_object)
+            gcs_hook = GCSHook(
+                gcp_conn_id=self.google_cloud_storage_conn_id,
+                impersonation_chain=self.impersonation_chain,
+            )
+            schema_fields_string = gcs_hook.download_as_byte_array(gcs_bucket, gcs_object).decode("utf-8")
+            schema_fields = json.loads(schema_fields_string)
+        else:
+            schema_fields = None
+
+        try:
+            self.log.info("Creating table...")
+            self._table = bq_hook.create_table(
+                project_id=self.project_id,
+                dataset_id=self.dataset_id,
+                table_id=self.table_id,
+                schema_fields=schema_fields,
+                table_resource=self.table_resource,
+                exists_ok=self.if_exists == IfExistAction.IGNORE,
+                timeout=self.timeout,
+                location=self.location,
+            )
+            if self._table:
+                persist_kwargs = {
+                    "context": context,
+                    "task_instance": self,
+                    "project_id": self._table.to_api_repr()["tableReference"]["projectId"],
+                    "dataset_id": self._table.to_api_repr()["tableReference"]["datasetId"],
+                    "table_id": self._table.to_api_repr()["tableReference"]["tableId"],
+                }
+                self.log.info(
+                    "Table %s.%s.%s created successfully",
+                    self._table.project,
+                    self._table.dataset_id,
+                    self._table.table_id,
+                )
+            else:
+                raise AirflowException("Table creation failed.")
+        except Conflict:
+            error_msg = f"Table {self.dataset_id}.{self.table_id} already exists."
+            if self.if_exists == IfExistAction.LOG:
+                self.log.info(error_msg)
+                persist_kwargs = {
+                    "context": context,
+                    "task_instance": self,
+                    "project_id": self.project_id or bq_hook.project_id,
+                    "dataset_id": self.dataset_id,
+                    "table_id": self.table_id,
+                }
+            elif self.if_exists == IfExistAction.FAIL:
+                raise AirflowException(error_msg)
+            else:
+                raise AirflowSkipException(error_msg)
+
+        BigQueryTableLink.persist(**persist_kwargs)
+
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we will use table resource returned by create method."""
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            get_facets_from_bq_table,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        if not self._table:
+            self.log.debug("OpenLineage did not find `self._table` attribute.")
+            return OperatorLineage()
+
+        output_dataset = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=f"{self._table.project}.{self._table.dataset_id}.{self._table.table_id}",
+            facets=get_facets_from_bq_table(self._table),
+        )
+
+        return OperatorLineage(outputs=[output_dataset])
+
+
+@deprecated(
+    planned_removal_date="July 30, 2025",
+    use_instead="airflow.providers.google.cloud.operators.bigquery.BigQueryCreateTableOperator",
+    category=AirflowProviderDeprecationWarning,
+)
 class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
     """
     Creates a new table in the specified BigQuery dataset, optionally with schema.
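
Note: a minimal usage sketch of the new BigQueryCreateTableOperator added above, assuming this provider version is installed; the DAG id, dataset, table, and schema values are hypothetical, not taken from the package:

    from airflow import DAG
    from airflow.providers.google.cloud.operators.bigquery import BigQueryCreateTableOperator

    with DAG(dag_id="example_bigquery_create_table", schedule=None) as dag:
        create_table = BigQueryCreateTableOperator(
            task_id="create_table",
            dataset_id="my_dataset",  # hypothetical dataset
            table_id="my_table",  # hypothetical table
            # Table resource per https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table
            table_resource={
                "schema": {
                    "fields": [
                        {"name": "name", "type": "STRING", "mode": "REQUIRED"},
                        {"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
                    ]
                },
            },
            if_exists="ignore",  # silently succeed if the table already exists
        )
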
@@ -1335,6 +1518,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
         self.cluster_fields = cluster_fields
         self.table_resource = table_resource
         self.impersonation_chain = impersonation_chain
+        self._table: Table | None = None
         if exists_ok is not None:
             warnings.warn(
                 "`exists_ok` parameter is deprecated, please use `if_exists`",
@@ -1365,6 +1549,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
 
         try:
             self.log.info("Creating table")
+            # Save table as attribute for further use by OpenLineage
             self._table = bq_hook.create_empty_table(
                 project_id=self.project_id,
                 dataset_id=self.dataset_id,
@@ -1379,19 +1564,22 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
                 table_resource=self.table_resource,
                 exists_ok=self.if_exists == IfExistAction.IGNORE,
             )
-            persist_kwargs = {
-                "context": context,
-                "task_instance": self,
-                "project_id": self._table.to_api_repr()["tableReference"]["projectId"],
-                "dataset_id": self._table.to_api_repr()["tableReference"]["datasetId"],
-                "table_id": self._table.to_api_repr()["tableReference"]["tableId"],
-            }
-            self.log.info(
-                "Table %s.%s.%s created successfully",
-                self._table.project,
-                self._table.dataset_id,
-                self._table.table_id,
-            )
+            if self._table:
+                persist_kwargs = {
+                    "context": context,
+                    "task_instance": self,
+                    "project_id": self._table.to_api_repr()["tableReference"]["projectId"],
+                    "dataset_id": self._table.to_api_repr()["tableReference"]["datasetId"],
+                    "table_id": self._table.to_api_repr()["tableReference"]["tableId"],
+                }
+                self.log.info(
+                    "Table %s.%s.%s created successfully",
+                    self._table.project,
+                    self._table.dataset_id,
+                    self._table.table_id,
+                )
+            else:
+                raise AirflowException("Table creation failed.")
         except Conflict:
             error_msg = f"Table {self.dataset_id}.{self.table_id} already exists."
             if self.if_exists == IfExistAction.LOG:
@@ -1410,7 +1598,8 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
 
         BigQueryTableLink.persist(**persist_kwargs)
 
-    def get_openlineage_facets_on_complete(self, task_instance):
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we will use table resource returned by create method."""
         from airflow.providers.common.compat.openlineage.facet import Dataset
         from airflow.providers.google.cloud.openlineage.utils import (
             BIGQUERY_NAMESPACE,
@@ -1418,17 +1607,24 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
         )
         from airflow.providers.openlineage.extractors import OperatorLineage
 
-        table_info = self._table.to_api_repr()["tableReference"]
-        table_id = ".".join((table_info["projectId"], table_info["datasetId"], table_info["tableId"]))
+        if not self._table:
+            self.log.debug("OpenLineage did not find `self._table` attribute.")
+            return OperatorLineage()
+
         output_dataset = Dataset(
             namespace=BIGQUERY_NAMESPACE,
-            name=table_id,
+            name=f"{self._table.project}.{self._table.dataset_id}.{self._table.table_id}",
             facets=get_facets_from_bq_table(self._table),
         )
 
         return OperatorLineage(outputs=[output_dataset])
 
 
+@deprecated(
+    planned_removal_date="July 30, 2025",
+    use_instead="airflow.providers.google.cloud.operators.bigquery.BigQueryCreateTableOperator",
+    category=AirflowProviderDeprecationWarning,
+)
 class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
     """
     Create a new external table with data from Google Cloud Storage.
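
Note: both BigQueryCreateEmptyTableOperator (above) and BigQueryCreateExternalTableOperator (below) are now deprecated in favor of BigQueryCreateTableOperator. A migration might look like the following sketch, with hypothetical task and table names; the schema moves from schema_fields into the table_resource dict:

    # Before (deprecated, planned removal July 30, 2025):
    create = BigQueryCreateEmptyTableOperator(
        task_id="create_table",
        dataset_id="my_dataset",
        table_id="my_table",
        schema_fields=[{"name": "id", "type": "INTEGER", "mode": "REQUIRED"}],
    )

    # After:
    create = BigQueryCreateTableOperator(
        task_id="create_table",
        dataset_id="my_dataset",
        table_id="my_table",
        table_resource={
            "schema": {"fields": [{"name": "id", "type": "INTEGER", "mode": "REQUIRED"}]},
        },
    )
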
@@ -1645,6 +1841,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
         self.encryption_configuration = encryption_configuration
         self.location = location
         self.impersonation_chain = impersonation_chain
+        self._table: Table | None = None
 
     def execute(self, context: Context) -> None:
         bq_hook = BigQueryHook(
@@ -1653,16 +1850,18 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
             impersonation_chain=self.impersonation_chain,
         )
         if self.table_resource:
+            # Save table as attribute for further use by OpenLineage
             self._table = bq_hook.create_empty_table(
                 table_resource=self.table_resource,
             )
-            BigQueryTableLink.persist(
-                context=context,
-                task_instance=self,
-                dataset_id=self._table.to_api_repr()["tableReference"]["datasetId"],
-                project_id=self._table.to_api_repr()["tableReference"]["projectId"],
-                table_id=self._table.to_api_repr()["tableReference"]["tableId"],
-            )
+            if self._table:
+                BigQueryTableLink.persist(
+                    context=context,
+                    task_instance=self,
+                    dataset_id=self._table.dataset_id,
+                    project_id=self._table.project,
+                    table_id=self._table.table_id,
+                )
             return
 
         if not self.schema_fields and self.schema_object and self.source_format != "DATASTORE_BACKUP":
@@ -1712,19 +1911,19 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
             "encryptionConfiguration": self.encryption_configuration,
         }
 
-        self._table = bq_hook.create_empty_table(
-            table_resource=table_resource,
-        )
-
-        BigQueryTableLink.persist(
-            context=context,
-            task_instance=self,
-            dataset_id=self._table.to_api_repr()["tableReference"]["datasetId"],
-            project_id=self._table.to_api_repr()["tableReference"]["projectId"],
-            table_id=self._table.to_api_repr()["tableReference"]["tableId"],
-        )
+        # Save table as attribute for further use by OpenLineage
+        self._table = bq_hook.create_empty_table(table_resource=table_resource)
+        if self._table:
+            BigQueryTableLink.persist(
+                context=context,
+                task_instance=self,
+                dataset_id=self._table.dataset_id,
+                project_id=self._table.project,
+                table_id=self._table.table_id,
+            )
 
-    def get_openlineage_facets_on_complete(self, task_instance):
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we will use table resource returned by create method."""
         from airflow.providers.common.compat.openlineage.facet import Dataset
         from airflow.providers.google.cloud.openlineage.utils import (
             BIGQUERY_NAMESPACE,
@@ -1732,11 +1931,9 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
         )
         from airflow.providers.openlineage.extractors import OperatorLineage
 
-        table_info = self._table.to_api_repr()["tableReference"]
-        table_id = ".".join((table_info["projectId"], table_info["datasetId"], table_info["tableId"]))
         output_dataset = Dataset(
             namespace=BIGQUERY_NAMESPACE,
-            name=table_id,
+            name=f"{self._table.project}.{self._table.dataset_id}.{self._table.table_id}",
             facets=get_facets_from_bq_table(self._table),
         )
 
@@ -2129,6 +2326,7 @@ class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
         self.gcp_conn_id = gcp_conn_id
         self.table_resource = table_resource
         self.impersonation_chain = impersonation_chain
+        self._table: dict | None = None
         super().__init__(**kwargs)
 
     def execute(self, context: Context):
@@ -2137,7 +2335,8 @@ class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
             impersonation_chain=self.impersonation_chain,
         )
 
-        table = bq_hook.update_table(
+        # Save table as attribute for further use by OpenLineage
+        self._table = bq_hook.update_table(
             table_resource=self.table_resource,
             fields=self.fields,
             dataset_id=self.dataset_id,
@@ -2145,15 +2344,34 @@ class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
             project_id=self.project_id,
         )
 
-        BigQueryTableLink.persist(
-            context=context,
-            task_instance=self,
-            dataset_id=table["tableReference"]["datasetId"],
-            project_id=table["tableReference"]["projectId"],
-            table_id=table["tableReference"]["tableId"],
+        if self._table:
+            BigQueryTableLink.persist(
+                context=context,
+                task_instance=self,
+                dataset_id=self._table["tableReference"]["datasetId"],
+                project_id=self._table["tableReference"]["projectId"],
+                table_id=self._table["tableReference"]["tableId"],
+            )
+
+        return self._table
+
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we will use table resource returned by update method."""
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            get_facets_from_bq_table,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        table = Table.from_api_repr(self._table)
+        output_dataset = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=f"{table.project}.{table.dataset_id}.{table.table_id}",
+            facets=get_facets_from_bq_table(table),
         )
 
-        return table
+        return OperatorLineage(outputs=[output_dataset])
 
 
 class BigQueryUpdateDatasetOperator(GoogleCloudBaseOperator):
@@ -2287,15 +2505,47 @@ class BigQueryDeleteTableOperator(GoogleCloudBaseOperator):
         self.ignore_if_missing = ignore_if_missing
         self.location = location
         self.impersonation_chain = impersonation_chain
+        self.hook: BigQueryHook | None = None
 
     def execute(self, context: Context) -> None:
         self.log.info("Deleting: %s", self.deletion_dataset_table)
-        hook = BigQueryHook(
+        # Save hook as attribute for further use by OpenLineage
+        self.hook = BigQueryHook(
             gcp_conn_id=self.gcp_conn_id,
             location=self.location,
             impersonation_chain=self.impersonation_chain,
         )
-        hook.delete_table(table_id=self.deletion_dataset_table, not_found_ok=self.ignore_if_missing)
+        self.hook.delete_table(table_id=self.deletion_dataset_table, not_found_ok=self.ignore_if_missing)
+
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we need default project_id from hook."""
+        from airflow.providers.common.compat.openlineage.facet import (
+            Dataset,
+            LifecycleStateChange,
+            LifecycleStateChangeDatasetFacet,
+            PreviousIdentifier,
+        )
+        from airflow.providers.google.cloud.openlineage.utils import BIGQUERY_NAMESPACE
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        bq_table_id = str(
+            TableReference.from_string(self.deletion_dataset_table, default_project=self.hook.project_id)
+        )
+        ds = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=bq_table_id,
+            facets={
+                "lifecycleStateChange": LifecycleStateChangeDatasetFacet(
+                    lifecycleStateChange=LifecycleStateChange.DROP.value,
+                    previousIdentifier=PreviousIdentifier(
+                        namespace=BIGQUERY_NAMESPACE,
+                        name=bq_table_id,
+                    ),
+                )
+            },
+        )
+
+        return OperatorLineage(inputs=[ds])
 
 
 class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
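
Note: the new get_openlineage_facets_on_complete above resolves a possibly project-less deletion_dataset_table into a fully qualified BigQuery table id using the hook's default project. A quick standalone illustration of that resolution, with hypothetical project and table names:

    from google.cloud.bigquery.table import TableReference

    # "dataset.table" has no project part, so default_project fills it in.
    ref = TableReference.from_string("my_dataset.my_table", default_project="my-project")
    print(str(ref))  # my-project.my_dataset.my_table
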
@@ -2354,6 +2604,7 @@ class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
         self.gcp_conn_id = gcp_conn_id
         self.location = location
         self.impersonation_chain = impersonation_chain
+        self._table: dict | None = None
 
     def execute(self, context: Context) -> None:
         self.log.info("Upserting Dataset: %s with table_resource: %s", self.dataset_id, self.table_resource)
@@ -2362,18 +2613,38 @@ class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
             location=self.location,
             impersonation_chain=self.impersonation_chain,
         )
-        table = hook.run_table_upsert(
+        # Save table as attribute for further use by OpenLineage
+        self._table = hook.run_table_upsert(
             dataset_id=self.dataset_id,
             table_resource=self.table_resource,
             project_id=self.project_id,
         )
-        BigQueryTableLink.persist(
-            context=context,
-            task_instance=self,
-            dataset_id=table["tableReference"]["datasetId"],
-            project_id=table["tableReference"]["projectId"],
-            table_id=table["tableReference"]["tableId"],
+        if self._table:
+            BigQueryTableLink.persist(
+                context=context,
+                task_instance=self,
+                dataset_id=self._table["tableReference"]["datasetId"],
+                project_id=self._table["tableReference"]["projectId"],
+                table_id=self._table["tableReference"]["tableId"],
+            )
+
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we will use table resource returned by upsert method."""
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            get_facets_from_bq_table,
         )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        table = Table.from_api_repr(self._table)
+        output_dataset = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=f"{table.project}.{table.dataset_id}.{table.table_id}",
+            facets=get_facets_from_bq_table(table),
+        )
+
+        return OperatorLineage(outputs=[output_dataset])
 
 
 class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
@@ -2462,6 +2733,7 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
         self.location = location
+        self._table: dict | None = None
         super().__init__(**kwargs)
 
     def execute(self, context: Context):
@@ -2469,25 +2741,44 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
             gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain, location=self.location
         )
 
-        table = bq_hook.update_table_schema(
+        # Save table as attribute for further use by OpenLineage
+        self._table = bq_hook.update_table_schema(
             schema_fields_updates=self.schema_fields_updates,
             include_policy_tags=self.include_policy_tags,
             dataset_id=self.dataset_id,
             table_id=self.table_id,
             project_id=self.project_id,
         )
+        if self._table:
+            BigQueryTableLink.persist(
+                context=context,
+                task_instance=self,
+                dataset_id=self._table["tableReference"]["datasetId"],
+                project_id=self._table["tableReference"]["projectId"],
+                table_id=self._table["tableReference"]["tableId"],
+            )
+        return self._table
 
-        BigQueryTableLink.persist(
-            context=context,
-            task_instance=self,
-            dataset_id=table["tableReference"]["datasetId"],
-            project_id=table["tableReference"]["projectId"],
-            table_id=table["tableReference"]["tableId"],
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we will use table resource returned by update method."""
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            get_facets_from_bq_table,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        table = Table.from_api_repr(self._table)
+        output_dataset = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=f"{table.project}.{table.dataset_id}.{table.table_id}",
+            facets=get_facets_from_bq_table(table),
         )
-        return table
+
+        return OperatorLineage(outputs=[output_dataset])
 
 
-class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMixin):
+class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOperatorOpenLineageMixin):
     """
     Execute a BigQuery job.
 
@@ -2554,7 +2845,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMix
     )
     template_fields_renderers = {"configuration": "json", "configuration.query.query": "sql"}
     ui_color = BigQueryUIColors.QUERY.value
-    operator_extra_links = (BigQueryTableLink(),)
+    operator_extra_links = (BigQueryTableLink(), BigQueryJobDetailLink())
 
     def __init__(
         self,
@@ -2664,6 +2955,8 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMix
         try:
             self.log.info("Executing: %s'", self.configuration)
             # Create a job
+            if self.job_id is None:
+                raise ValueError("job_id cannot be None")
             job: BigQueryJob | UnknownJob = self._submit_job(hook, self.job_id)
         except Conflict:
             # If the job already exists retrieve it
@@ -2726,6 +3019,15 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMix
         )
         context["ti"].xcom_push(key="job_id_path", value=job_id_path)
 
+        persist_kwargs = {
+            "context": context,
+            "task_instance": self,
+            "project_id": self.project_id,
+            "location": self.location,
+            "job_id": self.job_id,
+        }
+        BigQueryJobDetailLink.persist(**persist_kwargs)
+
         # Wait for the job to complete
         if not self.deferrable:
             job.result(timeout=self.result_timeout, retry=self.result_retry)
@@ -2749,6 +3051,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMix
         )
         self.log.info("Current state of job %s is %s", job.job_id, job.state)
         self._handle_job_error(job)
+        return self.job_id
 
     def execute_complete(self, context: Context, event: dict[str, Any]) -> str | None:
         """