apache-airflow-providers-google 14.0.0__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278)
  1. airflow/providers/google/3rd-party-licenses/LICENSES.txt +14 -0
  2. airflow/providers/google/3rd-party-licenses/NOTICE +5 -0
  3. airflow/providers/google/__init__.py +3 -3
  4. airflow/providers/google/_vendor/__init__.py +0 -0
  5. airflow/providers/google/_vendor/json_merge_patch.py +91 -0
  6. airflow/providers/google/ads/hooks/ads.py +52 -43
  7. airflow/providers/google/ads/operators/ads.py +2 -2
  8. airflow/providers/google/ads/transfers/ads_to_gcs.py +3 -19
  9. airflow/providers/google/assets/gcs.py +1 -11
  10. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +3 -2
  11. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  12. airflow/providers/google/cloud/hooks/alloy_db.py +2 -3
  13. airflow/providers/google/cloud/hooks/bigquery.py +195 -318
  14. airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
  15. airflow/providers/google/cloud/hooks/bigtable.py +3 -2
  16. airflow/providers/google/cloud/hooks/cloud_batch.py +8 -9
  17. airflow/providers/google/cloud/hooks/cloud_build.py +6 -65
  18. airflow/providers/google/cloud/hooks/cloud_composer.py +292 -24
  19. airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
  20. airflow/providers/google/cloud/hooks/cloud_memorystore.py +4 -3
  21. airflow/providers/google/cloud/hooks/cloud_run.py +20 -11
  22. airflow/providers/google/cloud/hooks/cloud_sql.py +136 -64
  23. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +35 -15
  24. airflow/providers/google/cloud/hooks/compute.py +7 -6
  25. airflow/providers/google/cloud/hooks/compute_ssh.py +7 -4
  26. airflow/providers/google/cloud/hooks/datacatalog.py +12 -3
  27. airflow/providers/google/cloud/hooks/dataflow.py +87 -242
  28. airflow/providers/google/cloud/hooks/dataform.py +9 -14
  29. airflow/providers/google/cloud/hooks/datafusion.py +7 -9
  30. airflow/providers/google/cloud/hooks/dataplex.py +13 -12
  31. airflow/providers/google/cloud/hooks/dataprep.py +2 -2
  32. airflow/providers/google/cloud/hooks/dataproc.py +76 -74
  33. airflow/providers/google/cloud/hooks/dataproc_metastore.py +4 -3
  34. airflow/providers/google/cloud/hooks/dlp.py +5 -4
  35. airflow/providers/google/cloud/hooks/gcs.py +144 -33
  36. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  37. airflow/providers/google/cloud/hooks/kms.py +3 -2
  38. airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -17
  39. airflow/providers/google/cloud/hooks/looker.py +6 -1
  40. airflow/providers/google/cloud/hooks/managed_kafka.py +227 -3
  41. airflow/providers/google/cloud/hooks/mlengine.py +7 -8
  42. airflow/providers/google/cloud/hooks/natural_language.py +3 -2
  43. airflow/providers/google/cloud/hooks/os_login.py +3 -2
  44. airflow/providers/google/cloud/hooks/pubsub.py +6 -6
  45. airflow/providers/google/cloud/hooks/secret_manager.py +105 -12
  46. airflow/providers/google/cloud/hooks/spanner.py +75 -10
  47. airflow/providers/google/cloud/hooks/speech_to_text.py +3 -2
  48. airflow/providers/google/cloud/hooks/stackdriver.py +18 -18
  49. airflow/providers/google/cloud/hooks/tasks.py +4 -3
  50. airflow/providers/google/cloud/hooks/text_to_speech.py +3 -2
  51. airflow/providers/google/cloud/hooks/translate.py +8 -17
  52. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -222
  53. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +9 -15
  54. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +33 -283
  55. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +5 -12
  56. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +6 -12
  57. airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
  58. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +311 -10
  59. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
  60. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +7 -13
  61. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +8 -12
  62. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +6 -12
  63. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +3 -2
  64. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
  65. airflow/providers/google/cloud/hooks/video_intelligence.py +3 -2
  66. airflow/providers/google/cloud/hooks/vision.py +7 -7
  67. airflow/providers/google/cloud/hooks/workflows.py +4 -3
  68. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  69. airflow/providers/google/cloud/links/base.py +77 -7
  70. airflow/providers/google/cloud/links/bigquery.py +0 -47
  71. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  72. airflow/providers/google/cloud/links/bigtable.py +0 -48
  73. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  74. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  75. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  76. airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
  77. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  78. airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -46
  79. airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
  80. airflow/providers/google/cloud/links/compute.py +0 -58
  81. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  82. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  83. airflow/providers/google/cloud/links/dataflow.py +0 -34
  84. airflow/providers/google/cloud/links/dataform.py +0 -64
  85. airflow/providers/google/cloud/links/datafusion.py +1 -90
  86. airflow/providers/google/cloud/links/dataplex.py +0 -154
  87. airflow/providers/google/cloud/links/dataprep.py +0 -24
  88. airflow/providers/google/cloud/links/dataproc.py +11 -89
  89. airflow/providers/google/cloud/links/datastore.py +0 -31
  90. airflow/providers/google/cloud/links/kubernetes_engine.py +11 -61
  91. airflow/providers/google/cloud/links/managed_kafka.py +11 -51
  92. airflow/providers/google/cloud/links/mlengine.py +0 -70
  93. airflow/providers/google/cloud/links/pubsub.py +0 -32
  94. airflow/providers/google/cloud/links/spanner.py +0 -33
  95. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  96. airflow/providers/google/cloud/links/translate.py +17 -187
  97. airflow/providers/google/cloud/links/vertex_ai.py +28 -195
  98. airflow/providers/google/cloud/links/workflows.py +0 -52
  99. airflow/providers/google/cloud/log/gcs_task_handler.py +166 -118
  100. airflow/providers/google/cloud/log/stackdriver_task_handler.py +14 -9
  101. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  102. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  103. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  104. airflow/providers/google/cloud/openlineage/facets.py +141 -40
  105. airflow/providers/google/cloud/openlineage/mixins.py +14 -13
  106. airflow/providers/google/cloud/openlineage/utils.py +19 -3
  107. airflow/providers/google/cloud/operators/alloy_db.py +76 -61
  108. airflow/providers/google/cloud/operators/bigquery.py +104 -667
  109. airflow/providers/google/cloud/operators/bigquery_dts.py +12 -12
  110. airflow/providers/google/cloud/operators/bigtable.py +38 -7
  111. airflow/providers/google/cloud/operators/cloud_base.py +22 -1
  112. airflow/providers/google/cloud/operators/cloud_batch.py +18 -18
  113. airflow/providers/google/cloud/operators/cloud_build.py +80 -36
  114. airflow/providers/google/cloud/operators/cloud_composer.py +157 -71
  115. airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
  116. airflow/providers/google/cloud/operators/cloud_memorystore.py +74 -46
  117. airflow/providers/google/cloud/operators/cloud_run.py +39 -20
  118. airflow/providers/google/cloud/operators/cloud_sql.py +46 -61
  119. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -14
  120. airflow/providers/google/cloud/operators/compute.py +18 -50
  121. airflow/providers/google/cloud/operators/datacatalog.py +167 -29
  122. airflow/providers/google/cloud/operators/dataflow.py +38 -15
  123. airflow/providers/google/cloud/operators/dataform.py +19 -7
  124. airflow/providers/google/cloud/operators/datafusion.py +43 -43
  125. airflow/providers/google/cloud/operators/dataplex.py +212 -126
  126. airflow/providers/google/cloud/operators/dataprep.py +1 -5
  127. airflow/providers/google/cloud/operators/dataproc.py +134 -207
  128. airflow/providers/google/cloud/operators/dataproc_metastore.py +102 -84
  129. airflow/providers/google/cloud/operators/datastore.py +22 -6
  130. airflow/providers/google/cloud/operators/dlp.py +24 -45
  131. airflow/providers/google/cloud/operators/functions.py +21 -14
  132. airflow/providers/google/cloud/operators/gcs.py +15 -12
  133. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  134. airflow/providers/google/cloud/operators/kubernetes_engine.py +115 -106
  135. airflow/providers/google/cloud/operators/looker.py +1 -1
  136. airflow/providers/google/cloud/operators/managed_kafka.py +362 -40
  137. airflow/providers/google/cloud/operators/natural_language.py +5 -3
  138. airflow/providers/google/cloud/operators/pubsub.py +69 -21
  139. airflow/providers/google/cloud/operators/spanner.py +53 -45
  140. airflow/providers/google/cloud/operators/speech_to_text.py +5 -4
  141. airflow/providers/google/cloud/operators/stackdriver.py +5 -11
  142. airflow/providers/google/cloud/operators/tasks.py +6 -15
  143. airflow/providers/google/cloud/operators/text_to_speech.py +4 -3
  144. airflow/providers/google/cloud/operators/translate.py +46 -20
  145. airflow/providers/google/cloud/operators/translate_speech.py +4 -3
  146. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +44 -34
  147. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +34 -12
  148. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +62 -53
  149. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +75 -11
  150. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +48 -12
  151. airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
  152. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
  153. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
  154. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +16 -12
  155. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +62 -14
  156. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +35 -10
  157. airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
  158. airflow/providers/google/cloud/operators/video_intelligence.py +5 -3
  159. airflow/providers/google/cloud/operators/vision.py +7 -5
  160. airflow/providers/google/cloud/operators/workflows.py +24 -19
  161. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  162. airflow/providers/google/cloud/sensors/bigquery.py +2 -2
  163. airflow/providers/google/cloud/sensors/bigquery_dts.py +6 -4
  164. airflow/providers/google/cloud/sensors/bigtable.py +14 -6
  165. airflow/providers/google/cloud/sensors/cloud_composer.py +535 -33
  166. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +6 -5
  167. airflow/providers/google/cloud/sensors/dataflow.py +27 -10
  168. airflow/providers/google/cloud/sensors/dataform.py +2 -2
  169. airflow/providers/google/cloud/sensors/datafusion.py +4 -4
  170. airflow/providers/google/cloud/sensors/dataplex.py +7 -5
  171. airflow/providers/google/cloud/sensors/dataprep.py +2 -2
  172. airflow/providers/google/cloud/sensors/dataproc.py +10 -9
  173. airflow/providers/google/cloud/sensors/dataproc_metastore.py +4 -3
  174. airflow/providers/google/cloud/sensors/gcs.py +22 -21
  175. airflow/providers/google/cloud/sensors/looker.py +5 -5
  176. airflow/providers/google/cloud/sensors/pubsub.py +20 -20
  177. airflow/providers/google/cloud/sensors/tasks.py +2 -2
  178. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
  179. airflow/providers/google/cloud/sensors/workflows.py +6 -4
  180. airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
  181. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
  182. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
  183. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
  184. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +14 -13
  185. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
  186. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  187. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
  188. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
  189. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  190. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +18 -22
  191. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -5
  192. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +45 -38
  193. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
  194. airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
  195. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
  196. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
  197. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
  198. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  199. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
  200. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  201. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
  202. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +44 -12
  203. airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
  204. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
  205. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +36 -14
  206. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
  207. airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
  208. airflow/providers/google/cloud/triggers/bigquery.py +75 -34
  209. airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
  210. airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
  211. airflow/providers/google/cloud/triggers/cloud_build.py +3 -2
  212. airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
  213. airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
  214. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +96 -5
  215. airflow/providers/google/cloud/triggers/dataflow.py +125 -2
  216. airflow/providers/google/cloud/triggers/datafusion.py +1 -1
  217. airflow/providers/google/cloud/triggers/dataplex.py +16 -3
  218. airflow/providers/google/cloud/triggers/dataproc.py +124 -53
  219. airflow/providers/google/cloud/triggers/kubernetes_engine.py +46 -28
  220. airflow/providers/google/cloud/triggers/mlengine.py +1 -1
  221. airflow/providers/google/cloud/triggers/pubsub.py +17 -20
  222. airflow/providers/google/cloud/triggers/vertex_ai.py +8 -7
  223. airflow/providers/google/cloud/utils/bigquery.py +5 -7
  224. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  225. airflow/providers/google/cloud/utils/credentials_provider.py +4 -3
  226. airflow/providers/google/cloud/utils/dataform.py +1 -1
  227. airflow/providers/google/cloud/utils/external_token_supplier.py +0 -1
  228. airflow/providers/google/cloud/utils/field_validator.py +1 -2
  229. airflow/providers/google/cloud/utils/validators.py +43 -0
  230. airflow/providers/google/common/auth_backend/google_openid.py +26 -9
  231. airflow/providers/google/common/consts.py +2 -1
  232. airflow/providers/google/common/deprecated.py +2 -1
  233. airflow/providers/google/common/hooks/base_google.py +40 -43
  234. airflow/providers/google/common/hooks/operation_helpers.py +78 -0
  235. airflow/providers/google/common/links/storage.py +0 -22
  236. airflow/providers/google/common/utils/get_secret.py +31 -0
  237. airflow/providers/google/common/utils/id_token_credentials.py +4 -5
  238. airflow/providers/google/firebase/operators/firestore.py +2 -2
  239. airflow/providers/google/get_provider_info.py +61 -216
  240. airflow/providers/google/go_module_utils.py +35 -3
  241. airflow/providers/google/leveldb/hooks/leveldb.py +30 -6
  242. airflow/providers/google/leveldb/operators/leveldb.py +2 -2
  243. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +3 -2
  244. airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
  245. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  246. airflow/providers/google/marketing_platform/links/analytics_admin.py +4 -5
  247. airflow/providers/google/marketing_platform/operators/analytics_admin.py +7 -6
  248. airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
  249. airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
  250. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
  251. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
  252. airflow/providers/google/marketing_platform/sensors/display_video.py +4 -64
  253. airflow/providers/google/suite/hooks/calendar.py +1 -1
  254. airflow/providers/google/suite/hooks/drive.py +2 -2
  255. airflow/providers/google/suite/hooks/sheets.py +15 -1
  256. airflow/providers/google/suite/operators/sheets.py +8 -3
  257. airflow/providers/google/suite/sensors/drive.py +2 -2
  258. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
  259. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  260. airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
  261. airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
  262. airflow/providers/google/version_compat.py +15 -1
  263. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +117 -72
  264. apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
  265. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +1 -1
  266. apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
  267. airflow/providers/google/cloud/example_dags/example_cloud_task.py +0 -54
  268. airflow/providers/google/cloud/hooks/automl.py +0 -679
  269. airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
  270. airflow/providers/google/cloud/links/automl.py +0 -193
  271. airflow/providers/google/cloud/operators/automl.py +0 -1360
  272. airflow/providers/google/cloud/operators/life_sciences.py +0 -119
  273. airflow/providers/google/cloud/operators/mlengine.py +0 -1515
  274. airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
  275. apache_airflow_providers_google-14.0.0.dist-info/RECORD +0 -318
  276. /airflow/providers/google/cloud/{example_dags → bundles}/__init__.py +0 -0
  277. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
  278. {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
@@ -27,9 +27,15 @@ from collections.abc import Sequence
  from functools import cached_property
  from typing import TYPE_CHECKING, Any, SupportsAbs

+ from google.api_core.exceptions import Conflict
+ from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+ from google.cloud.bigquery import DEFAULT_RETRY, CopyJob, ExtractJob, LoadJob, QueryJob, Row
+ from google.cloud.bigquery.table import RowIterator, Table, TableListItem, TableReference
+
  from airflow.configuration import conf
- from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
- from airflow.providers.common.sql.operators.sql import ( # type: ignore[attr-defined] # for _parse_boolean
+ from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+ from airflow.providers.common.compat.sdk import AirflowSkipException
+ from airflow.providers.common.sql.operators.sql import ( # for _parse_boolean
      SQLCheckOperator,
      SQLColumnCheckOperator,
      SQLIntervalCheckOperator,
@@ -54,19 +60,15 @@ from airflow.providers.google.cloud.triggers.bigquery import (
      BigQueryValueCheckTrigger,
  )
  from airflow.providers.google.cloud.utils.bigquery import convert_job_id
- from airflow.providers.google.common.deprecated import deprecated
  from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
  from airflow.utils.helpers import exactly_one
- from google.api_core.exceptions import Conflict
- from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
- from google.cloud.bigquery import DEFAULT_RETRY, CopyJob, ExtractJob, LoadJob, QueryJob, Row
- from google.cloud.bigquery.table import RowIterator, Table, TableListItem, TableReference

  if TYPE_CHECKING:
-     from airflow.utils.context import Context
      from google.api_core.retry import Retry
      from google.cloud.bigquery import UnknownJob

+     from airflow.providers.common.compat.sdk import Context
+

  BIGQUERY_JOB_DETAILS_LINK_FMT = "https://console.cloud.google.com/bigquery?j={job_id}"

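The two hunks above consolidate the third-party google.* imports at the top of the module and switch AirflowSkipException and the Context type hint to the airflow.providers.common.compat.sdk shim. A minimal sketch of the same import pattern in user code, assuming only the module paths shown in the hunks (the helper function below is purely illustrative):

from __future__ import annotations

from typing import TYPE_CHECKING

# Runtime import: the compat shim re-exports AirflowSkipException for both Airflow 2 and 3.
from airflow.providers.common.compat.sdk import AirflowSkipException

if TYPE_CHECKING:
    # Type-only import, mirroring the TYPE_CHECKING block in the hunk above.
    from airflow.providers.common.compat.sdk import Context


def maybe_skip(context: Context, skip: bool) -> None:
    # Illustrative helper: raising AirflowSkipException marks the task instance as skipped.
    if skip:
        raise AirflowSkipException("Nothing to do for this run")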
@@ -91,10 +93,23 @@ class IfExistAction(enum.Enum):
      SKIP = "skip"


+ class _BigQueryHookWithFlexibleProjectId(BigQueryHook):
+     @property
+     def project_id(self) -> str:
+         _, project_id = self.get_credentials_and_project_id()
+         return project_id or PROVIDE_PROJECT_ID
+
+     @project_id.setter
+     def project_id(self, value: str) -> None:
+         cached_creds, _ = self.get_credentials_and_project_id()
+         self._cached_project_id = value or PROVIDE_PROJECT_ID
+         self._cached_credntials = cached_creds
+
+
  class _BigQueryDbHookMixin:
-     def get_db_hook(self: BigQueryCheckOperator) -> BigQueryHook: # type:ignore[misc]
+     def get_db_hook(self: BigQueryCheckOperator) -> _BigQueryHookWithFlexibleProjectId: # type:ignore[misc]
          """Get BigQuery DB Hook."""
-         return BigQueryHook(
+         hook = _BigQueryHookWithFlexibleProjectId(
              gcp_conn_id=self.gcp_conn_id,
              use_legacy_sql=self.use_legacy_sql,
              location=self.location,
@@ -102,6 +117,11 @@ class _BigQueryDbHookMixin:
              labels=self.labels,
          )

+         # mypy assuming project_id is read only, as project_id is a property in GoogleBaseHook.
+         if self.project_id:
+             hook.project_id = self.project_id # type:ignore[misc]
+         return hook
+

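The _BigQueryHookWithFlexibleProjectId subclass above exists so that get_db_hook() can push the operator's project_id onto a hook whose project_id is otherwise a read-only property on GoogleBaseHook. A self-contained sketch of that property-override pattern, using stand-in classes rather than the real hooks:

class BaseHook:
    """Stand-in for GoogleBaseHook, where project_id is a read-only property."""

    def get_credentials_and_project_id(self) -> tuple[object, str | None]:
        # Illustrative stub; the real hook resolves these from the Airflow connection.
        return object(), getattr(self, "_cached_project_id", None) or "project-from-connection"

    @property
    def project_id(self) -> str | None:
        _, project_id = self.get_credentials_and_project_id()
        return project_id


class FlexibleProjectIdHook(BaseHook):
    """Same shape as _BigQueryHookWithFlexibleProjectId: re-declare the property and add a setter."""

    @property
    def project_id(self) -> str | None:
        _, project_id = self.get_credentials_and_project_id()
        return project_id

    @project_id.setter
    def project_id(self, value: str) -> None:
        # Caching the override is what lets get_db_hook() assign hook.project_id = self.project_id.
        self._cached_project_id = value


hook = FlexibleProjectIdHook()
hook.project_id = "my-override-project"  # would raise AttributeError on the read-only base property
assert hook.project_id == "my-override-project"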
  class _BigQueryOperatorsEncryptionConfigurationMixin:
      """A class to handle the configuration for BigQueryHook.insert_job method."""
@@ -188,6 +208,7 @@ class BigQueryCheckOperator(
          https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs.
          For example, [{ 'name': 'corpus', 'parameterType': { 'type': 'STRING' },
          'parameterValue': { 'value': 'romeoandjuliet' } }]. (templated)
+     :param project_id: Google Cloud Project where the job is running
      """

      template_fields: Sequence[str] = (
@@ -206,6 +227,7 @@ class BigQueryCheckOperator(
          *,
          sql: str,
          gcp_conn_id: str = "google_cloud_default",
+         project_id: str = PROVIDE_PROJECT_ID,
          use_legacy_sql: bool = True,
          location: str | None = None,
          impersonation_chain: str | Sequence[str] | None = None,
@@ -226,6 +248,7 @@ class BigQueryCheckOperator(
          self.deferrable = deferrable
          self.poll_interval = poll_interval
          self.query_params = query_params
+         self.project_id = project_id

      def _submit_job(
          self,
@@ -241,7 +264,7 @@ class BigQueryCheckOperator(

          return hook.insert_job(
              configuration=configuration,
-             project_id=hook.project_id,
+             project_id=self.project_id,
              location=self.location,
              job_id=job_id,
              nowait=True,
@@ -255,6 +278,8 @@ class BigQueryCheckOperator(
                  gcp_conn_id=self.gcp_conn_id,
                  impersonation_chain=self.impersonation_chain,
              )
+             if self.project_id is None:
+                 self.project_id = hook.project_id
              job = self._submit_job(hook, job_id="")
              context["ti"].xcom_push(key="job_id", value=job.job_id)
              if job.running():
@@ -263,7 +288,7 @@ class BigQueryCheckOperator(
                      trigger=BigQueryCheckTrigger(
                          conn_id=self.gcp_conn_id,
                          job_id=job.job_id,
-                         project_id=hook.project_id,
+                         project_id=self.project_id,
                          location=self.location or hook.location,
                          poll_interval=self.poll_interval,
                          impersonation_chain=self.impersonation_chain,
@@ -285,10 +310,8 @@ class BigQueryCheckOperator(
      def _validate_records(self, records) -> None:
          if not records:
              raise AirflowException(f"The following query returned zero rows: {self.sql}")
-         elif not all(records):
-             self._raise_exception( # type: ignore[attr-defined]
-                 f"Test failed.\nQuery:\n{self.sql}\nResults:\n{records!s}"
-             )
+         if not all(records):
+             self._raise_exception(f"Test failed.\nQuery:\n{self.sql}\nResults:\n{records!s}")

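Taken together, the BigQueryCheckOperator hunks let the check job run in a project other than the one resolved from the connection: project_id is accepted by the constructor, falls back to hook.project_id when left unset, and is forwarded to insert_job and to the deferrable trigger. A hedged usage sketch; the DAG id, table name and project names below are placeholders:

import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.bigquery import BigQueryCheckOperator

with DAG(
    dag_id="example_bq_check_project_override",  # placeholder
    start_date=datetime.datetime(2024, 1, 1),
    schedule=None,
    catchup=False,
):
    check_rows = BigQueryCheckOperator(
        task_id="check_rows",
        sql="SELECT COUNT(*) FROM `my-data-project.my_dataset.my_table`",  # placeholder
        use_legacy_sql=False,
        gcp_conn_id="google_cloud_default",
        # New in this version range: run the check job in a different project
        # than the one attached to the connection.
        project_id="my-billing-project",  # placeholder
    )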
      def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
          """
@@ -340,6 +363,7 @@ class BigQueryValueCheckOperator(
      :param deferrable: Run operator in the deferrable mode.
      :param poll_interval: (Deferrable mode only) polling period in seconds to
          check for the status of job.
+     :param project_id: Google Cloud Project where the job is running
      """

      template_fields: Sequence[str] = (
@@ -361,6 +385,7 @@ class BigQueryValueCheckOperator(
          tolerance: Any = None,
          encryption_configuration: dict | None = None,
          gcp_conn_id: str = "google_cloud_default",
+         project_id: str = PROVIDE_PROJECT_ID,
          use_legacy_sql: bool = True,
          location: str | None = None,
          impersonation_chain: str | Sequence[str] | None = None,
@@ -378,6 +403,7 @@ class BigQueryValueCheckOperator(
          self.labels = labels
          self.deferrable = deferrable
          self.poll_interval = poll_interval
+         self.project_id = project_id

      def _submit_job(
          self,
@@ -396,18 +422,19 @@ class BigQueryValueCheckOperator(

          return hook.insert_job(
              configuration=configuration,
-             project_id=hook.project_id,
+             project_id=self.project_id,
              location=self.location,
              job_id=job_id,
              nowait=True,
          )

-     def execute(self, context: Context) -> None: # type: ignore[override]
+     def execute(self, context: Context) -> None:
          if not self.deferrable:
              super().execute(context=context)
          else:
              hook = BigQueryHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
-
+             if self.project_id is None:
+                 self.project_id = hook.project_id
              job = self._submit_job(hook, job_id="")
              context["ti"].xcom_push(key="job_id", value=job.job_id)
              if job.running():
@@ -416,7 +443,7 @@ class BigQueryValueCheckOperator(
                      trigger=BigQueryValueCheckTrigger(
                          conn_id=self.gcp_conn_id,
                          job_id=job.job_id,
-                         project_id=hook.project_id,
+                         project_id=self.project_id,
                          location=self.location or hook.location,
                          sql=self.sql,
                          pass_value=self.pass_value,
@@ -573,6 +600,9 @@ class BigQueryIntervalCheckOperator(
              hook = BigQueryHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
              self.log.info("Using ratio formula: %s", self.ratio_formula)

+             if self.project_id is None:
+                 self.project_id = hook.project_id
+
              self.log.info("Executing SQL check: %s", self.sql1)
              job_1 = self._submit_job(hook, sql=self.sql1, job_id="")
              context["ti"].xcom_push(key="job_id", value=job_1.job_id)
@@ -585,7 +615,7 @@ class BigQueryIntervalCheckOperator(
                          conn_id=self.gcp_conn_id,
                          first_job_id=job_1.job_id,
                          second_job_id=job_2.job_id,
-                         project_id=hook.project_id,
+                         project_id=self.project_id,
                          table=self.table,
                          location=self.location or hook.location,
                          metrics_thresholds=self.metrics_thresholds,
@@ -652,6 +682,7 @@ class BigQueryColumnCheckOperator(
          Service Account Token Creator IAM role to the directly preceding identity, with first
          account from the list granting this role to the originating account (templated).
      :param labels: a dictionary containing labels for the table, passed to BigQuery
+     :param project_id: Google Cloud Project where the job is running
      """

      template_fields: Sequence[str] = tuple(set(SQLColumnCheckOperator.template_fields) | {"gcp_conn_id"})
@@ -668,6 +699,7 @@ class BigQueryColumnCheckOperator(
          accept_none: bool = True,
          encryption_configuration: dict | None = None,
          gcp_conn_id: str = "google_cloud_default",
+         project_id: str = PROVIDE_PROJECT_ID,
          use_legacy_sql: bool = True,
          location: str | None = None,
          impersonation_chain: str | Sequence[str] | None = None,
@@ -693,6 +725,7 @@ class BigQueryColumnCheckOperator(
          self.location = location
          self.impersonation_chain = impersonation_chain
          self.labels = labels
+         self.project_id = project_id

      def _submit_job(
          self,
@@ -704,7 +737,7 @@ class BigQueryColumnCheckOperator(
          self.include_encryption_configuration(configuration, "query")
          return hook.insert_job(
              configuration=configuration,
-             project_id=hook.project_id,
+             project_id=self.project_id,
              location=self.location,
              job_id=job_id,
              nowait=False,
@@ -713,6 +746,9 @@ class BigQueryColumnCheckOperator(
      def execute(self, context=None):
          """Perform checks on the given columns."""
          hook = self.get_db_hook()
+
+         if self.project_id is None:
+             self.project_id = hook.project_id
          failed_tests = []

          job = self._submit_job(hook, job_id="")
@@ -784,6 +820,7 @@ class BigQueryTableCheckOperator(
          account from the list granting this role to the originating account (templated).
      :param labels: a dictionary containing labels for the table, passed to BigQuery
      :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+     :param project_id: Google Cloud Project where the job is running

          .. code-block:: python

@@ -803,6 +840,7 @@ class BigQueryTableCheckOperator(
          checks: dict,
          partition_clause: str | None = None,
          gcp_conn_id: str = "google_cloud_default",
+         project_id: str = PROVIDE_PROJECT_ID,
          use_legacy_sql: bool = True,
          location: str | None = None,
          impersonation_chain: str | Sequence[str] | None = None,
@@ -817,6 +855,7 @@ class BigQueryTableCheckOperator(
          self.impersonation_chain = impersonation_chain
          self.labels = labels
          self.encryption_configuration = encryption_configuration
+         self.project_id = project_id

      def _submit_job(
          self,
@@ -830,7 +869,7 @@ class BigQueryTableCheckOperator(

          return hook.insert_job(
              configuration=configuration,
-             project_id=hook.project_id,
+             project_id=self.project_id,
              location=self.location,
              job_id=job_id,
              nowait=False,
@@ -839,6 +878,8 @@ class BigQueryTableCheckOperator(
      def execute(self, context=None):
          """Execute the given checks on the table."""
          hook = self.get_db_hook()
+         if self.project_id is None:
+             self.project_id = hook.project_id
          job = self._submit_job(hook, job_id="")
          context["ti"].xcom_push(key="job_id", value=job.job_id)
          records = job.result().to_dataframe()
@@ -972,6 +1013,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator, _BigQueryOperatorsEncrypt
          "project_id",
          "max_results",
          "selected_fields",
+         "gcp_conn_id",
          "impersonation_chain",
      )
      ui_color = BigQueryUIColors.QUERY.value
@@ -1115,7 +1157,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator, _BigQueryOperatorsEncrypt
                  "BigQueryHook.list_rows() returns iterator when return_iterator is False (default)"
              )
          self.log.info("Total extracted rows: %s", len(rows))
-
+         table_data: list[dict[str, Any]] | list[Any]
          if self.as_dict:
              table_data = [dict(row) for row in rows]
          else:
@@ -1213,6 +1255,7 @@ class BigQueryCreateTableOperator(GoogleCloudBaseOperator):
          "table_resource",
          "project_id",
          "gcs_schema_object",
+         "gcp_conn_id",
          "impersonation_chain",
      )
      template_fields_renderers = {"table_resource": "json"}
@@ -1283,7 +1326,6 @@ class BigQueryCreateTableOperator(GoogleCloudBaseOperator):
              if self._table:
                  persist_kwargs = {
                      "context": context,
-                     "task_instance": self,
                      "project_id": self._table.to_api_repr()["tableReference"]["projectId"],
                      "dataset_id": self._table.to_api_repr()["tableReference"]["datasetId"],
                      "table_id": self._table.to_api_repr()["tableReference"]["tableId"],
@@ -1302,7 +1344,6 @@ class BigQueryCreateTableOperator(GoogleCloudBaseOperator):
                  self.log.info(error_msg)
                  persist_kwargs = {
                      "context": context,
-                     "task_instance": self,
                      "project_id": self.project_id or bq_hook.project_id,
                      "dataset_id": self.dataset_id,
                      "table_id": self.table_id,
@@ -1336,610 +1377,6 @@ class BigQueryCreateTableOperator(GoogleCloudBaseOperator):
          return OperatorLineage(outputs=[output_dataset])


- @deprecated(
-     planned_removal_date="July 30, 2025",
-     use_instead="airflow.providers.google.cloud.operators.bigquery.BigQueryCreateTableOperator",
-     category=AirflowProviderDeprecationWarning,
- )
- class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
-     """
-     Creates a new table in the specified BigQuery dataset, optionally with schema.
-
-     The schema to be used for the BigQuery table may be specified in one of
-     two ways. You may either directly pass the schema fields in, or you may
-     point the operator to a Google Cloud Storage object name. The object in
-     Google Cloud Storage must be a JSON file with the schema fields in it.
-     You can also create a table without schema.
-
-     .. seealso::
-         For more information on how to use this operator, take a look at the guide:
-         :ref:`howto/operator:BigQueryCreateEmptyTableOperator`
-
-     :param project_id: The project to create the table into. (templated)
-     :param dataset_id: The dataset to create the table into. (templated)
-     :param table_id: The Name of the table to be created. (templated)
-     :param table_resource: Table resource as described in documentation:
-         https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table
-         If provided all other parameters are ignored. (templated)
-     :param schema_fields: If set, the schema field list as defined here:
-         https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
-
-         **Example**::
-
-             schema_fields = [
-                 {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
-                 {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
-             ]
-
-     :param gcs_schema_object: Full path to the JSON file containing
-         schema (templated). For
-         example: ``gs://test-bucket/dir1/dir2/employee_schema.json``
-     :param time_partitioning: configure optional time partitioning fields i.e.
-         partition by field, type and expiration as per API specifications.
-
-         .. seealso::
-             https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning
-     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud and
-         interact with the Bigquery service.
-     :param google_cloud_storage_conn_id: (Optional) The connection ID used to connect to Google Cloud.
-         and interact with the Google Cloud Storage service.
-     :param labels: a dictionary containing labels for the table, passed to BigQuery
-
-         **Example (with schema JSON in GCS)**::
-
-             CreateTable = BigQueryCreateEmptyTableOperator(
-                 task_id="BigQueryCreateEmptyTableOperator_task",
-                 dataset_id="ODS",
-                 table_id="Employees",
-                 project_id="internal-gcp-project",
-                 gcs_schema_object="gs://schema-bucket/employee_schema.json",
-                 gcp_conn_id="airflow-conn-id",
-                 google_cloud_storage_conn_id="airflow-conn-id",
-             )
-
-         **Corresponding Schema file** (``employee_schema.json``)::
-
-             [
-                 {"mode": "NULLABLE", "name": "emp_name", "type": "STRING"},
-                 {"mode": "REQUIRED", "name": "salary", "type": "INTEGER"},
-             ]
-
-         **Example (with schema in the DAG)**::
-
-             CreateTable = BigQueryCreateEmptyTableOperator(
-                 task_id="BigQueryCreateEmptyTableOperator_task",
-                 dataset_id="ODS",
-                 table_id="Employees",
-                 project_id="internal-gcp-project",
-                 schema_fields=[
-                     {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
-                     {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
-                 ],
-                 gcp_conn_id="airflow-conn-id-account",
-                 google_cloud_storage_conn_id="airflow-conn-id",
-             )
-
-     :param view: (Optional) A dictionary containing definition for the view.
-         If set, it will create a view instead of a table:
-
-         .. seealso::
-             https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
-     :param materialized_view: (Optional) The materialized view definition.
-     :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
-
-         .. code-block:: python
-
-             encryption_configuration = {
-                 "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
-             }
-     :param location: The location used for the operation.
-     :param cluster_fields: (Optional) The fields used for clustering.
-         BigQuery supports clustering for both partitioned and
-         non-partitioned tables.
-
-         .. seealso::
-             https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clustering.fields
-     :param impersonation_chain: Optional service account to impersonate using short-term
-         credentials, or chained list of accounts required to get the access_token
-         of the last account in the list, which will be impersonated in the request.
-         If set as a string, the account must grant the originating account
-         the Service Account Token Creator IAM role.
-         If set as a sequence, the identities from the list must grant
-         Service Account Token Creator IAM role to the directly preceding identity, with first
-         account from the list granting this role to the originating account (templated).
-     :param if_exists: What should Airflow do if the table exists. If set to `log`, the TI will be passed to
-         success and an error message will be logged. Set to `ignore` to ignore the error, set to `fail` to
-         fail the TI, and set to `skip` to skip it.
-     :param exists_ok: Deprecated - use `if_exists="ignore"` instead.
-     """
-
-     template_fields: Sequence[str] = (
-         "dataset_id",
-         "table_id",
-         "table_resource",
-         "project_id",
-         "gcs_schema_object",
-         "labels",
-         "view",
-         "materialized_view",
-         "impersonation_chain",
-     )
-     template_fields_renderers = {"table_resource": "json", "materialized_view": "json"}
-     ui_color = BigQueryUIColors.TABLE.value
-     operator_extra_links = (BigQueryTableLink(),)
-
-     def __init__(
-         self,
-         *,
-         dataset_id: str,
-         table_id: str,
-         table_resource: dict[str, Any] | None = None,
-         project_id: str = PROVIDE_PROJECT_ID,
-         schema_fields: list | None = None,
-         gcs_schema_object: str | None = None,
-         time_partitioning: dict | None = None,
-         gcp_conn_id: str = "google_cloud_default",
-         google_cloud_storage_conn_id: str = "google_cloud_default",
-         labels: dict | None = None,
-         view: dict | None = None,
-         materialized_view: dict | None = None,
-         encryption_configuration: dict | None = None,
-         location: str | None = None,
-         cluster_fields: list[str] | None = None,
-         impersonation_chain: str | Sequence[str] | None = None,
-         if_exists: str = "log",
-         bigquery_conn_id: str | None = None,
-         exists_ok: bool | None = None,
-         **kwargs,
-     ) -> None:
-         if bigquery_conn_id:
-             warnings.warn(
-                 "The bigquery_conn_id parameter has been deprecated. Use the gcp_conn_id parameter instead.",
-                 AirflowProviderDeprecationWarning,
-                 stacklevel=2,
-             )
-             gcp_conn_id = bigquery_conn_id
-
-         super().__init__(**kwargs)
-
-         self.project_id = project_id
-         self.dataset_id = dataset_id
-         self.table_id = table_id
-         self.schema_fields = schema_fields
-         self.gcs_schema_object = gcs_schema_object
-         self.gcp_conn_id = gcp_conn_id
-         self.google_cloud_storage_conn_id = google_cloud_storage_conn_id
-         self.time_partitioning = time_partitioning or {}
-         self.labels = labels
-         self.view = view
-         self.materialized_view = materialized_view
-         self.encryption_configuration = encryption_configuration
-         self.location = location
-         self.cluster_fields = cluster_fields
-         self.table_resource = table_resource
-         self.impersonation_chain = impersonation_chain
-         self._table: Table | None = None
-         if exists_ok is not None:
-             warnings.warn(
-                 "`exists_ok` parameter is deprecated, please use `if_exists`",
-                 AirflowProviderDeprecationWarning,
-                 stacklevel=2,
-             )
-             self.if_exists = IfExistAction.IGNORE if exists_ok else IfExistAction.LOG
-         else:
-             self.if_exists = IfExistAction(if_exists)
-
-     def execute(self, context: Context) -> None:
-         bq_hook = BigQueryHook(
-             gcp_conn_id=self.gcp_conn_id,
-             location=self.location,
-             impersonation_chain=self.impersonation_chain,
-         )
-
-         if not self.schema_fields and self.gcs_schema_object:
-             gcs_bucket, gcs_object = _parse_gcs_url(self.gcs_schema_object)
-             gcs_hook = GCSHook(
-                 gcp_conn_id=self.google_cloud_storage_conn_id,
-                 impersonation_chain=self.impersonation_chain,
-             )
-             schema_fields_string = gcs_hook.download_as_byte_array(gcs_bucket, gcs_object).decode("utf-8")
-             schema_fields = json.loads(schema_fields_string)
-         else:
-             schema_fields = self.schema_fields
-
-         try:
-             self.log.info("Creating table")
-             # Save table as attribute for further use by OpenLineage
-             self._table = bq_hook.create_empty_table(
-                 project_id=self.project_id,
-                 dataset_id=self.dataset_id,
-                 table_id=self.table_id,
-                 schema_fields=schema_fields,
-                 time_partitioning=self.time_partitioning,
-                 cluster_fields=self.cluster_fields,
-                 labels=self.labels,
-                 view=self.view,
-                 materialized_view=self.materialized_view,
-                 encryption_configuration=self.encryption_configuration,
-                 table_resource=self.table_resource,
-                 exists_ok=self.if_exists == IfExistAction.IGNORE,
-             )
-             if self._table:
-                 persist_kwargs = {
-                     "context": context,
-                     "task_instance": self,
-                     "project_id": self._table.to_api_repr()["tableReference"]["projectId"],
-                     "dataset_id": self._table.to_api_repr()["tableReference"]["datasetId"],
-                     "table_id": self._table.to_api_repr()["tableReference"]["tableId"],
-                 }
-                 self.log.info(
-                     "Table %s.%s.%s created successfully",
-                     self._table.project,
-                     self._table.dataset_id,
-                     self._table.table_id,
-                 )
-             else:
-                 raise AirflowException("Table creation failed.")
-         except Conflict:
-             error_msg = f"Table {self.dataset_id}.{self.table_id} already exists."
-             if self.if_exists == IfExistAction.LOG:
-                 self.log.info(error_msg)
-                 persist_kwargs = {
-                     "context": context,
-                     "task_instance": self,
-                     "project_id": self.project_id or bq_hook.project_id,
-                     "dataset_id": self.dataset_id,
-                     "table_id": self.table_id,
-                 }
-             elif self.if_exists == IfExistAction.FAIL:
-                 raise AirflowException(error_msg)
-             else:
-                 raise AirflowSkipException(error_msg)
-
-         BigQueryTableLink.persist(**persist_kwargs)
-
-     def get_openlineage_facets_on_complete(self, _):
-         """Implement _on_complete as we will use table resource returned by create method."""
-         from airflow.providers.common.compat.openlineage.facet import Dataset
-         from airflow.providers.google.cloud.openlineage.utils import (
-             BIGQUERY_NAMESPACE,
-             get_facets_from_bq_table,
-         )
-         from airflow.providers.openlineage.extractors import OperatorLineage
-
-         if not self._table:
-             self.log.debug("OpenLineage did not find `self._table` attribute.")
-             return OperatorLineage()
-
-         output_dataset = Dataset(
-             namespace=BIGQUERY_NAMESPACE,
-             name=f"{self._table.project}.{self._table.dataset_id}.{self._table.table_id}",
-             facets=get_facets_from_bq_table(self._table),
-         )
-
-         return OperatorLineage(outputs=[output_dataset])
-
-
- @deprecated(
-     planned_removal_date="July 30, 2025",
-     use_instead="airflow.providers.google.cloud.operators.bigquery.BigQueryCreateTableOperator",
-     category=AirflowProviderDeprecationWarning,
- )
- class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
-     """
-     Create a new external table with data from Google Cloud Storage.
-
-     The schema to be used for the BigQuery table may be specified in one of
-     two ways. You may either directly pass the schema fields in, or you may
-     point the operator to a Google Cloud Storage object name. The object in
-     Google Cloud Storage must be a JSON file with the schema fields in it.
-
-     .. seealso::
-         For more information on how to use this operator, take a look at the guide:
-         :ref:`howto/operator:BigQueryCreateExternalTableOperator`
-
-     :param bucket: The bucket to point the external table to. (templated)
-     :param source_objects: List of Google Cloud Storage URIs to point
-         table to. If source_format is 'DATASTORE_BACKUP', the list must only contain a single URI.
-     :param destination_project_dataset_table: The dotted ``(<project>.)<dataset>.<table>``
-         BigQuery table to load data into (templated). If ``<project>`` is not included,
-         project will be the project defined in the connection json.
-     :param schema_fields: If set, the schema field list as defined here:
-         https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
-
-         **Example**::
-
-             schema_fields = [
-                 {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
-                 {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
-             ]
-
-         Should not be set when source_format is 'DATASTORE_BACKUP'.
-     :param table_resource: Table resource as described in documentation:
-         https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table
-         If provided all other parameters are ignored. External schema from object will be resolved.
-     :param schema_object: If set, a GCS object path pointing to a .json file that
-         contains the schema for the table. (templated)
-     :param gcs_schema_bucket: GCS bucket name where the schema JSON is stored (templated).
-         The default value is self.bucket.
-     :param source_format: File format of the data.
-     :param autodetect: Try to detect schema and format options automatically.
-         The schema_fields and schema_object options will be honored when specified explicitly.
-         https://cloud.google.com/bigquery/docs/schema-detect#schema_auto-detection_for_external_data_sources
-     :param compression: (Optional) The compression type of the data source.
-         Possible values include GZIP and NONE.
-         The default value is NONE.
-         This setting is ignored for Google Cloud Bigtable,
-         Google Cloud Datastore backups and Avro formats.
-     :param skip_leading_rows: Number of rows to skip when loading from a CSV.
-     :param field_delimiter: The delimiter to use for the CSV.
-     :param max_bad_records: The maximum number of bad records that BigQuery can
-         ignore when running the job.
-     :param quote_character: The value that is used to quote data sections in a CSV file.
-     :param allow_quoted_newlines: Whether to allow quoted newlines (true) or not (false).
-     :param allow_jagged_rows: Accept rows that are missing trailing optional columns.
-         The missing values are treated as nulls. If false, records with missing trailing
-         columns are treated as bad records, and if there are too many bad records, an
-         invalid error is returned in the job result. Only applicable to CSV, ignored
-         for other formats.
-     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud and
-         interact with the Bigquery service.
-     :param google_cloud_storage_conn_id: (Optional) The connection ID used to connect to Google Cloud
-         and interact with the Google Cloud Storage service.
-     :param src_fmt_configs: configure optional fields specific to the source format
-     :param labels: a dictionary containing labels for the table, passed to BigQuery
-     :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
-
-         .. code-block:: python
-
-             encryption_configuration = {
-                 "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
-             }
-     :param location: The location used for the operation.
-     :param impersonation_chain: Optional service account to impersonate using short-term
-         credentials, or chained list of accounts required to get the access_token
-         of the last account in the list, which will be impersonated in the request.
-         If set as a string, the account must grant the originating account
-         the Service Account Token Creator IAM role.
-         If set as a sequence, the identities from the list must grant
-         Service Account Token Creator IAM role to the directly preceding identity, with first
-         account from the list granting this role to the originating account (templated).
-     """
-
-     template_fields: Sequence[str] = (
-         "bucket",
-         "source_objects",
-         "schema_object",
-         "gcs_schema_bucket",
-         "destination_project_dataset_table",
-         "labels",
-         "table_resource",
-         "impersonation_chain",
-     )
-     template_fields_renderers = {"table_resource": "json"}
-     ui_color = BigQueryUIColors.TABLE.value
-     operator_extra_links = (BigQueryTableLink(),)
-
-     def __init__(
-         self,
-         *,
-         bucket: str | None = None,
-         source_objects: list[str] | None = None,
-         destination_project_dataset_table: str | None = None,
-         table_resource: dict[str, Any] | None = None,
-         schema_fields: list | None = None,
-         schema_object: str | None = None,
-         gcs_schema_bucket: str | None = None,
-         source_format: str | None = None,
-         autodetect: bool = False,
-         compression: str | None = None,
-         skip_leading_rows: int | None = None,
-         field_delimiter: str | None = None,
-         max_bad_records: int = 0,
-         quote_character: str | None = None,
-         allow_quoted_newlines: bool = False,
-         allow_jagged_rows: bool = False,
-         gcp_conn_id: str = "google_cloud_default",
-         google_cloud_storage_conn_id: str = "google_cloud_default",
-         src_fmt_configs: dict | None = None,
-         labels: dict | None = None,
-         encryption_configuration: dict | None = None,
-         location: str | None = None,
-         impersonation_chain: str | Sequence[str] | None = None,
-         bigquery_conn_id: str | None = None,
-         **kwargs,
-     ) -> None:
-         if bigquery_conn_id:
-             warnings.warn(
-                 "The bigquery_conn_id parameter has been deprecated. Use the gcp_conn_id parameter instead.",
-                 AirflowProviderDeprecationWarning,
-                 stacklevel=2,
-             )
-             gcp_conn_id = bigquery_conn_id
-
-         super().__init__(**kwargs)
-
-         self.table_resource = table_resource
-         self.bucket = bucket or ""
-         self.source_objects = source_objects or []
-         self.schema_object = schema_object or None
-         self.gcs_schema_bucket = gcs_schema_bucket or ""
-         self.destination_project_dataset_table = destination_project_dataset_table or ""
-
-         # BQ config
-         kwargs_passed = any(
-             [
-                 destination_project_dataset_table,
-                 schema_fields,
-                 source_format,
-                 compression,
-                 skip_leading_rows,
-                 field_delimiter,
-                 max_bad_records,
-                 autodetect,
-                 quote_character,
-                 allow_quoted_newlines,
-                 allow_jagged_rows,
-                 src_fmt_configs,
-                 labels,
-                 encryption_configuration,
-             ]
-         )
-
-         if not table_resource:
-             warnings.warn(
-                 "Passing table parameters via keywords arguments will be deprecated. "
-                 "Please provide table definition using `table_resource` parameter.",
-                 AirflowProviderDeprecationWarning,
-                 stacklevel=2,
-             )
-             if not bucket:
-                 raise ValueError("`bucket` is required when not using `table_resource`.")
-             if not gcs_schema_bucket:
-                 gcs_schema_bucket = bucket
-             if not source_objects:
-                 raise ValueError("`source_objects` is required when not using `table_resource`.")
-             if not source_format:
-                 source_format = "CSV"
-             if not compression:
-                 compression = "NONE"
-             if not skip_leading_rows:
-                 skip_leading_rows = 0
-             if not field_delimiter:
-                 field_delimiter = ","
-             if not destination_project_dataset_table:
-                 raise ValueError(
-                     "`destination_project_dataset_table` is required when not using `table_resource`."
-                 )
-             self.bucket = bucket
-             self.source_objects = source_objects
-             self.schema_object = schema_object
-             self.gcs_schema_bucket = gcs_schema_bucket
-             self.destination_project_dataset_table = destination_project_dataset_table
-             self.schema_fields = schema_fields
-             self.source_format = source_format
-             self.compression = compression
-             self.skip_leading_rows = skip_leading_rows
-             self.field_delimiter = field_delimiter
-             self.table_resource = None
-         else:
-             pass
-
-         if table_resource and kwargs_passed:
-             raise ValueError("You provided both `table_resource` and exclusive keywords arguments.")
-
-         self.max_bad_records = max_bad_records
-         self.quote_character = quote_character
-         self.allow_quoted_newlines = allow_quoted_newlines
-         self.allow_jagged_rows = allow_jagged_rows
-         self.gcp_conn_id = gcp_conn_id
-         self.google_cloud_storage_conn_id = google_cloud_storage_conn_id
-         self.autodetect = autodetect
-
-         self.src_fmt_configs = src_fmt_configs or {}
-         self.labels = labels
-         self.encryption_configuration = encryption_configuration
-         self.location = location
-         self.impersonation_chain = impersonation_chain
-         self._table: Table | None = None
-
-     def execute(self, context: Context) -> None:
-         bq_hook = BigQueryHook(
-             gcp_conn_id=self.gcp_conn_id,
-             location=self.location,
-             impersonation_chain=self.impersonation_chain,
-         )
-         if self.table_resource:
-             # Save table as attribute for further use by OpenLineage
-             self._table = bq_hook.create_empty_table(
-                 table_resource=self.table_resource,
-             )
-             if self._table:
-                 BigQueryTableLink.persist(
-                     context=context,
-                     task_instance=self,
-                     dataset_id=self._table.dataset_id,
-                     project_id=self._table.project,
-                     table_id=self._table.table_id,
-                 )
-             return
-
-         if not self.schema_fields and self.schema_object and self.source_format != "DATASTORE_BACKUP":
-             gcs_hook = GCSHook(
-                 gcp_conn_id=self.google_cloud_storage_conn_id,
-                 impersonation_chain=self.impersonation_chain,
-             )
-             schema_fields = json.loads(
-                 gcs_hook.download(self.gcs_schema_bucket, self.schema_object).decode("utf-8")
-             )
-         else:
-             schema_fields = self.schema_fields
-
-         source_uris = [f"gs://{self.bucket}/{source_object}" for source_object in self.source_objects]
-
-         project_id, dataset_id, table_id = bq_hook.split_tablename(
-             table_input=self.destination_project_dataset_table,
-             default_project_id=bq_hook.project_id or "",
-         )
-
-         external_data_configuration = {
-             "source_uris": source_uris,
-             "source_format": self.source_format,
-             "autodetect": self.autodetect,
-             "compression": self.compression,
-             "maxBadRecords": self.max_bad_records,
-         }
-         if self.source_format == "CSV":
-             external_data_configuration["csvOptions"] = {
-                 "fieldDelimiter": self.field_delimiter,
-                 "skipLeadingRows": self.skip_leading_rows,
-                 "quote": self.quote_character,
-                 "allowQuotedNewlines": self.allow_quoted_newlines,
-                 "allowJaggedRows": self.allow_jagged_rows,
-             }
-
-         table_resource = {
-             "tableReference": {
-                 "projectId": project_id,
-                 "datasetId": dataset_id,
-                 "tableId": table_id,
-             },
-             "labels": self.labels,
-             "schema": {"fields": schema_fields},
-             "externalDataConfiguration": external_data_configuration,
-             "location": self.location,
-             "encryptionConfiguration": self.encryption_configuration,
-         }
-
-         # Save table as attribute for further use by OpenLineage
-         self._table = bq_hook.create_empty_table(table_resource=table_resource)
-         if self._table:
-             BigQueryTableLink.persist(
-                 context=context,
-                 task_instance=self,
-                 dataset_id=self._table.dataset_id,
-                 project_id=self._table.project,
-                 table_id=self._table.table_id,
-             )
-
-     def get_openlineage_facets_on_complete(self, _):
-         """Implement _on_complete as we will use table resource returned by create method."""
-         from airflow.providers.common.compat.openlineage.facet import Dataset
1928
- from airflow.providers.google.cloud.openlineage.utils import (
1929
- BIGQUERY_NAMESPACE,
1930
- get_facets_from_bq_table,
1931
- )
1932
- from airflow.providers.openlineage.extractors import OperatorLineage
1933
-
1934
- output_dataset = Dataset(
1935
- namespace=BIGQUERY_NAMESPACE,
1936
- name=f"{self._table.project}.{self._table.dataset_id}.{self._table.table_id}",
1937
- facets=get_facets_from_bq_table(self._table),
1938
- )
1939
-
1940
- return OperatorLineage(outputs=[output_dataset])
1941
-
1942
-
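For reference, the keyword-argument path removed above assembled an external table definition and passed it to BigQueryHook.create_empty_table(table_resource=...). A minimal sketch of the equivalent dict a caller can build directly instead, mirroring the removed execute() logic; the project, dataset, table, and bucket names below are illustrative only, not taken from this diff:

table_resource = {
    "tableReference": {
        "projectId": "my-project",  # illustrative
        "datasetId": "my_dataset",  # illustrative
        "tableId": "my_external_table",  # illustrative
    },
    "externalDataConfiguration": {
        "source_uris": ["gs://my-bucket/data/part-*.csv"],  # illustrative GCS URI
        "source_format": "CSV",
        "autodetect": True,
        "compression": "NONE",
        "maxBadRecords": 0,
        # csvOptions is only attached for CSV sources in the removed code above
        "csvOptions": {
            "fieldDelimiter": ",",
            "skipLeadingRows": 1,
            "quote": '"',
            "allowQuotedNewlines": False,
            "allowJaggedRows": False,
        },
    },
    # The removed path also filled in "labels", "schema", "location" and
    # "encryptionConfiguration" from the corresponding keyword arguments.
}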
  class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):
  """
  Delete an existing dataset from your Project in BigQuery.
@@ -1981,6 +1418,7 @@ class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):
  template_fields: Sequence[str] = (
  "dataset_id",
  "project_id",
+ "gcp_conn_id",
  "impersonation_chain",
  )
  ui_color = BigQueryUIColors.DATASET.value
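This hunk, and several hunks below, add "gcp_conn_id" to template_fields, so the connection id can be rendered from a Jinja template at runtime. A minimal usage sketch under that assumption (DAG wiring omitted; the dataset, project, and Variable names are illustrative):

from airflow.providers.google.cloud.operators.bigquery import BigQueryDeleteDatasetOperator

delete_stale_dataset = BigQueryDeleteDatasetOperator(
    task_id="delete_stale_dataset",
    dataset_id="stale_dataset",  # illustrative
    project_id="my-project",  # illustrative
    delete_contents=True,
    gcp_conn_id="{{ var.value.bq_conn_id }}",  # templated, now that gcp_conn_id is in template_fields
)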
@@ -2060,6 +1498,7 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
  "dataset_id",
  "project_id",
  "dataset_reference",
+ "gcp_conn_id",
  "impersonation_chain",
  )
  template_fields_renderers = {"dataset_reference": "json"}
@@ -2114,7 +1553,6 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
  )
  persist_kwargs = {
  "context": context,
- "task_instance": self,
  "project_id": dataset["datasetReference"]["projectId"],
  "dataset_id": dataset["datasetReference"]["datasetId"],
  }
@@ -2126,7 +1564,6 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
  )
  persist_kwargs = {
  "context": context,
- "task_instance": self,
  "project_id": project_id,
  "dataset_id": dataset_id,
  }
@@ -2166,6 +1603,7 @@ class BigQueryGetDatasetOperator(GoogleCloudBaseOperator):
  template_fields: Sequence[str] = (
  "dataset_id",
  "project_id",
+ "gcp_conn_id",
  "impersonation_chain",
  )
  ui_color = BigQueryUIColors.DATASET.value
@@ -2198,7 +1636,6 @@ class BigQueryGetDatasetOperator(GoogleCloudBaseOperator):
  dataset_api_repr = dataset.to_api_repr()
  BigQueryDatasetLink.persist(
  context=context,
- task_instance=self,
  dataset_id=dataset_api_repr["datasetReference"]["datasetId"],
  project_id=dataset_api_repr["datasetReference"]["projectId"],
  )
@@ -2231,6 +1668,7 @@ class BigQueryGetDatasetTablesOperator(GoogleCloudBaseOperator):
  template_fields: Sequence[str] = (
  "dataset_id",
  "project_id",
+ "gcp_conn_id",
  "impersonation_chain",
  )
  ui_color = BigQueryUIColors.DATASET.value
@@ -2301,6 +1739,7 @@ class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
  "dataset_id",
  "table_id",
  "project_id",
+ "gcp_conn_id",
  "impersonation_chain",
  )
  template_fields_renderers = {"table_resource": "json"}
@@ -2347,7 +1786,6 @@ class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
  if self._table:
  BigQueryTableLink.persist(
  context=context,
- task_instance=self,
  dataset_id=self._table["tableReference"]["datasetId"],
  project_id=self._table["tableReference"]["projectId"],
  table_id=self._table["tableReference"]["tableId"],
@@ -2408,6 +1846,7 @@ class BigQueryUpdateDatasetOperator(GoogleCloudBaseOperator):
  template_fields: Sequence[str] = (
  "dataset_id",
  "project_id",
+ "gcp_conn_id",
  "impersonation_chain",
  )
  template_fields_renderers = {"dataset_resource": "json"}
@@ -2450,7 +1889,6 @@ class BigQueryUpdateDatasetOperator(GoogleCloudBaseOperator):
  dataset_api_repr = dataset.to_api_repr()
  BigQueryDatasetLink.persist(
  context=context,
- task_instance=self,
  dataset_id=dataset_api_repr["datasetReference"]["datasetId"],
  project_id=dataset_api_repr["datasetReference"]["projectId"],
  )
@@ -2484,6 +1922,7 @@ class BigQueryDeleteTableOperator(GoogleCloudBaseOperator):

  template_fields: Sequence[str] = (
  "deletion_dataset_table",
+ "gcp_conn_id",
  "impersonation_chain",
  )
  ui_color = BigQueryUIColors.TABLE.value
@@ -2578,6 +2017,7 @@ class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
  template_fields: Sequence[str] = (
  "dataset_id",
  "table_resource",
+ "gcp_conn_id",
  "impersonation_chain",
  "project_id",
  )
@@ -2622,7 +2062,6 @@ class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
  if self._table:
  BigQueryTableLink.persist(
  context=context,
- task_instance=self,
  dataset_id=self._table["tableReference"]["datasetId"],
  project_id=self._table["tableReference"]["projectId"],
  table_id=self._table["tableReference"]["tableId"],
@@ -2706,6 +2145,7 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
  "dataset_id",
  "table_id",
  "project_id",
+ "gcp_conn_id",
  "impersonation_chain",
  )
  template_fields_renderers = {"schema_fields_updates": "json"}
@@ -2752,7 +2192,6 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
  if self._table:
  BigQueryTableLink.persist(
  context=context,
- task_instance=self,
  dataset_id=self._table["tableReference"]["datasetId"],
  project_id=self._table["tableReference"]["projectId"],
  table_id=self._table["tableReference"]["tableId"],
@@ -2836,6 +2275,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera
  template_fields: Sequence[str] = (
  "configuration",
  "job_id",
+ "gcp_conn_id",
  "impersonation_chain",
  "project_id",
  )
@@ -2895,7 +2335,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera

  def _add_job_labels(self) -> None:
  dag_label = self.dag_id.lower()
- task_label = self.task_id.lower()
+ task_label = self.task_id.lower().replace(".", "-")

  if LABEL_REGEX.match(dag_label) and LABEL_REGEX.match(task_label):
  automatic_labels = {"airflow-dag": dag_label, "airflow-task": task_label}
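The changed line above strips dots from the task id before it is used as a BigQuery label value: task ids inside TaskGroups are dot-qualified (e.g. "group.task"), and dots are not allowed in BigQuery label values, so the old lower-cased value failed the LABEL_REGEX check and the automatic labels were skipped. A small illustration; LABEL_REGEX itself is not visible in this diff, so the pattern below is only an assumed stand-in:

import re

LABEL_REGEX = re.compile(r"^[\w-]{0,63}$")  # assumed stand-in for the module-level pattern

task_id = "extract_group.load_to_bq"  # illustrative TaskGroup-qualified task id
old_label = task_id.lower()  # contains "." -> no LABEL_REGEX match, labels skipped
new_label = task_id.lower().replace(".", "-")  # "extract_group-load_to_bq" -> matches

print(bool(LABEL_REGEX.match(old_label)), bool(LABEL_REGEX.match(new_label)))  # False True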
@@ -2947,8 +2387,9 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera
  job_id=self.job_id,
  dag_id=self.dag_id,
  task_id=self.task_id,
- logical_date=context["logical_date"],
+ logical_date=None,
  configuration=self.configuration,
+ run_after=hook.get_run_after_or_logical_date(context),
  force_rerun=self.force_rerun,
  )

@@ -2974,14 +2415,13 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera
  f"Or, if you want to reattach in this scenario add {job.state} to `reattach_states`"
  )

- else:
- # Job already reached state DONE
- if job.state == "DONE":
- raise AirflowException("Job is already in state DONE. Can not reattach to this job.")
+ # Job already reached state DONE
+ if job.state == "DONE":
+ raise AirflowException("Job is already in state DONE. Can not reattach to this job.")

- # We are reattaching to a job
- self.log.info("Reattaching to existing Job in state %s", job.state)
- self._handle_job_error(job)
+ # We are reattaching to a job
+ self.log.info("Reattaching to existing Job in state %s", job.state)
+ self._handle_job_error(job)

  job_types = {
  LoadJob._JOB_TYPE: ["sourceTable", "destinationTable"],
@@ -2999,7 +2439,6 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera
  table = job_configuration[job_type][table_prop]
  persist_kwargs = {
  "context": context,
- "task_instance": self,
  "project_id": self.project_id,
  "table_id": table,
  }
@@ -3013,7 +2452,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera

  if self.project_id:
  job_id_path = convert_job_id(
- job_id=self.job_id, # type: ignore[arg-type]
+ job_id=self.job_id,
  project_id=self.project_id,
  location=self.location,
  )
@@ -3021,7 +2460,6 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera

  persist_kwargs = {
  "context": context,
- "task_instance": self,
  "project_id": self.project_id,
  "location": self.location,
  "job_id": self.job_id,
@@ -3034,24 +2472,23 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera
  self._handle_job_error(job)

  return self.job_id
- else:
- if job.running():
- self.defer(
- timeout=self.execution_timeout,
- trigger=BigQueryInsertJobTrigger(
- conn_id=self.gcp_conn_id,
- job_id=self.job_id,
- project_id=self.project_id,
- location=self.location or hook.location,
- poll_interval=self.poll_interval,
- impersonation_chain=self.impersonation_chain,
- cancel_on_kill=self.cancel_on_kill,
- ),
- method_name="execute_complete",
- )
- self.log.info("Current state of job %s is %s", job.job_id, job.state)
- self._handle_job_error(job)
- return self.job_id
+ if job.running():
+ self.defer(
+ timeout=self.execution_timeout,
+ trigger=BigQueryInsertJobTrigger(
+ conn_id=self.gcp_conn_id,
+ job_id=self.job_id,
+ project_id=self.project_id,
+ location=self.location or hook.location,
+ poll_interval=self.poll_interval,
+ impersonation_chain=self.impersonation_chain,
+ cancel_on_kill=self.cancel_on_kill,
+ ),
+ method_name="execute_complete",
+ )
+ self.log.info("Current state of job %s is %s", job.job_id, job.state)
+ self._handle_job_error(job)
+ return self.job_id

  def execute_complete(self, context: Context, event: dict[str, Any]) -> str | None:
  """