apache-airflow-providers-google 14.0.0__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl

This diff shows the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
Files changed (278)
  1. airflow/providers/google/3rd-party-licenses/LICENSES.txt +14 -0
  2. airflow/providers/google/3rd-party-licenses/NOTICE +5 -0
  3. airflow/providers/google/__init__.py +3 -3
  4. airflow/providers/google/_vendor/__init__.py +0 -0
  5. airflow/providers/google/_vendor/json_merge_patch.py +91 -0
  6. airflow/providers/google/ads/hooks/ads.py +52 -43
  7. airflow/providers/google/ads/operators/ads.py +2 -2
  8. airflow/providers/google/ads/transfers/ads_to_gcs.py +3 -19
  9. airflow/providers/google/assets/gcs.py +1 -11
  10. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +3 -2
  11. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  12. airflow/providers/google/cloud/hooks/alloy_db.py +2 -3
  13. airflow/providers/google/cloud/hooks/bigquery.py +195 -318
  14. airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
  15. airflow/providers/google/cloud/hooks/bigtable.py +3 -2
  16. airflow/providers/google/cloud/hooks/cloud_batch.py +8 -9
  17. airflow/providers/google/cloud/hooks/cloud_build.py +6 -65
  18. airflow/providers/google/cloud/hooks/cloud_composer.py +292 -24
  19. airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
  20. airflow/providers/google/cloud/hooks/cloud_memorystore.py +4 -3
  21. airflow/providers/google/cloud/hooks/cloud_run.py +20 -11
  22. airflow/providers/google/cloud/hooks/cloud_sql.py +136 -64
  23. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +35 -15
  24. airflow/providers/google/cloud/hooks/compute.py +7 -6
  25. airflow/providers/google/cloud/hooks/compute_ssh.py +7 -4
  26. airflow/providers/google/cloud/hooks/datacatalog.py +12 -3
  27. airflow/providers/google/cloud/hooks/dataflow.py +87 -242
  28. airflow/providers/google/cloud/hooks/dataform.py +9 -14
  29. airflow/providers/google/cloud/hooks/datafusion.py +7 -9
  30. airflow/providers/google/cloud/hooks/dataplex.py +13 -12
  31. airflow/providers/google/cloud/hooks/dataprep.py +2 -2
  32. airflow/providers/google/cloud/hooks/dataproc.py +76 -74
  33. airflow/providers/google/cloud/hooks/dataproc_metastore.py +4 -3
  34. airflow/providers/google/cloud/hooks/dlp.py +5 -4
  35. airflow/providers/google/cloud/hooks/gcs.py +144 -33
  36. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  37. airflow/providers/google/cloud/hooks/kms.py +3 -2
  38. airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -17
  39. airflow/providers/google/cloud/hooks/looker.py +6 -1
  40. airflow/providers/google/cloud/hooks/managed_kafka.py +227 -3
  41. airflow/providers/google/cloud/hooks/mlengine.py +7 -8
  42. airflow/providers/google/cloud/hooks/natural_language.py +3 -2
  43. airflow/providers/google/cloud/hooks/os_login.py +3 -2
  44. airflow/providers/google/cloud/hooks/pubsub.py +6 -6
  45. airflow/providers/google/cloud/hooks/secret_manager.py +105 -12
  46. airflow/providers/google/cloud/hooks/spanner.py +75 -10
  47. airflow/providers/google/cloud/hooks/speech_to_text.py +3 -2
  48. airflow/providers/google/cloud/hooks/stackdriver.py +18 -18
  49. airflow/providers/google/cloud/hooks/tasks.py +4 -3
  50. airflow/providers/google/cloud/hooks/text_to_speech.py +3 -2
  51. airflow/providers/google/cloud/hooks/translate.py +8 -17
  52. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -222
  53. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +9 -15
  54. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +33 -283
  55. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +5 -12
  56. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +6 -12
  57. airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
  58. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +311 -10
  59. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
  60. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +7 -13
  61. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +8 -12
  62. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +6 -12
  63. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +3 -2
  64. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
  65. airflow/providers/google/cloud/hooks/video_intelligence.py +3 -2
  66. airflow/providers/google/cloud/hooks/vision.py +7 -7
  67. airflow/providers/google/cloud/hooks/workflows.py +4 -3
  68. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  69. airflow/providers/google/cloud/links/base.py +77 -7
  70. airflow/providers/google/cloud/links/bigquery.py +0 -47
  71. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  72. airflow/providers/google/cloud/links/bigtable.py +0 -48
  73. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  74. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  75. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  76. airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
  77. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  78. airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -46
  79. airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
  80. airflow/providers/google/cloud/links/compute.py +0 -58
  81. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  82. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  83. airflow/providers/google/cloud/links/dataflow.py +0 -34
  84. airflow/providers/google/cloud/links/dataform.py +0 -64
  85. airflow/providers/google/cloud/links/datafusion.py +1 -90
  86. airflow/providers/google/cloud/links/dataplex.py +0 -154
  87. airflow/providers/google/cloud/links/dataprep.py +0 -24
  88. airflow/providers/google/cloud/links/dataproc.py +11 -89
  89. airflow/providers/google/cloud/links/datastore.py +0 -31
  90. airflow/providers/google/cloud/links/kubernetes_engine.py +11 -61
  91. airflow/providers/google/cloud/links/managed_kafka.py +11 -51
  92. airflow/providers/google/cloud/links/mlengine.py +0 -70
  93. airflow/providers/google/cloud/links/pubsub.py +0 -32
  94. airflow/providers/google/cloud/links/spanner.py +0 -33
  95. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  96. airflow/providers/google/cloud/links/translate.py +17 -187
  97. airflow/providers/google/cloud/links/vertex_ai.py +28 -195
  98. airflow/providers/google/cloud/links/workflows.py +0 -52
  99. airflow/providers/google/cloud/log/gcs_task_handler.py +166 -118
  100. airflow/providers/google/cloud/log/stackdriver_task_handler.py +14 -9
  101. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  102. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  103. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  104. airflow/providers/google/cloud/openlineage/facets.py +141 -40
  105. airflow/providers/google/cloud/openlineage/mixins.py +14 -13
  106. airflow/providers/google/cloud/openlineage/utils.py +19 -3
  107. airflow/providers/google/cloud/operators/alloy_db.py +76 -61
  108. airflow/providers/google/cloud/operators/bigquery.py +104 -667
  109. airflow/providers/google/cloud/operators/bigquery_dts.py +12 -12
  110. airflow/providers/google/cloud/operators/bigtable.py +38 -7
  111. airflow/providers/google/cloud/operators/cloud_base.py +22 -1
  112. airflow/providers/google/cloud/operators/cloud_batch.py +18 -18
  113. airflow/providers/google/cloud/operators/cloud_build.py +80 -36
  114. airflow/providers/google/cloud/operators/cloud_composer.py +157 -71
  115. airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
  116. airflow/providers/google/cloud/operators/cloud_memorystore.py +74 -46
  117. airflow/providers/google/cloud/operators/cloud_run.py +39 -20
  118. airflow/providers/google/cloud/operators/cloud_sql.py +46 -61
  119. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -14
  120. airflow/providers/google/cloud/operators/compute.py +18 -50
  121. airflow/providers/google/cloud/operators/datacatalog.py +167 -29
  122. airflow/providers/google/cloud/operators/dataflow.py +38 -15
  123. airflow/providers/google/cloud/operators/dataform.py +19 -7
  124. airflow/providers/google/cloud/operators/datafusion.py +43 -43
  125. airflow/providers/google/cloud/operators/dataplex.py +212 -126
  126. airflow/providers/google/cloud/operators/dataprep.py +1 -5
  127. airflow/providers/google/cloud/operators/dataproc.py +134 -207
  128. airflow/providers/google/cloud/operators/dataproc_metastore.py +102 -84
  129. airflow/providers/google/cloud/operators/datastore.py +22 -6
  130. airflow/providers/google/cloud/operators/dlp.py +24 -45
  131. airflow/providers/google/cloud/operators/functions.py +21 -14
  132. airflow/providers/google/cloud/operators/gcs.py +15 -12
  133. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  134. airflow/providers/google/cloud/operators/kubernetes_engine.py +115 -106
  135. airflow/providers/google/cloud/operators/looker.py +1 -1
  136. airflow/providers/google/cloud/operators/managed_kafka.py +362 -40
  137. airflow/providers/google/cloud/operators/natural_language.py +5 -3
  138. airflow/providers/google/cloud/operators/pubsub.py +69 -21
  139. airflow/providers/google/cloud/operators/spanner.py +53 -45
  140. airflow/providers/google/cloud/operators/speech_to_text.py +5 -4
  141. airflow/providers/google/cloud/operators/stackdriver.py +5 -11
  142. airflow/providers/google/cloud/operators/tasks.py +6 -15
  143. airflow/providers/google/cloud/operators/text_to_speech.py +4 -3
  144. airflow/providers/google/cloud/operators/translate.py +46 -20
  145. airflow/providers/google/cloud/operators/translate_speech.py +4 -3
  146. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +44 -34
  147. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +34 -12
  148. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +62 -53
  149. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +75 -11
  150. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +48 -12
  151. airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
  152. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
  153. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
  154. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +16 -12
  155. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +62 -14
  156. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +35 -10
  157. airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
  158. airflow/providers/google/cloud/operators/video_intelligence.py +5 -3
  159. airflow/providers/google/cloud/operators/vision.py +7 -5
  160. airflow/providers/google/cloud/operators/workflows.py +24 -19
  161. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  162. airflow/providers/google/cloud/sensors/bigquery.py +2 -2
  163. airflow/providers/google/cloud/sensors/bigquery_dts.py +6 -4
  164. airflow/providers/google/cloud/sensors/bigtable.py +14 -6
  165. airflow/providers/google/cloud/sensors/cloud_composer.py +535 -33
  166. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +6 -5
  167. airflow/providers/google/cloud/sensors/dataflow.py +27 -10
  168. airflow/providers/google/cloud/sensors/dataform.py +2 -2
  169. airflow/providers/google/cloud/sensors/datafusion.py +4 -4
  170. airflow/providers/google/cloud/sensors/dataplex.py +7 -5
  171. airflow/providers/google/cloud/sensors/dataprep.py +2 -2
  172. airflow/providers/google/cloud/sensors/dataproc.py +10 -9
  173. airflow/providers/google/cloud/sensors/dataproc_metastore.py +4 -3
  174. airflow/providers/google/cloud/sensors/gcs.py +22 -21
  175. airflow/providers/google/cloud/sensors/looker.py +5 -5
  176. airflow/providers/google/cloud/sensors/pubsub.py +20 -20
  177. airflow/providers/google/cloud/sensors/tasks.py +2 -2
  178. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
  179. airflow/providers/google/cloud/sensors/workflows.py +6 -4
  180. airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
  181. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
  182. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
  183. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
  184. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +14 -13
  185. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
  186. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  187. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
  188. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
  189. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  190. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +18 -22
  191. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -5
  192. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +45 -38
  193. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
  194. airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
  195. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
  196. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
  197. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
  198. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  199. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
  200. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  201. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
  202. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +44 -12
  203. airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
  204. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
  205. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +36 -14
  206. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
  207. airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
  208. airflow/providers/google/cloud/triggers/bigquery.py +75 -34
  209. airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
  210. airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
  211. airflow/providers/google/cloud/triggers/cloud_build.py +3 -2
  212. airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
  213. airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
  214. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +96 -5
  215. airflow/providers/google/cloud/triggers/dataflow.py +125 -2
  216. airflow/providers/google/cloud/triggers/datafusion.py +1 -1
  217. airflow/providers/google/cloud/triggers/dataplex.py +16 -3
  218. airflow/providers/google/cloud/triggers/dataproc.py +124 -53
  219. airflow/providers/google/cloud/triggers/kubernetes_engine.py +46 -28
  220. airflow/providers/google/cloud/triggers/mlengine.py +1 -1
  221. airflow/providers/google/cloud/triggers/pubsub.py +17 -20
  222. airflow/providers/google/cloud/triggers/vertex_ai.py +8 -7
  223. airflow/providers/google/cloud/utils/bigquery.py +5 -7
  224. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  225. airflow/providers/google/cloud/utils/credentials_provider.py +4 -3
  226. airflow/providers/google/cloud/utils/dataform.py +1 -1
  227. airflow/providers/google/cloud/utils/external_token_supplier.py +0 -1
  228. airflow/providers/google/cloud/utils/field_validator.py +1 -2
  229. airflow/providers/google/cloud/utils/validators.py +43 -0
  230. airflow/providers/google/common/auth_backend/google_openid.py +26 -9
  231. airflow/providers/google/common/consts.py +2 -1
  232. airflow/providers/google/common/deprecated.py +2 -1
  233. airflow/providers/google/common/hooks/base_google.py +40 -43
  234. airflow/providers/google/common/hooks/operation_helpers.py +78 -0
  235. airflow/providers/google/common/links/storage.py +0 -22
  236. airflow/providers/google/common/utils/get_secret.py +31 -0
  237. airflow/providers/google/common/utils/id_token_credentials.py +4 -5
  238. airflow/providers/google/firebase/operators/firestore.py +2 -2
  239. airflow/providers/google/get_provider_info.py +61 -216
  240. airflow/providers/google/go_module_utils.py +35 -3
  241. airflow/providers/google/leveldb/hooks/leveldb.py +30 -6
  242. airflow/providers/google/leveldb/operators/leveldb.py +2 -2
  243. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +3 -2
  244. airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
  245. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  246. airflow/providers/google/marketing_platform/links/analytics_admin.py +4 -5
  247. airflow/providers/google/marketing_platform/operators/analytics_admin.py +7 -6
  248. airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
  249. airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
  250. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
  251. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
  252. airflow/providers/google/marketing_platform/sensors/display_video.py +4 -64
  253. airflow/providers/google/suite/hooks/calendar.py +1 -1
  254. airflow/providers/google/suite/hooks/drive.py +2 -2
  255. airflow/providers/google/suite/hooks/sheets.py +15 -1
  256. airflow/providers/google/suite/operators/sheets.py +8 -3
  257. airflow/providers/google/suite/sensors/drive.py +2 -2
  258. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
  259. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  260. airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
  261. airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
  262. airflow/providers/google/version_compat.py +15 -1
  263. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +117 -72
  264. apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
  265. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +1 -1
  266. apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
  267. airflow/providers/google/cloud/example_dags/example_cloud_task.py +0 -54
  268. airflow/providers/google/cloud/hooks/automl.py +0 -679
  269. airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
  270. airflow/providers/google/cloud/links/automl.py +0 -193
  271. airflow/providers/google/cloud/operators/automl.py +0 -1360
  272. airflow/providers/google/cloud/operators/life_sciences.py +0 -119
  273. airflow/providers/google/cloud/operators/mlengine.py +0 -1515
  274. airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
  275. apache_airflow_providers_google-14.0.0.dist-info/RECORD +0 -318
  276. /airflow/providers/google/cloud/{example_dags → bundles}/__init__.py +0 -0
  277. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
  278. {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
@@ -20,41 +20,20 @@
 
 from __future__ import annotations
 
-import asyncio
 import json
 import logging
 import re
 import time
 import uuid
+import warnings
 from collections.abc import Iterable, Mapping, Sequence
 from copy import deepcopy
 from datetime import datetime, timedelta
-from typing import TYPE_CHECKING, Any, NoReturn, Union, cast
+from typing import TYPE_CHECKING, Any, Literal, NoReturn, cast, overload
 
+import pendulum
 from aiohttp import ClientSession as ClientSession
 from gcloud.aio.bigquery import Job, Table as Table_async
-from googleapiclient.discovery import build
-from pandas_gbq import read_gbq
-from pandas_gbq.gbq import GbqConnector  # noqa: F401 used in ``airflow.contrib.hooks.bigquery``
-from requests import Session
-from sqlalchemy import create_engine
-
-from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
-from airflow.providers.common.compat.lineage.hook import get_hook_lineage_collector
-from airflow.providers.common.sql.hooks.sql import DbApiHook
-from airflow.providers.google.cloud.utils.bigquery import bq_cast
-from airflow.providers.google.cloud.utils.credentials_provider import _get_scopes
-from airflow.providers.google.common.consts import CLIENT_INFO
-from airflow.providers.google.common.deprecated import deprecated
-from airflow.providers.google.common.hooks.base_google import (
-    PROVIDE_PROJECT_ID,
-    GoogleBaseAsyncHook,
-    GoogleBaseHook,
-    get_field,
-)
-from airflow.utils.hashlib_wrapper import md5
-from airflow.utils.helpers import convert_camel_to_snake
-from airflow.utils.log.logging_mixin import LoggingMixin
 from google.cloud.bigquery import (
     DEFAULT_RETRY,
     Client,
@@ -75,16 +54,47 @@ from google.cloud.bigquery.table import (
     TableReference,
 )
 from google.cloud.exceptions import NotFound
+from googleapiclient.discovery import build
+from pandas_gbq import read_gbq
+from pandas_gbq.gbq import GbqConnector  # noqa: F401 used in ``airflow.contrib.hooks.bigquery``
+from sqlalchemy import create_engine
+
+from airflow.exceptions import (
+    AirflowException,
+    AirflowOptionalProviderFeatureException,
+    AirflowProviderDeprecationWarning,
+)
+from airflow.providers.common.compat.lineage.hook import get_hook_lineage_collector
+from airflow.providers.common.sql.hooks.sql import DbApiHook
+from airflow.providers.google.cloud.utils.bigquery import bq_cast
+from airflow.providers.google.cloud.utils.credentials_provider import _get_scopes
+from airflow.providers.google.common.consts import CLIENT_INFO
+from airflow.providers.google.common.deprecated import deprecated
+from airflow.providers.google.common.hooks.base_google import (
+    _UNSET,
+    PROVIDE_PROJECT_ID,
+    GoogleBaseAsyncHook,
+    GoogleBaseHook,
+    get_field,
+)
+from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
+from airflow.utils.hashlib_wrapper import md5
+from airflow.utils.helpers import convert_camel_to_snake
+from airflow.utils.log.logging_mixin import LoggingMixin
+from airflow.utils.types import DagRunType
 
 if TYPE_CHECKING:
     import pandas as pd
-
+    import polars as pl
     from google.api_core.page_iterator import HTTPIterator
     from google.api_core.retry import Retry
+    from requests import Session
+
+    from airflow.sdk import Context
 
 log = logging.getLogger(__name__)
 
-BigQueryJob = Union[CopyJob, QueryJob, LoadJob, ExtractJob]
+BigQueryJob = CopyJob | QueryJob | LoadJob | ExtractJob
 
 
 class BigQueryHook(GoogleBaseHook, DbApiHook):
@@ -121,10 +131,10 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         from wtforms import validators
         from wtforms.fields.simple import BooleanField, StringField
 
-        from airflow.www.validators import ValidJson
+        from airflow.providers.google.cloud.utils.validators import ValidJson
 
         connection_form_widgets = super().get_connection_form_widgets()
-        connection_form_widgets["use_legacy_sql"] = BooleanField(lazy_gettext("Use Legacy SQL"), default=True)
+        connection_form_widgets["use_legacy_sql"] = BooleanField(lazy_gettext("Use Legacy SQL"))
         connection_form_widgets["location"] = StringField(
             lazy_gettext("Location"), widget=BS3TextFieldWidget()
         )
@@ -152,21 +162,47 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
 
     def __init__(
         self,
-        use_legacy_sql: bool = True,
-        location: str | None = None,
-        priority: str = "INTERACTIVE",
-        api_resource_configs: dict | None = None,
+        use_legacy_sql: bool | object = _UNSET,
+        location: str | None | object = _UNSET,
+        priority: str | object = _UNSET,
+        api_resource_configs: dict | None | object = _UNSET,
         impersonation_scopes: str | Sequence[str] | None = None,
-        labels: dict | None = None,
+        labels: dict | None | object = _UNSET,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
-        self.use_legacy_sql: bool = self._get_field("use_legacy_sql", use_legacy_sql)
-        self.location: str | None = self._get_field("location", location)
-        self.priority: str = self._get_field("priority", priority)
+        # Use sentinel pattern to distinguish "not provided" from "explicitly provided"
+        if use_legacy_sql is _UNSET:
+            value = self._get_field("use_legacy_sql", _UNSET)
+            self.use_legacy_sql: bool = value if value is not None else True
+        else:
+            self.use_legacy_sql = use_legacy_sql  # type: ignore[assignment]
+
+        if location is _UNSET:
+            self.location: str | None = self._get_field("location", _UNSET)
+        else:
+            self.location = location  # type: ignore[assignment]
+
+        if priority is _UNSET:
+            value = self._get_field("priority", _UNSET)
+            self.priority: str = value if value is not None else "INTERACTIVE"
+        else:
+            self.priority = priority  # type: ignore[assignment]
+
         self.running_job_id: str | None = None
-        self.api_resource_configs: dict = self._get_field("api_resource_configs", api_resource_configs or {})
-        self.labels = self._get_field("labels", labels or {})
+
+        if api_resource_configs is _UNSET:
+            value = self._get_field("api_resource_configs", _UNSET)
+            self.api_resource_configs: dict = value if value is not None else {}
+        else:
+            self.api_resource_configs = api_resource_configs or {}  # type: ignore[assignment]
+
+        if labels is _UNSET:
+            value = self._get_field("labels", _UNSET)
+            self.labels = value if value is not None else {}
+        else:
+            self.labels = labels or {}  # type: ignore[assignment]
+
         self.impersonation_scopes: str | Sequence[str] | None = impersonation_scopes
 
     def get_conn(self) -> BigQueryConnection:
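
Note: the constructor now uses the _UNSET sentinel so an argument the caller never passed can be told apart from one explicitly set to None or False; only when the argument is omitted does the hook consult the connection field and then the hard-coded default. A minimal standalone sketch of the pattern, with a hypothetical _connection_field() standing in for GoogleBaseHook._get_field():

    _UNSET = object()  # sentinel: compared by identity, never equal to user input


    class SentinelDemo:
        def __init__(self, use_legacy_sql=_UNSET):
            if use_legacy_sql is _UNSET:
                # Argument omitted: fall back to the connection, then the default.
                value = self._connection_field("use_legacy_sql")
                self.use_legacy_sql = value if value is not None else True
            else:
                # An explicit argument (even False) always wins over the connection.
                self.use_legacy_sql = use_legacy_sql

        def _connection_field(self, name):
            # Hypothetical stand-in for GoogleBaseHook._get_field(); returns None
            # here to simulate a connection with the field unset.
            return None


    assert SentinelDemo().use_legacy_sql is True
    assert SentinelDemo(use_legacy_sql=False).use_legacy_sql is False

With the old `use_legacy_sql: bool = True` default, an explicit True was indistinguishable from "not passed", so the connection field could silently override it; the sentinel removes that ambiguity.
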
@@ -276,15 +312,57 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         """
         raise NotImplementedError()
 
-    def get_pandas_df(
+    def _get_pandas_df(
         self,
         sql: str,
         parameters: Iterable | Mapping[str, Any] | None = None,
         dialect: str | None = None,
         **kwargs,
     ) -> pd.DataFrame:
+        if dialect is None:
+            dialect = "legacy" if self.use_legacy_sql else "standard"
+
+        credentials, project_id = self.get_credentials_and_project_id()
+
+        return read_gbq(sql, project_id=project_id, dialect=dialect, credentials=credentials, **kwargs)
+
+    def _get_polars_df(self, sql, parameters=None, dialect=None, **kwargs) -> pl.DataFrame:
+        try:
+            import polars as pl
+        except ImportError:
+            raise AirflowOptionalProviderFeatureException(
+                "Polars is not installed. Please install it with `pip install polars`."
+            )
+
+        if dialect is None:
+            dialect = "legacy" if self.use_legacy_sql else "standard"
+
+        credentials, project_id = self.get_credentials_and_project_id()
+
+        pandas_df = read_gbq(sql, project_id=project_id, dialect=dialect, credentials=credentials, **kwargs)
+        return pl.from_pandas(pandas_df)
+
+    @overload
+    def get_df(
+        self, sql, parameters=None, dialect=None, *, df_type: Literal["pandas"] = "pandas", **kwargs
+    ) -> pd.DataFrame: ...
+
+    @overload
+    def get_df(
+        self, sql, parameters=None, dialect=None, *, df_type: Literal["polars"], **kwargs
+    ) -> pl.DataFrame: ...
+
+    def get_df(
+        self,
+        sql,
+        parameters=None,
+        dialect=None,
+        *,
+        df_type: Literal["pandas", "polars"] = "pandas",
+        **kwargs,
+    ) -> pd.DataFrame | pl.DataFrame:
         """
-        Get a Pandas DataFrame for the BigQuery results.
+        Get a DataFrame for the BigQuery results.
 
         The DbApiHook method must be overridden because Pandas doesn't support
         PEP 249 connections, except for SQLite.
@@ -300,12 +378,19 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
             defaults to use `self.use_legacy_sql` if not specified
         :param kwargs: (optional) passed into pandas_gbq.read_gbq method
         """
-        if dialect is None:
-            dialect = "legacy" if self.use_legacy_sql else "standard"
+        if df_type == "polars":
+            return self._get_polars_df(sql, parameters, dialect, **kwargs)
 
-        credentials, project_id = self.get_credentials_and_project_id()
+        if df_type == "pandas":
+            return self._get_pandas_df(sql, parameters, dialect, **kwargs)
 
-        return read_gbq(sql, project_id=project_id, dialect=dialect, credentials=credentials, **kwargs)
+    @deprecated(
+        planned_removal_date="November 30, 2025",
+        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_df",
+        category=AirflowProviderDeprecationWarning,
+    )
+    def get_pandas_df(self, sql, parameters=None, dialect=None, **kwargs):
+        return self._get_pandas_df(sql, parameters, dialect, **kwargs)
 
     @GoogleBaseHook.fallback_to_default_project_id
     def table_exists(self, dataset_id: str, table_id: str, project_id: str) -> bool:
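
Note: get_pandas_df survives only as a deprecated shim over _get_pandas_df; get_df selects the frame backend per call, and the @overload pair keeps the return type precise for type checkers. A hedged usage sketch (connection id and query are illustrative):

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook(gcp_conn_id="google_cloud_default")  # illustrative connection id

    # The default backend stays pandas, so migrating callers only rename the method.
    pandas_frame = hook.get_df("SELECT 1 AS x", df_type="pandas")

    # Polars is opt-in; without the optional dependency installed (pip install polars)
    # this raises AirflowOptionalProviderFeatureException, per _get_polars_df above.
    polars_frame = hook.get_df("SELECT 1 AS x", df_type="polars")
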
@@ -347,135 +432,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         except NotFound:
             return False
 
-    @deprecated(
-        planned_removal_date="July 30, 2025",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_table",
-        category=AirflowProviderDeprecationWarning,
-    )
-    @GoogleBaseHook.fallback_to_default_project_id
-    def create_empty_table(
-        self,
-        project_id: str = PROVIDE_PROJECT_ID,
-        dataset_id: str | None = None,
-        table_id: str | None = None,
-        table_resource: dict[str, Any] | None = None,
-        schema_fields: list | None = None,
-        time_partitioning: dict | None = None,
-        cluster_fields: list[str] | None = None,
-        labels: dict | None = None,
-        view: dict | None = None,
-        materialized_view: dict | None = None,
-        encryption_configuration: dict | None = None,
-        retry: Retry = DEFAULT_RETRY,
-        location: str | None = None,
-        exists_ok: bool = True,
-    ) -> Table:
-        """
-        Create a new, empty table in the dataset.
-
-        To create a view, which is defined by a SQL query, parse a dictionary to
-        the *view* argument.
-
-        :param project_id: The project to create the table into.
-        :param dataset_id: The dataset to create the table into.
-        :param table_id: The Name of the table to be created.
-        :param table_resource: Table resource as described in documentation:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table
-            If provided all other parameters are ignored.
-        :param schema_fields: If set, the schema field list as defined here:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
-
-            .. code-block:: python
-
-                schema_fields = [
-                    {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
-                    {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
-                ]
-
-        :param labels: a dictionary containing labels for the table, passed to BigQuery
-        :param retry: Optional. How to retry the RPC.
-        :param time_partitioning: configure optional time partitioning fields i.e.
-            partition by field, type and expiration as per API specifications.
-
-            .. seealso::
-                https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning
-        :param cluster_fields: [Optional] The fields used for clustering.
-            BigQuery supports clustering for both partitioned and
-            non-partitioned tables.
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clustering.fields
-        :param view: [Optional] A dictionary containing definition for the view.
-            If set, it will create a view instead of a table:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
-
-            .. code-block:: python
-
-                view = {
-                    "query": "SELECT * FROM `test-project-id.test_dataset_id.test_table_prefix*` LIMIT 1000",
-                    "useLegacySql": False,
-                }
-
-        :param materialized_view: [Optional] The materialized view definition.
-        :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-
-            .. code-block:: python
-
-                encryption_configuration = {
-                    "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
-                }
-
-        :param num_retries: Maximum number of retries in case of connection problems.
-        :param location: (Optional) The geographic location where the table should reside.
-        :param exists_ok: If ``True``, ignore "already exists" errors when creating the table.
-        :return: Created table
-        """
-        _table_resource: dict[str, Any] = {}
-
-        if self.location:
-            _table_resource["location"] = self.location
-
-        if schema_fields:
-            _table_resource["schema"] = {"fields": schema_fields}
-
-        if time_partitioning:
-            _table_resource["timePartitioning"] = time_partitioning
-
-        if cluster_fields:
-            _table_resource["clustering"] = {"fields": cluster_fields}
-
-        if labels:
-            _table_resource["labels"] = labels
-
-        if view:
-            _table_resource["view"] = view
-
-        if materialized_view:
-            _table_resource["materializedView"] = materialized_view
-
-        if encryption_configuration:
-            _table_resource["encryptionConfiguration"] = encryption_configuration
-
-        table_resource = table_resource or _table_resource
-        table_resource = self._resolve_table_reference(
-            table_resource=table_resource,
-            project_id=project_id,
-            dataset_id=dataset_id,
-            table_id=table_id,
-        )
-        table = Table.from_api_repr(table_resource)
-        result = self.get_client(project_id=project_id, location=location).create_table(
-            table=table, exists_ok=exists_ok, retry=retry
-        )
-        get_hook_lineage_collector().add_output_asset(
-            context=self,
-            scheme="bigquery",
-            asset_kwargs={
-                "project_id": result.project,
-                "dataset_id": result.dataset_id,
-                "table_id": result.table_id,
-            },
-        )
-        return result
-
     @GoogleBaseHook.fallback_to_default_project_id
     def create_table(
         self,
@@ -862,7 +818,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         if return_iterator:
             # The iterator returned by list_datasets() is a HTTPIterator but annotated
             # as Iterator
-            return iterator  # type: ignore
+            return iterator  # type: ignore
 
         datasets_list = list(iterator)
         self.log.info("Datasets List: %s", len(datasets_list))
@@ -1350,7 +1306,16 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
             job_api_repr.result(timeout=timeout, retry=retry)
         return job_api_repr
 
-    def generate_job_id(self, job_id, dag_id, task_id, logical_date, configuration, force_rerun=False) -> str:
+    def generate_job_id(
+        self,
+        job_id: str | None,
+        dag_id: str,
+        task_id: str,
+        logical_date: datetime | None,
+        configuration: dict,
+        run_after: pendulum.DateTime | datetime | None = None,
+        force_rerun: bool = False,
+    ) -> str:
         if force_rerun:
             hash_base = str(uuid.uuid4())
         else:
@@ -1361,10 +1326,31 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         if job_id:
             return f"{job_id}_{uniqueness_suffix}"
 
-        exec_date = logical_date.isoformat()
-        job_id = f"airflow_{dag_id}_{task_id}_{exec_date}_{uniqueness_suffix}"
+        if logical_date is not None:
+            if AIRFLOW_V_3_0_PLUS:
+                warnings.warn(
+                    "The 'logical_date' parameter is deprecated. Please use 'run_after' instead.",
+                    AirflowProviderDeprecationWarning,
+                    stacklevel=1,
+                )
+            job_id_timestamp = logical_date
+        elif run_after is not None:
+            job_id_timestamp = run_after
+        else:
+            job_id_timestamp = pendulum.now("UTC")
+
+        job_id = f"airflow_{dag_id}_{task_id}_{job_id_timestamp.isoformat()}_{uniqueness_suffix}"
         return re.sub(r"[:\-+.]", "_", job_id)
 
+    def get_run_after_or_logical_date(self, context: Context) -> pendulum.DateTime | datetime | None:
+        dag_run = context.get("dag_run")
+        if not dag_run:
+            return pendulum.now("UTC")
+
+        if AIRFLOW_V_3_0_PLUS:
+            return dag_run.start_date
+        return dag_run.start_date if dag_run.run_type == DagRunType.SCHEDULED else context.get("logical_date")
+
     def split_tablename(
         self, table_input: str, default_project_id: str, var_name: str | None = None
     ) -> tuple[str, str, str]:
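
Note: generate_job_id now takes its timestamp from run_after, warns on Airflow 3 when the deprecated logical_date is still supplied, and falls back to the current UTC time when neither is set; the new get_run_after_or_logical_date helper derives that value from the task context. A hedged sketch of the resulting id (dag and task ids illustrative, hash suffix abbreviated):

    hook = BigQueryHook()
    job_id = hook.generate_job_id(
        job_id=None,
        dag_id="example_dag",  # illustrative
        task_id="run_query",   # illustrative
        logical_date=None,     # deprecated path left unused
        configuration={"query": {"query": "SELECT 1"}},
        run_after=pendulum.datetime(2025, 1, 1, tz="UTC"),
    )
    # e.g. "airflow_example_dag_run_query_2025_01_01T00_00_00_00_00_<hash>"
    # after the final re.sub() rewrites ":", "-", "+" and "." to "_"
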
@@ -1377,8 +1363,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         def var_print(var_name):
             if var_name is None:
                 return ""
-            else:
-                return f"Format exception for {var_name}: "
+            return f"Format exception for {var_name}: "
 
         if table_input.count(".") + table_input.count(":") > 3:
             raise ValueError(f"{var_print(var_name)}Use either : or . to specify project got {table_input}")
@@ -1779,6 +1764,7 @@ class BigQueryCursor(BigQueryBaseCursor):
         schema_update_options: Iterable | None = None,
         priority: str | None = None,
         time_partitioning: dict | None = None,
+        range_partitioning: dict | None = None,
         api_resource_configs: dict | None = None,
         cluster_fields: list[str] | None = None,
         encryption_configuration: dict | None = None,
@@ -1791,6 +1777,10 @@ class BigQueryCursor(BigQueryBaseCursor):
 
         if time_partitioning is None:
             time_partitioning = {}
+        if range_partitioning is None:
+            range_partitioning = {}
+        if time_partitioning and range_partitioning:
+            raise ValueError("Only one of time_partitioning or range_partitioning can be set.")
 
         if not api_resource_configs:
             api_resource_configs = self.hook.api_resource_configs
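
Note: run_query gains range_partitioning as a mutually exclusive sibling of time_partitioning; it is forwarded to the job configuration as rangePartitioning (see the query_param_list entry two hunks below). A hedged sketch of the integer-range form defined by the BigQuery REST API, with illustrative bounds:

    # Integer-range partitioning on a numeric column; field and bounds illustrative.
    range_partitioning = {
        "field": "customer_id",
        "range": {"start": 0, "end": 100000, "interval": 10},
    }

    # Supplying both styles now fails fast:
    #   ValueError: Only one of time_partitioning or range_partitioning can be set.
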
@@ -1820,14 +1810,6 @@ class BigQueryCursor(BigQueryBaseCursor):
                 f" Please only use one or more of the following options: {allowed_schema_update_options}"
             )
 
-        if schema_update_options:
-            if write_disposition not in ["WRITE_APPEND", "WRITE_TRUNCATE"]:
-                raise ValueError(
-                    "schema_update_options is only "
-                    "allowed if write_disposition is "
-                    "'WRITE_APPEND' or 'WRITE_TRUNCATE'."
-                )
-
         if destination_dataset_table:
             destination_project, destination_dataset, destination_table = self.hook.split_tablename(
                 table_input=destination_dataset_table, default_project_id=self.project_id
@@ -1851,16 +1833,21 @@ class BigQueryCursor(BigQueryBaseCursor):
             (maximum_billing_tier, "maximumBillingTier", None, int),
             (maximum_bytes_billed, "maximumBytesBilled", None, float),
             (time_partitioning, "timePartitioning", {}, dict),
+            (range_partitioning, "rangePartitioning", {}, dict),
             (schema_update_options, "schemaUpdateOptions", None, list),
             (destination_dataset_table, "destinationTable", None, dict),
             (cluster_fields, "clustering", None, dict),
         ]
 
-        for param, param_name, param_default, param_type in query_param_list:
-            if param_name not in configuration["query"] and param in [None, {}, ()]:
+        for param_raw, param_name, param_default, param_type in query_param_list:
+            param: Any
+            if param_name not in configuration["query"] and param_raw in [None, {}, ()]:
                 if param_name == "timePartitioning":
-                    param_default = _cleanse_time_partitioning(destination_dataset_table, time_partitioning)
-                    param = param_default
+                    param = _cleanse_time_partitioning(destination_dataset_table, time_partitioning)
+                else:
+                    param = param_default
+            else:
+                param = param_raw
 
             if param in [None, {}, ()]:
                 continue
@@ -1887,15 +1874,14 @@ class BigQueryCursor(BigQueryBaseCursor):
                             "must be a dict with {'projectId':'', "
                             "'datasetId':'', 'tableId':''}"
                         )
-            else:
-                configuration["query"].update(
-                    {
-                        "allowLargeResults": allow_large_results,
-                        "flattenResults": flatten_results,
-                        "writeDisposition": write_disposition,
-                        "createDisposition": create_disposition,
-                    }
-                )
+            configuration["query"].update(
+                {
+                    "allowLargeResults": allow_large_results,
+                    "flattenResults": flatten_results,
+                    "writeDisposition": write_disposition,
+                    "createDisposition": create_disposition,
+                }
+            )
 
         if (
             "useLegacySql" in configuration["query"]
@@ -1939,75 +1925,6 @@ def _escape(s: str) -> str:
     return e
 
 
-@deprecated(
-    planned_removal_date="April 01, 2025",
-    use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.split_tablename",
-    category=AirflowProviderDeprecationWarning,
-)
-def split_tablename(
-    table_input: str, default_project_id: str, var_name: str | None = None
-) -> tuple[str, str, str]:
-    if "." not in table_input:
-        raise ValueError(f"Expected table name in the format of <dataset>.<table>. Got: {table_input}")
-
-    if not default_project_id:
-        raise ValueError("INTERNAL: No default project is specified")
-
-    def var_print(var_name):
-        if var_name is None:
-            return ""
-        else:
-            return f"Format exception for {var_name}: "
-
-    if table_input.count(".") + table_input.count(":") > 3:
-        raise ValueError(f"{var_print(var_name)}Use either : or . to specify project got {table_input}")
-    cmpt = table_input.rsplit(":", 1)
-    project_id = None
-    rest = table_input
-    if len(cmpt) == 1:
-        project_id = None
-        rest = cmpt[0]
-    elif len(cmpt) == 2 and cmpt[0].count(":") <= 1:
-        if cmpt[-1].count(".") != 2:
-            project_id = cmpt[0]
-            rest = cmpt[1]
-    else:
-        raise ValueError(
-            f"{var_print(var_name)}Expect format of (<project:)<dataset>.<table>, got {table_input}"
-        )
-
-    cmpt = rest.split(".")
-    if len(cmpt) == 3:
-        if project_id:
-            raise ValueError(f"{var_print(var_name)}Use either : or . to specify project")
-        project_id = cmpt[0]
-        dataset_id = cmpt[1]
-        table_id = cmpt[2]
-
-    elif len(cmpt) == 2:
-        dataset_id = cmpt[0]
-        table_id = cmpt[1]
-    else:
-        raise ValueError(
-            f"{var_print(var_name)}Expect format of (<project.|<project:)<dataset>.<table>, got {table_input}"
-        )
-
-    # Exclude partition from the table name
-    table_id = table_id.split("$")[0]
-
-    if project_id is None:
-        if var_name is not None:
-            log.info(
-                'Project is not included in %s: %s; using project "%s"',
-                var_name,
-                table_input,
-                default_project_id,
-            )
-        project_id = default_project_id
-
-    return project_id, dataset_id, table_id
-
-
 def _cleanse_time_partitioning(
     destination_dataset_table: str | None, time_partitioning_in: dict | None
 ) -> dict:  # if it is a partitioned table ($ is in the table name) add partition load option
@@ -2117,52 +2034,17 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
             job_id=job_id,
             project=project_id,
             token=token,
-            session=cast(Session, session),
+            session=cast("Session", session),
         )
 
     async def _get_job(
         self, job_id: str | None, project_id: str = PROVIDE_PROJECT_ID, location: str | None = None
     ) -> BigQueryJob | UnknownJob:
-        """
-        Get BigQuery job by its ID, project ID and location.
-
-        WARNING.
-        This is a temporary workaround for issues below, and it's not intended to be used elsewhere!
-        https://github.com/apache/airflow/issues/35833
-        https://github.com/talkiq/gcloud-aio/issues/584
-
-        This method was developed, because neither the `google-cloud-bigquery` nor the `gcloud-aio-bigquery`
-        provides asynchronous access to a BigQuery jobs with location parameter. That's why this method wraps
-        synchronous client call with the event loop's run_in_executor() method.
-
-        This workaround must be deleted along with the method _get_job_sync() and replaced by more robust and
-        cleaner solution in one of two cases:
-        1. The `google-cloud-bigquery` library provides async client with get_job method, that supports
-        optional parameter `location`
-        2. The `gcloud-aio-bigquery` library supports the `location` parameter in get_job() method.
-        """
-        loop = asyncio.get_event_loop()
-        job = await loop.run_in_executor(None, self._get_job_sync, job_id, project_id, location)
+        """Get BigQuery job by its ID, project ID and location."""
+        sync_hook = await self.get_sync_hook()
+        job = sync_hook.get_job(job_id=job_id, project_id=project_id, location=location)
         return job
 
-    def _get_job_sync(self, job_id, project_id, location):
-        """
-        Get BigQuery job by its ID, project ID and location synchronously.
-
-        WARNING
-        This is a temporary workaround for issues below, and it's not intended to be used elsewhere!
-        https://github.com/apache/airflow/issues/35833
-        https://github.com/talkiq/gcloud-aio/issues/584
-
-        This workaround must be deleted along with the method _get_job() and replaced by more robust and
-        cleaner solution in one of two cases:
-        1. The `google-cloud-bigquery` library provides async client with get_job method, that supports
-        optional parameter `location`
-        2. The `gcloud-aio-bigquery` library supports the `location` parameter in get_job() method.
-        """
-        hook = BigQueryHook(**self._hook_kwargs)
-        return hook.get_job(job_id=job_id, project_id=project_id, location=location)
-
     async def get_job_status(
         self, job_id: str | None, project_id: str = PROVIDE_PROJECT_ID, location: str | None = None
     ) -> dict[str, str]:
@@ -2182,7 +2064,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
         async with ClientSession() as session:
             self.log.info("Executing get_job_output..")
             job_client = await self.get_job_instance(project_id, job_id, session)
-            job_query_response = await job_client.get_query_results(cast(Session, session))
+            job_query_response = await job_client.get_query_results(cast("Session", session))
             return job_query_response
 
     async def create_job_for_partition_get(
@@ -2202,7 +2084,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
                 + (f" WHERE table_name='{table_id}'" if table_id else ""),
                 "useLegacySql": False,
             }
-            job_query_resp = await job_client.query(query_request, cast(Session, session))
+            job_query_resp = await job_client.query(query_request, cast("Session", session))
             return job_query_resp["jobReference"]["jobId"]
 
     async def cancel_job(self, job_id: str, project_id: str | None, location: str | None) -> None:
@@ -2265,7 +2147,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
         self,
         sql: str,
         pass_value: Any,
-        records: list[Any],
+        records: list[Any] | None = None,
         tolerance: float | None = None,
     ) -> None:
         """
@@ -2382,12 +2264,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
             test_results[metric] = float(ratios[metric]) < threshold
 
         self.log.info(
-            (
-                "Current metric for %s: %s\n"
-                "Past metric for %s: %s\n"
-                "Ratio for %s: %s\n"
-                "Threshold: %s\n"
-            ),
+            ("Current metric for %s: %s\nPast metric for %s: %s\nRatio for %s: %s\nThreshold: %s\n"),
             metric,
             cur,
             metric,
@@ -2452,5 +2329,5 @@ class BigQueryTableAsyncHook(GoogleBaseAsyncHook):
             table_name=table_id,
             project=project_id,
             token=token,
-            session=cast(Session, session),
+            session=cast("Session", session),
         )
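
Note: the recurring cast(Session, session) → cast("Session", session) edits line up with the import hunk at the top of the file: requests.Session now lives under "if TYPE_CHECKING:" and is absent at runtime, while typing.cast ignores its first argument at runtime and accepts a string. A minimal sketch of the idiom:

    from __future__ import annotations

    from typing import TYPE_CHECKING, cast

    if TYPE_CHECKING:
        from requests import Session  # visible to type checkers only


    def as_session(obj: object) -> Session:
        # The string keeps the reference lazy: at runtime cast() simply
        # returns its second argument unchanged, so "Session" need not
        # be defined when this line executes.
        return cast("Session", obj)
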