apache-airflow-providers-google 10.26.0rc1__py3-none-any.whl → 11.0.0rc1__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (205)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +2 -1
  3. airflow/providers/google/ads/operators/ads.py +2 -1
  4. airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -1
  5. airflow/providers/google/assets/gcs.py +17 -1
  6. airflow/providers/google/cloud/hooks/automl.py +3 -6
  7. airflow/providers/google/cloud/hooks/bigquery.py +41 -1486
  8. airflow/providers/google/cloud/hooks/bigquery_dts.py +4 -11
  9. airflow/providers/google/cloud/hooks/bigtable.py +3 -6
  10. airflow/providers/google/cloud/hooks/cloud_batch.py +6 -3
  11. airflow/providers/google/cloud/hooks/cloud_build.py +3 -15
  12. airflow/providers/google/cloud/hooks/cloud_composer.py +2 -17
  13. airflow/providers/google/cloud/hooks/cloud_memorystore.py +5 -6
  14. airflow/providers/google/cloud/hooks/cloud_run.py +10 -5
  15. airflow/providers/google/cloud/hooks/cloud_sql.py +5 -7
  16. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +3 -7
  17. airflow/providers/google/cloud/hooks/compute.py +3 -6
  18. airflow/providers/google/cloud/hooks/compute_ssh.py +0 -5
  19. airflow/providers/google/cloud/hooks/datacatalog.py +3 -6
  20. airflow/providers/google/cloud/hooks/dataflow.py +3 -14
  21. airflow/providers/google/cloud/hooks/dataform.py +2 -9
  22. airflow/providers/google/cloud/hooks/datafusion.py +4 -15
  23. airflow/providers/google/cloud/hooks/dataplex.py +4 -7
  24. airflow/providers/google/cloud/hooks/dataprep.py +2 -2
  25. airflow/providers/google/cloud/hooks/dataproc.py +77 -22
  26. airflow/providers/google/cloud/hooks/dataproc_metastore.py +2 -9
  27. airflow/providers/google/cloud/hooks/datastore.py +3 -6
  28. airflow/providers/google/cloud/hooks/dlp.py +3 -6
  29. airflow/providers/google/cloud/hooks/functions.py +2 -6
  30. airflow/providers/google/cloud/hooks/gcs.py +2 -18
  31. airflow/providers/google/cloud/hooks/gdm.py +1 -17
  32. airflow/providers/google/cloud/hooks/kms.py +3 -6
  33. airflow/providers/google/cloud/hooks/kubernetes_engine.py +7 -97
  34. airflow/providers/google/cloud/hooks/life_sciences.py +2 -6
  35. airflow/providers/google/cloud/hooks/looker.py +2 -1
  36. airflow/providers/google/cloud/hooks/mlengine.py +0 -8
  37. airflow/providers/google/cloud/hooks/natural_language.py +3 -6
  38. airflow/providers/google/cloud/hooks/os_login.py +3 -6
  39. airflow/providers/google/cloud/hooks/pubsub.py +3 -6
  40. airflow/providers/google/cloud/hooks/secret_manager.py +3 -73
  41. airflow/providers/google/cloud/hooks/spanner.py +3 -6
  42. airflow/providers/google/cloud/hooks/speech_to_text.py +3 -6
  43. airflow/providers/google/cloud/hooks/stackdriver.py +3 -6
  44. airflow/providers/google/cloud/hooks/tasks.py +3 -6
  45. airflow/providers/google/cloud/hooks/text_to_speech.py +3 -6
  46. airflow/providers/google/cloud/hooks/translate.py +455 -9
  47. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +3 -6
  48. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -6
  49. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +3 -6
  50. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +2 -9
  51. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -9
  52. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -14
  53. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -6
  54. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -9
  55. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -1
  56. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -1
  57. airflow/providers/google/cloud/hooks/video_intelligence.py +3 -6
  58. airflow/providers/google/cloud/hooks/vision.py +3 -6
  59. airflow/providers/google/cloud/hooks/workflows.py +2 -9
  60. airflow/providers/google/cloud/links/dataproc.py +0 -1
  61. airflow/providers/google/cloud/links/translate.py +91 -0
  62. airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
  63. airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -3
  64. airflow/providers/google/cloud/openlineage/utils.py +54 -21
  65. airflow/providers/google/cloud/operators/automl.py +5 -4
  66. airflow/providers/google/cloud/operators/bigquery.py +2 -341
  67. airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
  68. airflow/providers/google/cloud/operators/bigtable.py +2 -1
  69. airflow/providers/google/cloud/operators/cloud_batch.py +2 -1
  70. airflow/providers/google/cloud/operators/cloud_build.py +2 -1
  71. airflow/providers/google/cloud/operators/cloud_composer.py +2 -1
  72. airflow/providers/google/cloud/operators/cloud_memorystore.py +2 -1
  73. airflow/providers/google/cloud/operators/cloud_run.py +2 -1
  74. airflow/providers/google/cloud/operators/cloud_sql.py +2 -1
  75. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +2 -1
  76. airflow/providers/google/cloud/operators/compute.py +2 -1
  77. airflow/providers/google/cloud/operators/datacatalog.py +2 -1
  78. airflow/providers/google/cloud/operators/dataflow.py +2 -517
  79. airflow/providers/google/cloud/operators/dataform.py +2 -1
  80. airflow/providers/google/cloud/operators/datafusion.py +2 -1
  81. airflow/providers/google/cloud/operators/dataplex.py +37 -31
  82. airflow/providers/google/cloud/operators/dataprep.py +2 -1
  83. airflow/providers/google/cloud/operators/dataproc.py +3 -633
  84. airflow/providers/google/cloud/operators/dataproc_metastore.py +2 -1
  85. airflow/providers/google/cloud/operators/datastore.py +2 -1
  86. airflow/providers/google/cloud/operators/dlp.py +2 -1
  87. airflow/providers/google/cloud/operators/functions.py +2 -1
  88. airflow/providers/google/cloud/operators/gcs.py +5 -4
  89. airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -11
  90. airflow/providers/google/cloud/operators/life_sciences.py +2 -1
  91. airflow/providers/google/cloud/operators/mlengine.py +2 -1
  92. airflow/providers/google/cloud/operators/natural_language.py +3 -2
  93. airflow/providers/google/cloud/operators/pubsub.py +2 -1
  94. airflow/providers/google/cloud/operators/spanner.py +2 -1
  95. airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
  96. airflow/providers/google/cloud/operators/stackdriver.py +2 -1
  97. airflow/providers/google/cloud/operators/tasks.py +3 -2
  98. airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
  99. airflow/providers/google/cloud/operators/translate.py +622 -32
  100. airflow/providers/google/cloud/operators/translate_speech.py +2 -1
  101. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +2 -93
  102. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +3 -13
  103. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +2 -17
  104. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +2 -1
  105. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +2 -1
  106. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +2 -1
  107. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +3 -13
  108. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +2 -1
  109. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +2 -1
  110. airflow/providers/google/cloud/operators/video_intelligence.py +2 -1
  111. airflow/providers/google/cloud/operators/vision.py +3 -2
  112. airflow/providers/google/cloud/operators/workflows.py +3 -2
  113. airflow/providers/google/cloud/secrets/secret_manager.py +2 -19
  114. airflow/providers/google/cloud/sensors/bigquery.py +2 -81
  115. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
  116. airflow/providers/google/cloud/sensors/bigtable.py +2 -1
  117. airflow/providers/google/cloud/sensors/cloud_composer.py +8 -94
  118. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
  119. airflow/providers/google/cloud/sensors/dataflow.py +2 -1
  120. airflow/providers/google/cloud/sensors/dataform.py +2 -1
  121. airflow/providers/google/cloud/sensors/datafusion.py +2 -1
  122. airflow/providers/google/cloud/sensors/dataplex.py +2 -1
  123. airflow/providers/google/cloud/sensors/dataprep.py +2 -1
  124. airflow/providers/google/cloud/sensors/dataproc.py +2 -1
  125. airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -1
  126. airflow/providers/google/cloud/sensors/gcs.py +4 -36
  127. airflow/providers/google/cloud/sensors/pubsub.py +2 -1
  128. airflow/providers/google/cloud/sensors/tasks.py +2 -1
  129. airflow/providers/google/cloud/sensors/workflows.py +2 -1
  130. airflow/providers/google/cloud/transfers/adls_to_gcs.py +2 -1
  131. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -1
  132. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -1
  133. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +75 -18
  134. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +9 -7
  135. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +2 -1
  136. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +1 -1
  137. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +2 -1
  138. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +2 -1
  139. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -1
  140. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +2 -1
  141. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +13 -9
  142. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -1
  143. airflow/providers/google/cloud/transfers/gcs_to_local.py +2 -1
  144. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +2 -1
  145. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +2 -1
  146. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -1
  147. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -1
  148. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  149. airflow/providers/google/cloud/transfers/s3_to_gcs.py +2 -1
  150. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -1
  151. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +2 -1
  152. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +2 -1
  153. airflow/providers/google/cloud/transfers/sql_to_gcs.py +2 -1
  154. airflow/providers/google/cloud/triggers/bigquery.py +2 -1
  155. airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
  156. airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
  157. airflow/providers/google/cloud/triggers/cloud_build.py +2 -1
  158. airflow/providers/google/cloud/triggers/cloud_composer.py +3 -2
  159. airflow/providers/google/cloud/triggers/cloud_run.py +2 -1
  160. airflow/providers/google/cloud/triggers/cloud_sql.py +1 -1
  161. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
  162. airflow/providers/google/cloud/triggers/dataflow.py +2 -1
  163. airflow/providers/google/cloud/triggers/datafusion.py +2 -1
  164. airflow/providers/google/cloud/triggers/dataplex.py +1 -1
  165. airflow/providers/google/cloud/triggers/dataproc.py +2 -1
  166. airflow/providers/google/cloud/triggers/gcs.py +3 -2
  167. airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -1
  168. airflow/providers/google/cloud/triggers/mlengine.py +2 -1
  169. airflow/providers/google/cloud/triggers/pubsub.py +2 -1
  170. airflow/providers/google/cloud/triggers/vertex_ai.py +2 -1
  171. airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
  172. airflow/providers/google/cloud/utils/dataform.py +1 -1
  173. airflow/providers/google/cloud/utils/field_validator.py +2 -1
  174. airflow/providers/google/cloud/utils/mlengine_operator_utils.py +2 -1
  175. airflow/providers/google/common/hooks/base_google.py +4 -11
  176. airflow/providers/google/common/hooks/discovery_api.py +1 -6
  177. airflow/providers/google/firebase/hooks/firestore.py +1 -1
  178. airflow/providers/google/firebase/operators/firestore.py +2 -1
  179. airflow/providers/google/get_provider_info.py +7 -22
  180. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -1
  181. airflow/providers/google/marketing_platform/hooks/campaign_manager.py +2 -3
  182. airflow/providers/google/marketing_platform/hooks/display_video.py +4 -3
  183. airflow/providers/google/marketing_platform/hooks/search_ads.py +6 -6
  184. airflow/providers/google/marketing_platform/operators/analytics_admin.py +2 -1
  185. airflow/providers/google/marketing_platform/operators/campaign_manager.py +2 -42
  186. airflow/providers/google/marketing_platform/operators/display_video.py +2 -47
  187. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -1
  188. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -7
  189. airflow/providers/google/marketing_platform/sensors/display_video.py +2 -13
  190. airflow/providers/google/suite/hooks/calendar.py +2 -8
  191. airflow/providers/google/suite/hooks/drive.py +2 -6
  192. airflow/providers/google/suite/hooks/sheets.py +2 -7
  193. airflow/providers/google/suite/operators/sheets.py +2 -7
  194. airflow/providers/google/suite/sensors/drive.py +2 -7
  195. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -7
  196. airflow/providers/google/suite/transfers/gcs_to_sheets.py +2 -7
  197. airflow/providers/google/suite/transfers/local_to_drive.py +2 -7
  198. airflow/providers/google/suite/transfers/sql_to_sheets.py +2 -7
  199. {apache_airflow_providers_google-10.26.0rc1.dist-info → apache_airflow_providers_google-11.0.0rc1.dist-info}/METADATA +10 -10
  200. apache_airflow_providers_google-11.0.0rc1.dist-info/RECORD +315 -0
  201. airflow/providers/google/marketing_platform/hooks/analytics.py +0 -211
  202. airflow/providers/google/marketing_platform/operators/analytics.py +0 -551
  203. apache_airflow_providers_google-10.26.0rc1.dist-info/RECORD +0 -317
  204. {apache_airflow_providers_google-10.26.0rc1.dist-info → apache_airflow_providers_google-11.0.0rc1.dist-info}/WHEEL +0 -0
  205. {apache_airflow_providers_google-10.26.0rc1.dist-info → apache_airflow_providers_google-11.0.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/hooks/bigquery.py

@@ -26,10 +26,10 @@ import logging
 import re
 import time
 import uuid
+from collections.abc import Iterable, Mapping, Sequence
 from copy import deepcopy
 from datetime import datetime, timedelta
-from functools import cached_property
-from typing import TYPE_CHECKING, Any, Iterable, Mapping, NoReturn, Sequence, Union, cast
+from typing import TYPE_CHECKING, Any, NoReturn, Union, cast
 
 from aiohttp import ClientSession as ClientSession
 from gcloud.aio.bigquery import Job, Table as Table_async
@@ -37,7 +37,6 @@ from google.cloud.bigquery import (
     DEFAULT_RETRY,
     Client,
     CopyJob,
-    ExternalConfig,
     ExtractJob,
     LoadJob,
     QueryJob,
@@ -47,14 +46,13 @@ from google.cloud.bigquery import (
 from google.cloud.bigquery.dataset import AccessEntry, Dataset, DatasetListItem, DatasetReference
 from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY
 from google.cloud.bigquery.table import (
-    EncryptionConfiguration,
     Row,
     RowIterator,
     Table,
     TableReference,
 )
 from google.cloud.exceptions import NotFound
-from googleapiclient.discovery import Resource, build
+from googleapiclient.discovery import build
 from pandas_gbq import read_gbq
 from pandas_gbq.gbq import GbqConnector  # noqa: F401 used in ``airflow.contrib.hooks.bigquery``
 from requests import Session
@@ -159,11 +157,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         labels: dict | None = None,
         **kwargs,
     ) -> None:
-        if kwargs.get("delegate_to") is not None:
-            raise RuntimeError(
-                "The `delegate_to` parameter has been deprecated before and finally removed in this version"
-                " of Google Provider. You MUST convert it to `impersonate_chain`"
-            )
         super().__init__(**kwargs)
         self.use_legacy_sql: bool = self._get_field("use_legacy_sql", use_legacy_sql)
         self.location: str | None = self._get_field("location", location)
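Migration note: the removed guard above means `delegate_to` is no longer accepted at all; callers that relied on it must switch to `impersonation_chain`. A minimal sketch of constructing the hook with impersonation, where the connection id and service account email are placeholders:

from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

# Placeholder connection id and service account; impersonation_chain replaces delegate_to.
hook = BigQueryHook(
    gcp_conn_id="google_cloud_default",
    use_legacy_sql=False,
    impersonation_chain="workload-sa@my-project.iam.gserviceaccount.com",
)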
@@ -173,15 +166,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         self.labels = self._get_field("labels", labels or {})
         self.impersonation_scopes: str | Sequence[str] | None = impersonation_scopes
 
-    @cached_property
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        reason="This property is no longer in actual use. ",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def credentials_path(self) -> str:
-        return "bigquery_hook_credentials.json"
-
     def get_conn(self) -> BigQueryConnection:
         """Get a BigQuery PEP 249 connection object."""
         http_authorized = self._authorize()
@@ -195,16 +179,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
             hook=self,
         )
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_client.",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_service(self) -> Resource:
-        """Get a BigQuery service object. Deprecated."""
-        http_authorized = self._authorize()
-        return build("bigquery", "v2", http=http_authorized, cache_discovery=False)
-
     def get_client(self, project_id: str = PROVIDE_PROJECT_ID, location: str | None = None) -> Client:
         """
         Get an authenticated BigQuery Client.
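Migration note: with the deprecated `get_service()` gone, code that needs low-level access should call `get_client()` and work with the returned `google.cloud.bigquery.Client`. A minimal sketch, with a placeholder project id:

from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")
# get_client() returns an authenticated google.cloud.bigquery.Client,
# replacing the discovery-based Resource that get_service() used to build.
client = hook.get_client(project_id="my-project", location="US")
datasets = list(client.list_datasets())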
@@ -602,165 +576,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
             not_found_ok=True,
         )
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_table",
-        instructions="Use the replacement method with passing the `table_resource` object. "
-        "This gives more flexibility.",
-        category=AirflowProviderDeprecationWarning,
-    )
-    @GoogleBaseHook.fallback_to_default_project_id
-    def create_external_table(
-        self,
-        external_project_dataset_table: str,
-        schema_fields: list,
-        source_uris: list,
-        source_format: str = "CSV",
-        autodetect: bool = False,
-        compression: str = "NONE",
-        ignore_unknown_values: bool = False,
-        max_bad_records: int = 0,
-        skip_leading_rows: int = 0,
-        field_delimiter: str = ",",
-        quote_character: str | None = None,
-        allow_quoted_newlines: bool = False,
-        allow_jagged_rows: bool = False,
-        encoding: str = "UTF-8",
-        src_fmt_configs: dict | None = None,
-        labels: dict | None = None,
-        description: str | None = None,
-        encryption_configuration: dict | None = None,
-        location: str | None = None,
-        project_id: str = PROVIDE_PROJECT_ID,
-    ) -> Table:
-        """
-        Create an external table in the dataset with data from Google Cloud Storage.
-
-        .. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource
-
-        This method is deprecated. Please use :func:`.create_empty_table` with
-        the ``table_resource`` object. See function documentation for more
-        details about these parameters.
-
-        :param external_project_dataset_table:
-            The dotted ``(<project>.|<project>:)<dataset>.<table>($<partition>)`` BigQuery
-            table name to create external table.
-            If ``<project>`` is not included, project will be the
-            project defined in the connection json.
-        :param schema_fields: The schema field list as defined here:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource
-        :param source_uris: The source Google Cloud
-            Storage URI (e.g. gs://some-bucket/some-file.txt). A single wild
-            per-object name can be used.
-        :param source_format: File format to export.
-        :param autodetect: Try to detect schema and format options automatically.
-            Any option specified explicitly will be honored.
-        :param compression: [Optional] The compression type of the data source.
-            Possible values include GZIP and NONE.
-            The default value is NONE.
-            This setting is ignored for Google Cloud Bigtable,
-            Google Cloud Datastore backups and Avro formats.
-        :param ignore_unknown_values: [Optional] Indicates if BigQuery should allow
-            extra values that are not represented in the table schema.
-            If true, the extra values are ignored. If false, records with extra columns
-            are treated as bad records, and if there are too many bad records, an
-            invalid error is returned in the job result.
-        :param max_bad_records: The maximum number of bad records that BigQuery can
-            ignore when running the job.
-        :param skip_leading_rows: Number of rows to skip when loading from a CSV.
-        :param field_delimiter: The delimiter to use when loading from a CSV.
-        :param quote_character: The value that is used to quote data sections in a CSV
-            file.
-        :param allow_quoted_newlines: Whether to allow quoted newlines (true) or not
-            (false).
-        :param allow_jagged_rows: Accept rows that are missing trailing optional columns.
-            The missing values are treated as nulls. If false, records with missing
-            trailing columns are treated as bad records, and if there are too many bad
-            records, an invalid error is returned in the job result. Only applicable when
-            source_format is CSV.
-        :param encoding: The character encoding of the data. See:
-
-            .. seealso::
-                https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding
-        :param src_fmt_configs: configure optional fields specific to the source format
-        :param labels: A dictionary containing labels for the BiqQuery table.
-        :param description: A string containing the description for the BigQuery table.
-        :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-
-            .. code-block:: python
-
-                encryption_configuration = {
-                    "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
-                }
-        """
-        location = location or self.location
-        src_fmt_configs = src_fmt_configs or {}
-        source_format = source_format.upper()
-        compression = compression.upper()
-
-        external_config_api_repr = {
-            "autodetect": autodetect,
-            "sourceFormat": source_format,
-            "sourceUris": source_uris,
-            "compression": compression,
-            "ignoreUnknownValues": ignore_unknown_values,
-        }
-
-        # if following fields are not specified in src_fmt_configs,
-        # honor the top-level params for backward-compatibility
-        backward_compatibility_configs = {
-            "skipLeadingRows": skip_leading_rows,
-            "fieldDelimiter": field_delimiter,
-            "quote": quote_character,
-            "allowQuotedNewlines": allow_quoted_newlines,
-            "allowJaggedRows": allow_jagged_rows,
-            "encoding": encoding,
-        }
-        src_fmt_to_param_mapping = {"CSV": "csvOptions", "GOOGLE_SHEETS": "googleSheetsOptions"}
-        src_fmt_to_configs_mapping = {
-            "csvOptions": [
-                "allowJaggedRows",
-                "allowQuotedNewlines",
-                "fieldDelimiter",
-                "skipLeadingRows",
-                "quote",
-                "encoding",
-            ],
-            "googleSheetsOptions": ["skipLeadingRows"],
-        }
-        if source_format in src_fmt_to_param_mapping:
-            valid_configs = src_fmt_to_configs_mapping[src_fmt_to_param_mapping[source_format]]
-            src_fmt_configs = _validate_src_fmt_configs(
-                source_format, src_fmt_configs, valid_configs, backward_compatibility_configs
-            )
-            external_config_api_repr[src_fmt_to_param_mapping[source_format]] = src_fmt_configs
-
-        # build external config
-        external_config = ExternalConfig.from_api_repr(external_config_api_repr)
-        if schema_fields:
-            external_config.schema = [SchemaField.from_api_repr(f) for f in schema_fields]
-        if max_bad_records:
-            external_config.max_bad_records = max_bad_records
-
-        # build table definition
-        table = Table(table_ref=TableReference.from_string(external_project_dataset_table, project_id))
-        table.external_data_configuration = external_config
-        if labels:
-            table.labels = labels
-
-        if description:
-            table.description = description
-
-        if encryption_configuration:
-            table.encryption_configuration = EncryptionConfiguration.from_api_repr(encryption_configuration)
-
-        self.log.info("Creating external table: %s", external_project_dataset_table)
-        table_object = self.create_empty_table(
-            table_resource=table.to_api_repr(), project_id=project_id, location=location, exists_ok=True
-        )
-        self.log.info("External table created successfully: %s", external_project_dataset_table)
-        return table_object
-
     @GoogleBaseHook.fallback_to_default_project_id
     def update_table(
         self,
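Migration note: `create_external_table()` was already a wrapper around `create_empty_table()`, so the replacement is to build the table resource (including `externalDataConfiguration`) yourself, following the BigQuery tables REST representation. A minimal sketch with placeholder project, dataset, and bucket names:

from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")
table_resource = {
    "tableReference": {
        "projectId": "my-project",
        "datasetId": "my_dataset",
        "tableId": "my_external_table",
    },
    "externalDataConfiguration": {
        "sourceFormat": "CSV",
        "sourceUris": ["gs://my-bucket/data/*.csv"],
        "autodetect": True,
        "csvOptions": {"skipLeadingRows": 1},
    },
}
hook.create_empty_table(table_resource=table_resource, project_id="my-project", exists_ok=True)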
@@ -804,113 +619,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         self.log.info("Table %s.%s.%s updated successfully", project_id, dataset_id, table_id)
         return table_object.to_api_repr()
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.update_table",
-        category=AirflowProviderDeprecationWarning,
-    )
-    @GoogleBaseHook.fallback_to_default_project_id
-    def patch_table(
-        self,
-        dataset_id: str,
-        table_id: str,
-        project_id: str = PROVIDE_PROJECT_ID,
-        description: str | None = None,
-        expiration_time: int | None = None,
-        external_data_configuration: dict | None = None,
-        friendly_name: str | None = None,
-        labels: dict | None = None,
-        schema: list | None = None,
-        time_partitioning: dict | None = None,
-        view: dict | None = None,
-        require_partition_filter: bool | None = None,
-        encryption_configuration: dict | None = None,
-    ) -> None:
-        """
-        Patch information in an existing table.
-
-        It only updates fields that are provided in the request object. This
-        method is deprecated. Please use :func:`.update_table` instead.
-
-        Reference: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/patch
-
-        :param dataset_id: The dataset containing the table to be patched.
-        :param table_id: The Name of the table to be patched.
-        :param project_id: The project containing the table to be patched.
-        :param description: [Optional] A user-friendly description of this table.
-        :param expiration_time: [Optional] The time when this table expires,
-            in milliseconds since the epoch.
-        :param external_data_configuration: [Optional] A dictionary containing
-            properties of a table stored outside of BigQuery.
-        :param friendly_name: [Optional] A descriptive name for this table.
-        :param labels: [Optional] A dictionary containing labels associated with this table.
-        :param schema: [Optional] If set, the schema field list as defined here:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
-            The supported schema modifications and unsupported schema modification are listed here:
-            https://cloud.google.com/bigquery/docs/managing-table-schemas
-
-            .. code-block:: python
-
-                schema = [
-                    {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
-                    {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
-                ]
-
-        :param time_partitioning: [Optional] A dictionary containing time-based partitioning
-            definition for the table.
-        :param view: [Optional] A dictionary containing definition for the view.
-            If set, it will patch a view instead of a table:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
-
-            .. code-block:: python
-
-                view = {
-                    "query": "SELECT * FROM `test-project-id.test_dataset_id.test_table_prefix*` LIMIT 500",
-                    "useLegacySql": False,
-                }
-
-        :param require_partition_filter: [Optional] If true, queries over the this table require a
-            partition filter. If false, queries over the table
-        :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-
-            .. code-block:: python
-
-                encryption_configuration = {
-                    "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
-                }
-
-        """
-        table_resource: dict[str, Any] = {}
-
-        if description is not None:
-            table_resource["description"] = description
-        if expiration_time is not None:
-            table_resource["expirationTime"] = expiration_time
-        if external_data_configuration:
-            table_resource["externalDataConfiguration"] = external_data_configuration
-        if friendly_name is not None:
-            table_resource["friendlyName"] = friendly_name
-        if labels:
-            table_resource["labels"] = labels
-        if schema:
-            table_resource["schema"] = {"fields": schema}
-        if time_partitioning:
-            table_resource["timePartitioning"] = time_partitioning
-        if view:
-            table_resource["view"] = view
-        if require_partition_filter is not None:
-            table_resource["requirePartitionFilter"] = require_partition_filter
-        if encryption_configuration:
-            table_resource["encryptionConfiguration"] = encryption_configuration
-
-        self.update_table(
-            table_resource=table_resource,
-            fields=list(table_resource.keys()),
-            project_id=project_id,
-            dataset_id=dataset_id,
-            table_id=table_id,
-        )
-
     @GoogleBaseHook.fallback_to_default_project_id
     def insert_all(
         self,
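Migration note: `patch_table()` only assembled a partial table resource and handed it to `update_table()`, so callers can do the same directly. A minimal sketch with placeholder identifiers:

from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")
table_resource = {"description": "Nightly snapshot", "labels": {"team": "analytics"}}
hook.update_table(
    table_resource=table_resource,
    fields=list(table_resource.keys()),  # only the listed fields are modified
    project_id="my-project",
    dataset_id="my_dataset",
    table_id="my_table",
)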
@@ -1014,96 +722,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         self.log.info("Dataset successfully updated: %s", dataset)
         return dataset
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.update_dataset",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def patch_dataset(
-        self, dataset_id: str, dataset_resource: dict, project_id: str = PROVIDE_PROJECT_ID
-    ) -> dict:
-        """
-        Patches information in an existing dataset.
-
-        It only replaces fields that are provided in the submitted dataset resource.
-
-        This method is deprecated. Please use :func:`.update_dataset` instead.
-
-        More info:
-        https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/patch
-
-        :param dataset_id: The BigQuery Dataset ID
-        :param dataset_resource: Dataset resource that will be provided
-            in request body.
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource
-        :param project_id: The Google Cloud Project ID
-        """
-        project_id = project_id or self.project_id
-        if not dataset_id or not isinstance(dataset_id, str):
-            raise ValueError(
-                f"dataset_id argument must be provided and has a type 'str'. You provided: {dataset_id}"
-            )
-
-        service = self.get_service()
-        dataset_project_id = project_id or self.project_id
-
-        self.log.info("Start patching dataset: %s:%s", dataset_project_id, dataset_id)
-        dataset = (
-            service.datasets()
-            .patch(
-                datasetId=dataset_id,
-                projectId=dataset_project_id,
-                body=dataset_resource,
-            )
-            .execute(num_retries=self.num_retries)
-        )
-        self.log.info("Dataset successfully patched: %s", dataset)
-
-        return dataset
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_dataset_tables_list(
-        self,
-        dataset_id: str,
-        project_id: str = PROVIDE_PROJECT_ID,
-        table_prefix: str | None = None,
-        max_results: int | None = None,
-    ) -> list[dict[str, Any]]:
-        """
-        List tables of a BigQuery dataset.
-
-        If a table prefix is specified, only tables beginning by it are
-        returned. This method is deprecated. Please use
-        :func:`.get_dataset_tables` instead.
-
-        For more information, see:
-        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list
-
-        :param dataset_id: The BigQuery Dataset ID
-        :param project_id: The Google Cloud Project ID
-        :param table_prefix: Tables must begin by this prefix to be returned (case sensitive)
-        :param max_results: The maximum number of results to return in a single response page.
-            Leverage the page tokens to iterate through the entire collection.
-        :return: List of tables associated with the dataset
-        """
-        project_id = project_id or self.project_id
-        tables = self.get_client().list_tables(
-            dataset=DatasetReference(project=project_id, dataset_id=dataset_id),
-            max_results=max_results,
-        )
-
-        if table_prefix:
-            result = [t.reference.to_api_repr() for t in tables if t.table_id.startswith(table_prefix)]
-        else:
-            result = [t.reference.to_api_repr() for t in tables]
-
-        self.log.info("%s tables found", len(result))
-        return result
-
     @GoogleBaseHook.fallback_to_default_project_id
     def get_datasets_list(
         self,
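Migration note: `patch_dataset()` maps onto `update_dataset()` and `get_dataset_tables_list()` onto `get_dataset_tables()`; prefix filtering now happens on the caller's side. A rough sketch, assuming `update_dataset()` keeps its `fields`/`dataset_resource` keywords and that `get_dataset_tables()` returns table-reference dictionaries with a `tableId` key:

from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")
hook.update_dataset(
    fields=["description"],
    dataset_resource={"description": "Raw landing zone"},
    dataset_id="my_dataset",
    project_id="my-project",
)
tables = hook.get_dataset_tables(dataset_id="my_dataset", project_id="my-project")
staging_tables = [t for t in tables if t["tableId"].startswith("stg_")]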
@@ -1266,29 +884,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         ).to_api_repr()
         return table
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.delete_table",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_table_delete(self, deletion_dataset_table: str, ignore_if_missing: bool = False) -> None:
-        """
-        Delete an existing table from the dataset.
-
-        If the table does not exist, return an error unless *ignore_if_missing*
-        is set to True.
-
-        This method is deprecated. Please use :func:`.delete_table` instead.
-
-        :param deletion_dataset_table: A dotted
-            ``(<project>.|<project>:)<dataset>.<table>`` that indicates which table
-            will be deleted.
-        :param ignore_if_missing: if True, then return success even if the
-            requested table does not exist.
-        :return:
-        """
-        return self.delete_table(table_id=deletion_dataset_table, not_found_ok=ignore_if_missing)
-
     @GoogleBaseHook.fallback_to_default_project_id
     def delete_table(
         self,
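Migration note: `run_table_delete()` simply forwarded to `delete_table()`, so the replacement is a keyword rename. A minimal sketch with a placeholder table id:

from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")
# not_found_ok takes over the role of the old ignore_if_missing flag.
hook.delete_table(table_id="my-project.my_dataset.tmp_table", not_found_ok=True)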
@@ -1314,47 +909,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         )
         self.log.info("Deleted table %s", table_id)
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.list_rows",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_tabledata(
-        self,
-        dataset_id: str,
-        table_id: str,
-        max_results: int | None = None,
-        selected_fields: str | None = None,
-        page_token: str | None = None,
-        start_index: int | None = None,
-    ) -> list[dict]:
-        """
-        Get data from given table.
-
-        This method is deprecated. Please use :func:`.list_rows` instead.
-
-        .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/tabledata/list
-
-        :param dataset_id: the dataset ID of the requested table.
-        :param table_id: the table ID of the requested table.
-        :param max_results: the maximum results to return.
-        :param selected_fields: List of fields to return (comma-separated). If
-            unspecified, all fields are returned.
-        :param page_token: page token, returned from a previous call,
-            identifying the result set.
-        :param start_index: zero based index of the starting row to read.
-        :return: list of rows
-        """
-        rows = self.list_rows(
-            dataset_id=dataset_id,
-            table_id=table_id,
-            max_results=max_results,
-            selected_fields=selected_fields,
-            page_token=page_token,
-            start_index=start_index,
-        )
-        return [dict(r) for r in rows]
-
     @GoogleBaseHook.fallback_to_default_project_id
     def list_rows(
         self,
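Migration note: `get_tabledata()` delegated to `list_rows()` and converted each Row to a dict; that conversion now belongs to the caller. A minimal sketch with placeholder identifiers:

from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")
rows = hook.list_rows(
    dataset_id="my_dataset",
    table_id="my_table",
    max_results=100,
    selected_fields="emp_name,salary",
)
records = [dict(row) for row in rows]  # Row objects behave like mappings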
@@ -1551,18 +1105,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         job = self.get_client(project_id=project_id, location=location).get_job(job_id=job_id)
         return job.done(retry=retry)
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.cancel_job",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def cancel_query(self) -> None:
-        """Cancel all started queries that have not yet completed."""
-        if self.running_job_id:
-            self.cancel_job(job_id=self.running_job_id)
-        else:
-            self.log.info("No running BigQuery jobs to cancel.")
-
     @GoogleBaseHook.fallback_to_default_project_id
     def cancel_job(
         self,
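Migration note: `cancel_query()` depended on the hook-level `running_job_id` attribute; with it gone, the job id has to be tracked explicitly and passed to `cancel_job()`. A minimal sketch using a placeholder query configuration:

from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")
job = hook.insert_job(
    configuration={"query": {"query": "SELECT 1", "useLegacySql": False}},
    project_id="my-project",
)
# Cancel this specific job rather than whatever the hook last started.
hook.cancel_job(job_id=job.job_id, project_id="my-project")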
@@ -1705,701 +1247,51 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
1705
1247
  job_api_repr.result(timeout=timeout, retry=retry)
1706
1248
  return job_api_repr
1707
1249
 
1708
- @deprecated(
1709
- planned_removal_date="November 01, 2024",
1710
- use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_job",
1711
- category=AirflowProviderDeprecationWarning,
1712
- )
1713
- def run_with_configuration(self, configuration: dict) -> str:
1714
- """
1715
- Execute a BigQuery SQL query.
1716
-
1717
- .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
1718
-
1719
- This method is deprecated. Please use :func:`.insert_job` instead.
1720
-
1721
- :param configuration: The configuration parameter maps directly to
1722
- BigQuery's configuration field in the job object. See
1723
- https://cloud.google.com/bigquery/docs/reference/v2/jobs for
1724
- details.
1725
- """
1726
- job = self.insert_job(configuration=configuration, project_id=self.project_id)
1727
- self.running_job_id = job.job_id
1728
- return job.job_id
1729
-
1730
- @deprecated(
1731
- planned_removal_date="November 01, 2024",
1732
- use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_job",
1733
- category=AirflowProviderDeprecationWarning,
1734
- )
1735
- def run_load(
1736
- self,
1737
- destination_project_dataset_table: str,
1738
- source_uris: list,
1739
- schema_fields: list | None = None,
1740
- source_format: str = "CSV",
1741
- create_disposition: str = "CREATE_IF_NEEDED",
1742
- skip_leading_rows: int = 0,
1743
- write_disposition: str = "WRITE_EMPTY",
1744
- field_delimiter: str = ",",
1745
- max_bad_records: int = 0,
1746
- quote_character: str | None = None,
1747
- ignore_unknown_values: bool = False,
1748
- allow_quoted_newlines: bool = False,
1749
- allow_jagged_rows: bool = False,
1750
- encoding: str = "UTF-8",
1751
- schema_update_options: Iterable | None = None,
1752
- src_fmt_configs: dict | None = None,
1753
- time_partitioning: dict | None = None,
1754
- cluster_fields: list | None = None,
1755
- autodetect: bool = False,
1756
- encryption_configuration: dict | None = None,
1757
- labels: dict | None = None,
1758
- description: str | None = None,
1759
- ) -> str:
1760
- """
1761
- Load data from Google Cloud Storage to BigQuery.
1762
-
1763
- .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
1764
-
1765
- This method is deprecated. Please use :func:`.insert_job` instead.
1766
-
1767
- :param destination_project_dataset_table:
1768
- The dotted ``(<project>.|<project>:)<dataset>.<table>($<partition>)`` BigQuery
1769
- table to load data into. If ``<project>`` is not included, project will be the
1770
- project defined in the connection json. If a partition is specified the
1771
- operator will automatically append the data, create a new partition or create
1772
- a new DAY partitioned table.
1773
- :param schema_fields: The schema field list as defined here:
1774
- https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load
1775
- Required if autodetect=False; optional if autodetect=True.
1776
- :param autodetect: Attempt to autodetect the schema for CSV and JSON
1777
- source files.
1778
- :param source_uris: The source Google Cloud
1779
- Storage URI (e.g. gs://some-bucket/some-file.txt). A single wild
1780
- per-object name can be used.
1781
- :param source_format: File format to export.
1782
- :param create_disposition: The create disposition if the table doesn't exist.
1783
- :param skip_leading_rows: Number of rows to skip when loading from a CSV.
1784
- :param write_disposition: The write disposition if the table already exists.
1785
- :param field_delimiter: The delimiter to use when loading from a CSV.
1786
- :param max_bad_records: The maximum number of bad records that BigQuery can
1787
- ignore when running the job.
1788
- :param quote_character: The value that is used to quote data sections in a CSV
1789
- file.
1790
- :param ignore_unknown_values: [Optional] Indicates if BigQuery should allow
1791
- extra values that are not represented in the table schema.
1792
- If true, the extra values are ignored. If false, records with extra columns
1793
- are treated as bad records, and if there are too many bad records, an
1794
- invalid error is returned in the job result.
1795
- :param allow_quoted_newlines: Whether to allow quoted newlines (true) or not
1796
- (false).
1797
- :param allow_jagged_rows: Accept rows that are missing trailing optional columns.
1798
- The missing values are treated as nulls. If false, records with missing
1799
- trailing columns are treated as bad records, and if there are too many bad
1800
- records, an invalid error is returned in the job result. Only applicable when
1801
- source_format is CSV.
1802
- :param encoding: The character encoding of the data.
1250
+ def generate_job_id(self, job_id, dag_id, task_id, logical_date, configuration, force_rerun=False) -> str:
1251
+ if force_rerun:
1252
+ hash_base = str(uuid.uuid4())
1253
+ else:
1254
+ hash_base = json.dumps(configuration, sort_keys=True)
1803
1255
 
1804
- .. seealso::
1805
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding
1806
- :param schema_update_options: Allows the schema of the destination
1807
- table to be updated as a side effect of the load job.
1808
- :param src_fmt_configs: configure optional fields specific to the source format
1809
- :param time_partitioning: configure optional time partitioning fields i.e.
1810
- partition by field, type and expiration as per API specifications.
1811
- :param cluster_fields: Request that the result of this load be stored sorted
1812
- by one or more columns. BigQuery supports clustering for both partitioned and
1813
- non-partitioned tables. The order of columns given determines the sort order.
1814
- :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
1256
+ uniqueness_suffix = md5(hash_base.encode()).hexdigest()
1815
1257
 
1816
- .. code-block:: python
1258
+ if job_id:
1259
+ return f"{job_id}_{uniqueness_suffix}"
1817
1260
 
1818
- encryption_configuration = {
1819
- "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
1820
- }
1261
+ exec_date = logical_date.isoformat()
1262
+ job_id = f"airflow_{dag_id}_{task_id}_{exec_date}_{uniqueness_suffix}"
1263
+ return re.sub(r"[:\-+.]", "_", job_id)
1821
1264
 
1822
- :param labels: A dictionary containing labels for the BiqQuery table.
1823
- :param description: A string containing the description for the BigQuery table.
1824
- """
1825
- if not self.project_id:
1826
- raise ValueError("The project_id should be set")
1265
+ def split_tablename(
1266
+ self, table_input: str, default_project_id: str, var_name: str | None = None
1267
+ ) -> tuple[str, str, str]:
1268
+ if "." not in table_input:
1269
+ raise ValueError(f"Expected table name in the format of <dataset>.<table>. Got: {table_input}")
1827
1270
 
1828
- # To provide backward compatibility
1829
- schema_update_options = list(schema_update_options or [])
1271
+ if not default_project_id:
1272
+ raise ValueError("INTERNAL: No default project is specified")
1830
1273
 
1831
- # bigquery only allows certain source formats
1832
- # we check to make sure the passed source format is valid
1833
- # if it's not, we raise a ValueError
1834
- # Refer to this link for more details:
1835
- # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceFormat
1836
-
1837
- if schema_fields is None and not autodetect:
1838
- raise ValueError("You must either pass a schema or autodetect=True.")
1839
-
1840
- if src_fmt_configs is None:
1841
- src_fmt_configs = {}
1842
-
1843
- source_format = source_format.upper()
1844
- allowed_formats = [
1845
- "CSV",
1846
- "NEWLINE_DELIMITED_JSON",
1847
- "AVRO",
1848
- "GOOGLE_SHEETS",
1849
- "DATASTORE_BACKUP",
1850
- "PARQUET",
1851
- ]
1852
- if source_format not in allowed_formats:
1853
- raise ValueError(
1854
- f"{source_format} is not a valid source format. "
1855
- f"Please use one of the following types: {allowed_formats}."
1856
- )
1274
+ def var_print(var_name):
1275
+ if var_name is None:
1276
+ return ""
1277
+ else:
1278
+ return f"Format exception for {var_name}: "
1857
1279
 
1858
- # bigquery also allows you to define how you want a table's schema to change
1859
- # as a side effect of a load
1860
- # for more details:
1861
- # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schemaUpdateOptions
1862
- allowed_schema_update_options = ["ALLOW_FIELD_ADDITION", "ALLOW_FIELD_RELAXATION"]
1863
- if not set(allowed_schema_update_options).issuperset(set(schema_update_options)):
1280
+ if table_input.count(".") + table_input.count(":") > 3:
1281
+ raise ValueError(f"{var_print(var_name)}Use either : or . to specify project got {table_input}")
1282
+ cmpt = table_input.rsplit(":", 1)
1283
+ project_id = None
1284
+ rest = table_input
1285
+ if len(cmpt) == 1:
1286
+ project_id = None
1287
+ rest = cmpt[0]
1288
+ elif len(cmpt) == 2 and cmpt[0].count(":") <= 1:
1289
+ if cmpt[-1].count(".") != 2:
1290
+ project_id = cmpt[0]
1291
+ rest = cmpt[1]
1292
+ else:
1864
1293
  raise ValueError(
1865
- f"{schema_update_options} contains invalid schema update options. "
1866
- f"Please only use one or more of the following options: {allowed_schema_update_options}"
1867
- )
1868
-
1869
- destination_project, destination_dataset, destination_table = self.split_tablename(
1870
- table_input=destination_project_dataset_table,
1871
- default_project_id=self.project_id,
1872
- var_name="destination_project_dataset_table",
1873
- )
1874
-
1875
- configuration: dict[str, Any] = {
1876
- "load": {
1877
- "autodetect": autodetect,
1878
- "createDisposition": create_disposition,
1879
- "destinationTable": {
1880
- "projectId": destination_project,
1881
- "datasetId": destination_dataset,
1882
- "tableId": destination_table,
1883
- },
1884
- "sourceFormat": source_format,
1885
- "sourceUris": source_uris,
1886
- "writeDisposition": write_disposition,
1887
- "ignoreUnknownValues": ignore_unknown_values,
1888
- }
1889
- }
1890
-
1891
- time_partitioning = _cleanse_time_partitioning(destination_project_dataset_table, time_partitioning)
1892
- if time_partitioning:
1893
- configuration["load"].update({"timePartitioning": time_partitioning})
1894
-
1895
- if cluster_fields:
1896
- configuration["load"].update({"clustering": {"fields": cluster_fields}})
1897
-
1898
- if schema_fields:
1899
- configuration["load"]["schema"] = {"fields": schema_fields}
1900
-
1901
- if schema_update_options:
1902
- if write_disposition not in ["WRITE_APPEND", "WRITE_TRUNCATE"]:
1903
- raise ValueError(
1904
- "schema_update_options is only "
1905
- "allowed if write_disposition is "
1906
- "'WRITE_APPEND' or 'WRITE_TRUNCATE'."
1907
- )
1908
- else:
1909
- self.log.info("Adding experimental 'schemaUpdateOptions': %s", schema_update_options)
1910
- configuration["load"]["schemaUpdateOptions"] = schema_update_options
1911
-
1912
- if max_bad_records:
1913
- configuration["load"]["maxBadRecords"] = max_bad_records
1914
-
1915
- if encryption_configuration:
1916
- configuration["load"]["destinationEncryptionConfiguration"] = encryption_configuration
1917
-
1918
- if labels or description:
1919
- configuration["load"].update({"destinationTableProperties": {}})
1920
-
1921
- if labels:
1922
- configuration["load"]["destinationTableProperties"]["labels"] = labels
1923
-
1924
- if description:
1925
- configuration["load"]["destinationTableProperties"]["description"] = description
1926
-
1927
- src_fmt_to_configs_mapping = {
1928
- "CSV": [
1929
- "allowJaggedRows",
1930
- "allowQuotedNewlines",
1931
- "autodetect",
1932
- "fieldDelimiter",
1933
- "skipLeadingRows",
1934
- "ignoreUnknownValues",
1935
- "nullMarker",
1936
- "quote",
1937
- "encoding",
1938
- "preserveAsciiControlCharacters",
1939
- ],
1940
- "DATASTORE_BACKUP": ["projectionFields"],
1941
- "NEWLINE_DELIMITED_JSON": ["autodetect", "ignoreUnknownValues"],
1942
- "PARQUET": ["autodetect", "ignoreUnknownValues"],
1943
- "AVRO": ["useAvroLogicalTypes"],
1944
- }
1945
-
1946
- valid_configs = src_fmt_to_configs_mapping[source_format]
1947
-
1948
- # if following fields are not specified in src_fmt_configs,
1949
- # honor the top-level params for backward-compatibility
1950
- backward_compatibility_configs = {
1951
- "skipLeadingRows": skip_leading_rows,
1952
- "fieldDelimiter": field_delimiter,
1953
- "ignoreUnknownValues": ignore_unknown_values,
1954
- "quote": quote_character,
1955
- "allowQuotedNewlines": allow_quoted_newlines,
1956
- "encoding": encoding,
1957
- }
1958
-
1959
- src_fmt_configs = _validate_src_fmt_configs(
1960
- source_format, src_fmt_configs, valid_configs, backward_compatibility_configs
1961
- )
1962
-
1963
- configuration["load"].update(src_fmt_configs)
1964
-
1965
- if allow_jagged_rows:
1966
- configuration["load"]["allowJaggedRows"] = allow_jagged_rows
1967
-
1968
- job = self.insert_job(configuration=configuration, project_id=self.project_id)
1969
- self.running_job_id = job.job_id
1970
- return job.job_id
1971
-
1972
- @deprecated(
1973
- planned_removal_date="November 01, 2024",
1974
- use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_job",
1975
- category=AirflowProviderDeprecationWarning,
1976
- )
1977
- def run_copy(
1978
- self,
1979
- source_project_dataset_tables: list | str,
1980
- destination_project_dataset_table: str,
1981
- write_disposition: str = "WRITE_EMPTY",
1982
- create_disposition: str = "CREATE_IF_NEEDED",
1983
- labels: dict | None = None,
1984
- encryption_configuration: dict | None = None,
1985
- ) -> str:
1986
- """
1987
- Copy data from one BigQuery table to another.
1988
-
1989
- .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy
1990
-
1991
- This method is deprecated. Please use :func:`.insert_job` instead.
1992
-
1993
- :param source_project_dataset_tables: One or more dotted
1994
- ``(project:|project.)<dataset>.<table>``
1995
- BigQuery tables to use as the source data. Use a list if there are
1996
- multiple source tables.
1997
- If ``<project>`` is not included, project will be the project defined
1998
- in the connection json.
1999
- :param destination_project_dataset_table: The destination BigQuery
2000
- table. Format is: ``(project:|project.)<dataset>.<table>``
2001
- :param write_disposition: The write disposition if the table already exists.
2002
- :param create_disposition: The create disposition if the table doesn't exist.
2003
- :param labels: a dictionary containing labels for the job/query,
2004
- passed to BigQuery
2005
- :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
2006
-
2007
- .. code-block:: python
2008
-
2009
- encryption_configuration = {
2010
- "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
2011
- }
2012
- """
2013
- if not self.project_id:
2014
- raise ValueError("The project_id should be set")
2015
-
2016
- source_project_dataset_tables = (
2017
- [source_project_dataset_tables]
2018
- if not isinstance(source_project_dataset_tables, list)
2019
- else source_project_dataset_tables
2020
- )
2021
-
2022
- source_project_dataset_tables_fixup = []
2023
- for source_project_dataset_table in source_project_dataset_tables:
2024
- source_project, source_dataset, source_table = self.split_tablename(
2025
- table_input=source_project_dataset_table,
2026
- default_project_id=self.project_id,
2027
- var_name="source_project_dataset_table",
2028
- )
2029
- source_project_dataset_tables_fixup.append(
2030
- {"projectId": source_project, "datasetId": source_dataset, "tableId": source_table}
2031
- )
2032
-
2033
- destination_project, destination_dataset, destination_table = self.split_tablename(
2034
- table_input=destination_project_dataset_table, default_project_id=self.project_id
2035
- )
2036
- configuration = {
2037
- "copy": {
2038
- "createDisposition": create_disposition,
2039
- "writeDisposition": write_disposition,
2040
- "sourceTables": source_project_dataset_tables_fixup,
2041
- "destinationTable": {
2042
- "projectId": destination_project,
2043
- "datasetId": destination_dataset,
2044
- "tableId": destination_table,
2045
- },
2046
- }
2047
- }
2048
-
2049
- if labels:
2050
- configuration["labels"] = labels
2051
-
2052
- if encryption_configuration:
2053
- configuration["copy"]["destinationEncryptionConfiguration"] = encryption_configuration
2054
-
2055
- job = self.insert_job(configuration=configuration, project_id=self.project_id)
2056
- self.running_job_id = job.job_id
2057
- return job.job_id
2058
-
2059
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_job",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_extract(
-        self,
-        source_project_dataset_table: str,
-        destination_cloud_storage_uris: list[str],
-        compression: str = "NONE",
-        export_format: str = "CSV",
-        field_delimiter: str = ",",
-        print_header: bool = True,
-        labels: dict | None = None,
-        return_full_job: bool = False,
-    ) -> str | BigQueryJob:
-        """
-        Copy data from BigQuery to Google Cloud Storage.
-
-        .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
-
-        This method is deprecated. Please use :func:`.insert_job` instead.
-
-        :param source_project_dataset_table: The dotted ``<dataset>.<table>``
-            BigQuery table to use as the source data.
-        :param destination_cloud_storage_uris: The destination Google Cloud
-            Storage URI (e.g. gs://some-bucket/some-file.txt). Follows
-            convention defined here:
-            https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple
-        :param compression: Type of compression to use.
-        :param export_format: File format to export.
-        :param field_delimiter: The delimiter to use when extracting to a CSV.
-        :param print_header: Whether to print a header for a CSV file extract.
-        :param labels: a dictionary containing labels for the job/query,
-            passed to BigQuery
-        :param return_full_job: return full job instead of job id only
-        """
-        if not self.project_id:
-            raise ValueError("The project_id should be set")
-
-        source_project, source_dataset, source_table = self.split_tablename(
-            table_input=source_project_dataset_table,
-            default_project_id=self.project_id,
-            var_name="source_project_dataset_table",
-        )
-
-        configuration: dict[str, Any] = {
-            "extract": {
-                "sourceTable": {
-                    "projectId": source_project,
-                    "datasetId": source_dataset,
-                    "tableId": source_table,
-                },
-                "compression": compression,
-                "destinationUris": destination_cloud_storage_uris,
-                "destinationFormat": export_format,
-            }
-        }
-
-        if labels:
-            configuration["labels"] = labels
-
-        if export_format == "CSV":
-            # Only set fieldDelimiter and printHeader fields if using CSV.
-            # Google does not like it if you set these fields for other export
-            # formats.
-            configuration["extract"]["fieldDelimiter"] = field_delimiter
-            configuration["extract"]["printHeader"] = print_header
-
-        job = self.insert_job(configuration=configuration, project_id=self.project_id)
-        self.running_job_id = job.job_id
-        if return_full_job:
-            return job
-        return job.job_id
-
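The extract path follows the same pattern: `insert_job` accepts an `extract` configuration equivalent to what `run_extract` built. A sketch with placeholder names; as the removed code notes, `fieldDelimiter` and `printHeader` are only valid for CSV exports:

.. code-block:: python

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook()
    configuration = {
        "extract": {
            "sourceTable": {
                "projectId": "my-project",
                "datasetId": "my_dataset",
                "tableId": "my_table",
            },
            "compression": "NONE",
            "destinationUris": ["gs://my-bucket/export-*.csv"],
            "destinationFormat": "CSV",
            # Only valid for CSV exports; omit for AVRO, PARQUET, or JSON.
            "fieldDelimiter": ",",
            "printHeader": True,
        }
    }
    job = hook.insert_job(configuration=configuration, project_id="my-project")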
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_job",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_query(
-        self,
-        sql: str,
-        destination_dataset_table: str | None = None,
-        write_disposition: str = "WRITE_EMPTY",
-        allow_large_results: bool = False,
-        flatten_results: bool | None = None,
-        udf_config: list | None = None,
-        use_legacy_sql: bool | None = None,
-        maximum_billing_tier: int | None = None,
-        maximum_bytes_billed: float | None = None,
-        create_disposition: str = "CREATE_IF_NEEDED",
-        query_params: list | None = None,
-        labels: dict | None = None,
-        schema_update_options: Iterable | None = None,
-        priority: str | None = None,
-        time_partitioning: dict | None = None,
-        api_resource_configs: dict | None = None,
-        cluster_fields: list[str] | None = None,
-        location: str | None = None,
-        encryption_configuration: dict | None = None,
-    ) -> str:
-        """
-        Execute a BigQuery SQL query.
-
-        Optionally persists results in a BigQuery table.
-
-        .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
-
-        This method is deprecated. Please use :func:`.insert_job` instead.
-
-        For more details about these parameters.
-
-        :param sql: The BigQuery SQL to execute.
-        :param destination_dataset_table: The dotted ``<dataset>.<table>``
-            BigQuery table to save the query results.
-        :param write_disposition: What to do if the table already exists in
-            BigQuery.
-        :param allow_large_results: Whether to allow large results.
-        :param flatten_results: If true and query uses legacy SQL dialect, flattens
-            all nested and repeated fields in the query results. ``allowLargeResults``
-            must be true if this is set to false. For standard SQL queries, this
-            flag is ignored and results are never flattened.
-        :param udf_config: The User Defined Function configuration for the query.
-            See https://cloud.google.com/bigquery/user-defined-functions for details.
-        :param use_legacy_sql: Whether to use legacy SQL (true) or standard SQL (false).
-            If `None`, defaults to `self.use_legacy_sql`.
-        :param api_resource_configs: a dictionary that contain params
-            'configuration' applied for Google BigQuery Jobs API:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs
-            for example, {'query': {'useQueryCache': False}}. You could use it
-            if you need to provide some params that are not supported by the
-            BigQueryHook like args.
-        :param maximum_billing_tier: Positive integer that serves as a
-            multiplier of the basic price.
-        :param maximum_bytes_billed: Limits the bytes billed for this job.
-            Queries that will have bytes billed beyond this limit will fail
-            (without incurring a charge). If unspecified, this will be
-            set to your project default.
-        :param create_disposition: Specifies whether the job is allowed to
-            create new tables.
-        :param query_params: a list of dictionary containing query parameter types and
-            values, passed to BigQuery
-        :param labels: a dictionary containing labels for the job/query,
-            passed to BigQuery
-        :param schema_update_options: Allows the schema of the destination
-            table to be updated as a side effect of the query job.
-        :param priority: Specifies a priority for the query.
-            Possible values include INTERACTIVE and BATCH.
-            If `None`, defaults to `self.priority`.
-        :param time_partitioning: configure optional time partitioning fields i.e.
-            partition by field, type and expiration as per API specifications.
-        :param cluster_fields: Request that the result of this query be stored sorted
-            by one or more columns. BigQuery supports clustering for both partitioned and
-            non-partitioned tables. The order of columns given determines the sort order.
-        :param location: The geographic location of the job. Required except for
-            US and EU. See details at
-            https://cloud.google.com/bigquery/docs/locations#specifying_your_location
-        :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-
-            .. code-block:: python
-
-                encryption_configuration = {
-                    "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
-                }
-        """
-        if not self.project_id:
-            raise ValueError("The project_id should be set")
-
-        labels = labels or self.labels
-        schema_update_options = list(schema_update_options or [])
-
-        priority = priority or self.priority
-
-        if time_partitioning is None:
-            time_partitioning = {}
-
-        if not api_resource_configs:
-            api_resource_configs = self.api_resource_configs
-        else:
-            _validate_value("api_resource_configs", api_resource_configs, dict)
-        configuration = deepcopy(api_resource_configs)
-        if "query" not in configuration:
-            configuration["query"] = {}
-
-        else:
-            _validate_value("api_resource_configs['query']", configuration["query"], dict)
-
-        if sql is None and not configuration["query"].get("query", None):
-            raise TypeError("`BigQueryBaseCursor.run_query` missing 1 required positional argument: `sql`")
-
-        # BigQuery also allows you to define how you want a table's schema to change
-        # as a side effect of a query job
-        # for more details:
-        # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.schemaUpdateOptions
-
-        allowed_schema_update_options = ["ALLOW_FIELD_ADDITION", "ALLOW_FIELD_RELAXATION"]
-
-        if not set(allowed_schema_update_options).issuperset(set(schema_update_options)):
-            raise ValueError(
-                f"{schema_update_options} contains invalid schema update options."
-                f" Please only use one or more of the following options: {allowed_schema_update_options}"
-            )
-
-        if schema_update_options:
-            if write_disposition not in ["WRITE_APPEND", "WRITE_TRUNCATE"]:
-                raise ValueError(
-                    "schema_update_options is only "
-                    "allowed if write_disposition is "
-                    "'WRITE_APPEND' or 'WRITE_TRUNCATE'."
-                )
-
-        if destination_dataset_table:
-            destination_project, destination_dataset, destination_table = self.split_tablename(
-                table_input=destination_dataset_table, default_project_id=self.project_id
-            )
-
-            destination_dataset_table = {  # type: ignore
-                "projectId": destination_project,
-                "datasetId": destination_dataset,
-                "tableId": destination_table,
-            }
-
-        if cluster_fields:
-            cluster_fields = {"fields": cluster_fields}  # type: ignore
-
-        query_param_list: list[tuple[Any, str, str | bool | None | dict, type | tuple[type]]] = [
-            (sql, "query", None, (str,)),
-            (priority, "priority", priority, (str,)),
-            (use_legacy_sql, "useLegacySql", self.use_legacy_sql, bool),
-            (query_params, "queryParameters", None, list),
-            (udf_config, "userDefinedFunctionResources", None, list),
-            (maximum_billing_tier, "maximumBillingTier", None, int),
-            (maximum_bytes_billed, "maximumBytesBilled", None, float),
-            (time_partitioning, "timePartitioning", {}, dict),
-            (schema_update_options, "schemaUpdateOptions", None, list),
-            (destination_dataset_table, "destinationTable", None, dict),
-            (cluster_fields, "clustering", None, dict),
-        ]
-
-        for param, param_name, param_default, param_type in query_param_list:
-            if param_name not in configuration["query"] and param in [None, {}, ()]:
-                if param_name == "timePartitioning":
-                    param_default = _cleanse_time_partitioning(destination_dataset_table, time_partitioning)
-                param = param_default
-
-            if param in [None, {}, ()]:
-                continue
-
-            _api_resource_configs_duplication_check(param_name, param, configuration["query"])
-
-            configuration["query"][param_name] = param
-
-            # check valid type of provided param,
-            # it last step because we can get param from 2 sources,
-            # and first of all need to find it
-
-            _validate_value(param_name, configuration["query"][param_name], param_type)
-
-            if param_name == "schemaUpdateOptions" and param:
-                self.log.info("Adding experimental 'schemaUpdateOptions': %s", schema_update_options)
-
-            if param_name == "destinationTable":
-                for key in ["projectId", "datasetId", "tableId"]:
-                    if key not in configuration["query"]["destinationTable"]:
-                        raise ValueError(
-                            "Not correct 'destinationTable' in "
-                            "api_resource_configs. 'destinationTable' "
-                            "must be a dict with {'projectId':'', "
-                            "'datasetId':'', 'tableId':''}"
-                        )
-                else:
-                    configuration["query"].update(
-                        {
-                            "allowLargeResults": allow_large_results,
-                            "flattenResults": flatten_results,
-                            "writeDisposition": write_disposition,
-                            "createDisposition": create_disposition,
-                        }
-                    )
-
-        if (
-            "useLegacySql" in configuration["query"]
-            and configuration["query"]["useLegacySql"]
-            and "queryParameters" in configuration["query"]
-        ):
-            raise ValueError("Query parameters are not allowed when using legacy SQL")
-
-        if labels:
-            _api_resource_configs_duplication_check("labels", labels, configuration)
-            configuration["labels"] = labels
-
-        if encryption_configuration:
-            configuration["query"]["destinationEncryptionConfiguration"] = encryption_configuration
-
-        job = self.insert_job(configuration=configuration, project_id=self.project_id, location=location)
-        self.running_job_id = job.job_id
-        return job.job_id
-
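Likewise for queries: a `query` configuration passed to `insert_job` replaces `run_query`, and the returned `BigQueryJob` can be waited on directly instead of tracking `running_job_id`. A sketch with placeholder project, dataset, and table names:

.. code-block:: python

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook(use_legacy_sql=False)
    configuration = {
        "query": {
            "query": "SELECT name, COUNT(*) AS n FROM `my-project.my_dataset.events` GROUP BY name",
            "useLegacySql": False,
            "destinationTable": {
                "projectId": "my-project",
                "datasetId": "my_dataset",
                "tableId": "event_counts",
            },
            "writeDisposition": "WRITE_TRUNCATE",
            "createDisposition": "CREATE_IF_NEEDED",
        },
        "labels": {"team": "data-platform"},
    }
    job = hook.insert_job(configuration=configuration, project_id="my-project", location="US")
    rows = list(job.result())  # QueryJob.result() blocks until the job finishes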
-    def generate_job_id(self, job_id, dag_id, task_id, logical_date, configuration, force_rerun=False) -> str:
-        if force_rerun:
-            hash_base = str(uuid.uuid4())
-        else:
-            hash_base = json.dumps(configuration, sort_keys=True)
-
-        uniqueness_suffix = md5(hash_base.encode()).hexdigest()
-
-        if job_id:
-            return f"{job_id}_{uniqueness_suffix}"
-
-        exec_date = logical_date.isoformat()
-        job_id = f"airflow_{dag_id}_{task_id}_{exec_date}_{uniqueness_suffix}"
-        return re.sub(r"[:\-+.]", "_", job_id)
-
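The removed `generate_job_id` helper derives a deterministic suffix from the job configuration (or a random UUID when `force_rerun=True`) and then strips characters BigQuery does not allow in job ids. A standalone sketch of the same scheme, using hypothetical DAG and task names:

.. code-block:: python

    import json
    import re
    from hashlib import md5

    configuration = {"query": {"query": "SELECT 1", "useLegacySql": False}}
    # str(uuid.uuid4()) would be used here instead when force_rerun=True
    uniqueness_suffix = md5(json.dumps(configuration, sort_keys=True).encode()).hexdigest()

    job_id = f"airflow_example_dag_example_task_2024-01-01T00:00:00+00:00_{uniqueness_suffix}"
    print(re.sub(r"[:\-+.]", "_", job_id))
    # airflow_example_dag_example_task_2024_01_01T00_00_00_00_00_<md5 hex digest>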
-    def split_tablename(
-        self, table_input: str, default_project_id: str, var_name: str | None = None
-    ) -> tuple[str, str, str]:
-        if "." not in table_input:
-            raise ValueError(f"Expected table name in the format of <dataset>.<table>. Got: {table_input}")
-
-        if not default_project_id:
-            raise ValueError("INTERNAL: No default project is specified")
-
-        def var_print(var_name):
-            if var_name is None:
-                return ""
-            else:
-                return f"Format exception for {var_name}: "
-
-        if table_input.count(".") + table_input.count(":") > 3:
-            raise ValueError(f"{var_print(var_name)}Use either : or . to specify project got {table_input}")
-        cmpt = table_input.rsplit(":", 1)
-        project_id = None
-        rest = table_input
-        if len(cmpt) == 1:
-            project_id = None
-            rest = cmpt[0]
-        elif len(cmpt) == 2 and cmpt[0].count(":") <= 1:
-            if cmpt[-1].count(".") != 2:
-                project_id = cmpt[0]
-                rest = cmpt[1]
-        else:
-            raise ValueError(
-                f"{var_print(var_name)}Expect format of (<project:)<dataset>.<table>, got {table_input}"
+                f"{var_print(var_name)}Expect format of (<project:)<dataset>.<table>, got {table_input}"
             )
 
         cmpt = rest.split(".")
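For reference, the parsing behaviour of `split_tablename` as it stood in 10.26.0 (the method is removed from the hook in 11.0.0): a project can be given with either `:` or `.`, and `default_project_id` fills in when the input is just `<dataset>.<table>`. Expected results, following from the code above, with placeholder identifiers:

.. code-block:: python

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook()
    hook.split_tablename("other-project:my_dataset.my_table", default_project_id="my-project")
    # -> ("other-project", "my_dataset", "my_table")
    hook.split_tablename("other-project.my_dataset.my_table", default_project_id="my-project")
    # -> ("other-project", "my_dataset", "my_table")
    hook.split_tablename("my_dataset.my_table", default_project_id="my-project")
    # -> ("my-project", "my_dataset", "my_table")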
@@ -2548,343 +1440,6 @@ class BigQueryBaseCursor(LoggingMixin):
         self.labels = labels
         self.hook = hook
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_table",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def create_empty_table(self, *args, **kwargs):
-        """
-        Create empty table. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_table`
-        instead.
-        """
-        return self.hook.create_empty_table(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_dataset",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def create_empty_dataset(self, *args, **kwargs) -> dict[str, Any]:
-        """
-        Create empty dataset. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_dataset`
-        instead.
-        """
-        return self.hook.create_empty_dataset(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_dataset_tables(self, *args, **kwargs) -> list[dict[str, Any]]:
-        """
-        Get dataset tables. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables`
-        instead.
-        """
-        return self.hook.get_dataset_tables(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.delete_dataset",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def delete_dataset(self, *args, **kwargs) -> None:
-        """
-        Delete dataset. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.delete_dataset`
-        instead.
-        """
-        return self.hook.delete_dataset(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_external_table",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def create_external_table(self, *args, **kwargs):
-        """
-        Create external table. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_external_table`
-        instead.
-        """
-        return self.hook.create_external_table(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_table",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def patch_table(self, *args, **kwargs) -> None:
-        """
-        Patch table. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_table`
-        instead.
-        """
-        return self.hook.patch_table(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_all",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def insert_all(self, *args, **kwargs) -> None:
-        """
-        Insert all. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_all`
-        instead.
-        """
-        return self.hook.insert_all(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.update_dataset",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def update_dataset(self, *args, **kwargs) -> dict:
-        """
-        Update dataset. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.update_dataset`
-        instead.
-        """
-        return Dataset.to_api_repr(self.hook.update_dataset(*args, **kwargs))
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_dataset",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def patch_dataset(self, *args, **kwargs) -> dict:
-        """
-        Patch dataset. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_dataset`
-        instead.
-        """
-        return self.hook.patch_dataset(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables_list",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_dataset_tables_list(self, *args, **kwargs) -> list[dict[str, Any]]:
-        """
-        Get dataset tables list. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables_list`
-        instead.
-        """
-        return self.hook.get_dataset_tables_list(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_datasets_list",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_datasets_list(self, *args, **kwargs) -> list | HTTPIterator:
-        """
-        Get datasets list. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_datasets_list`
-        instead.
-        """
-        return self.hook.get_datasets_list(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_dataset(self, *args, **kwargs) -> Dataset:
-        """
-        Get dataset. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset`
-        instead.
-        """
-        return self.hook.get_dataset(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_grant_dataset_view_access",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_grant_dataset_view_access(self, *args, **kwargs) -> dict:
-        """
-        Grant view access to dataset. DEPRECATED.
-
-        Please use
-        :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_grant_dataset_view_access`
-        instead.
-        """
-        return self.hook.run_grant_dataset_view_access(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_upsert",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_table_upsert(self, *args, **kwargs) -> dict:
-        """
-        Upsert table. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_upsert`
-        instead.
-        """
-        return self.hook.run_table_upsert(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_delete",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_table_delete(self, *args, **kwargs) -> None:
-        """
-        Delete table. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_delete`
-        instead.
-        """
-        return self.hook.run_table_delete(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_tabledata",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_tabledata(self, *args, **kwargs) -> list[dict]:
-        """
-        Get table data. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_tabledata`
-        instead.
-        """
-        return self.hook.get_tabledata(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_schema",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_schema(self, *args, **kwargs) -> dict:
-        """
-        Get Schema. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_schema`
-        instead.
-        """
-        return self.hook.get_schema(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.poll_job_complete",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def poll_job_complete(self, *args, **kwargs) -> bool:
-        """
-        Poll for job completion.DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.poll_job_complete`
-        instead.
-        """
-        return self.hook.poll_job_complete(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.cancel_query",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def cancel_query(self, *args, **kwargs) -> None:
-        """
-        Cancel query. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.cancel_query`
-        instead.
-        """
-        return self.hook.cancel_query(*args, **kwargs)  # type: ignore
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_with_configuration",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_with_configuration(self, *args, **kwargs) -> str:
-        """
-        Run with configuration. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_with_configuration`
-        instead.
-        """
-        return self.hook.run_with_configuration(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_load",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_load(self, *args, **kwargs) -> str:
-        """
-        Run load. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_load`
-        instead.
-        """
-        return self.hook.run_load(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_copy",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_copy(self, *args, **kwargs) -> str:
-        """
-        Run copy. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_copy`
-        instead.
-        """
-        return self.hook.run_copy(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_extract",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_extract(self, *args, **kwargs) -> str | BigQueryJob:
-        """
-        Run extraction. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_extract`
-        instead.
-        """
-        return self.hook.run_extract(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_query",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_query(self, *args, **kwargs) -> str:
-        """
-        Run query. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_query`
-        instead.
-        """
-        return self.hook.run_query(*args, **kwargs)
-
 
 class BigQueryCursor(BigQueryBaseCursor):
     """
@@ -3541,7 +2096,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
             query_request = {
                 "query": "SELECT partition_id "
                 f"FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.PARTITIONS`"
-                + (f" WHERE table_id={table_id}" if table_id else ""),
+                + (f" WHERE table_name='{table_id}'" if table_id else ""),
                 "useLegacySql": False,
             }
             job_query_resp = await job_client.query(query_request, cast(Session, session))
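The one functional change in this hunk: the async partition lookup now filters on `table_name`, the column that `INFORMATION_SCHEMA.PARTITIONS` actually exposes, and quotes the value as a string literal, whereas the previous `table_id=...` predicate referenced a non-existent column with an unquoted value. The generated statement, sketched with placeholder identifiers:

.. code-block:: python

    project_id, dataset_id, table_id = "my-project", "my_dataset", "my_table"
    query = (
        "SELECT partition_id "
        f"FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.PARTITIONS`"
        + (f" WHERE table_name='{table_id}'" if table_id else "")
    )
    print(query)
    # SELECT partition_id FROM `my-project.my_dataset.INFORMATION_SCHEMA.PARTITIONS` WHERE table_name='my_table'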