apache-airflow-providers-google 10.26.0rc1__py3-none-any.whl → 11.0.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +2 -1
- airflow/providers/google/ads/operators/ads.py +2 -1
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -1
- airflow/providers/google/assets/gcs.py +17 -1
- airflow/providers/google/cloud/hooks/automl.py +3 -6
- airflow/providers/google/cloud/hooks/bigquery.py +41 -1486
- airflow/providers/google/cloud/hooks/bigquery_dts.py +4 -11
- airflow/providers/google/cloud/hooks/bigtable.py +3 -6
- airflow/providers/google/cloud/hooks/cloud_batch.py +6 -3
- airflow/providers/google/cloud/hooks/cloud_build.py +3 -15
- airflow/providers/google/cloud/hooks/cloud_composer.py +2 -17
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +5 -6
- airflow/providers/google/cloud/hooks/cloud_run.py +10 -5
- airflow/providers/google/cloud/hooks/cloud_sql.py +5 -7
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +3 -7
- airflow/providers/google/cloud/hooks/compute.py +3 -6
- airflow/providers/google/cloud/hooks/compute_ssh.py +0 -5
- airflow/providers/google/cloud/hooks/datacatalog.py +3 -6
- airflow/providers/google/cloud/hooks/dataflow.py +3 -14
- airflow/providers/google/cloud/hooks/dataform.py +2 -9
- airflow/providers/google/cloud/hooks/datafusion.py +4 -15
- airflow/providers/google/cloud/hooks/dataplex.py +4 -7
- airflow/providers/google/cloud/hooks/dataprep.py +2 -2
- airflow/providers/google/cloud/hooks/dataproc.py +77 -22
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +2 -9
- airflow/providers/google/cloud/hooks/datastore.py +3 -6
- airflow/providers/google/cloud/hooks/dlp.py +3 -6
- airflow/providers/google/cloud/hooks/functions.py +2 -6
- airflow/providers/google/cloud/hooks/gcs.py +2 -18
- airflow/providers/google/cloud/hooks/gdm.py +1 -17
- airflow/providers/google/cloud/hooks/kms.py +3 -6
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +7 -97
- airflow/providers/google/cloud/hooks/life_sciences.py +2 -6
- airflow/providers/google/cloud/hooks/looker.py +2 -1
- airflow/providers/google/cloud/hooks/mlengine.py +0 -8
- airflow/providers/google/cloud/hooks/natural_language.py +3 -6
- airflow/providers/google/cloud/hooks/os_login.py +3 -6
- airflow/providers/google/cloud/hooks/pubsub.py +3 -6
- airflow/providers/google/cloud/hooks/secret_manager.py +3 -73
- airflow/providers/google/cloud/hooks/spanner.py +3 -6
- airflow/providers/google/cloud/hooks/speech_to_text.py +3 -6
- airflow/providers/google/cloud/hooks/stackdriver.py +3 -6
- airflow/providers/google/cloud/hooks/tasks.py +3 -6
- airflow/providers/google/cloud/hooks/text_to_speech.py +3 -6
- airflow/providers/google/cloud/hooks/translate.py +455 -9
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +3 -6
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -6
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +3 -6
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +2 -9
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -9
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -14
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -6
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -9
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -1
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -1
- airflow/providers/google/cloud/hooks/video_intelligence.py +3 -6
- airflow/providers/google/cloud/hooks/vision.py +3 -6
- airflow/providers/google/cloud/hooks/workflows.py +2 -9
- airflow/providers/google/cloud/links/dataproc.py +0 -1
- airflow/providers/google/cloud/links/translate.py +91 -0
- airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -3
- airflow/providers/google/cloud/openlineage/utils.py +54 -21
- airflow/providers/google/cloud/operators/automl.py +5 -4
- airflow/providers/google/cloud/operators/bigquery.py +2 -341
- airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/operators/bigtable.py +2 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +2 -1
- airflow/providers/google/cloud/operators/cloud_build.py +2 -1
- airflow/providers/google/cloud/operators/cloud_composer.py +2 -1
- airflow/providers/google/cloud/operators/cloud_memorystore.py +2 -1
- airflow/providers/google/cloud/operators/cloud_run.py +2 -1
- airflow/providers/google/cloud/operators/cloud_sql.py +2 -1
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/operators/compute.py +2 -1
- airflow/providers/google/cloud/operators/datacatalog.py +2 -1
- airflow/providers/google/cloud/operators/dataflow.py +2 -517
- airflow/providers/google/cloud/operators/dataform.py +2 -1
- airflow/providers/google/cloud/operators/datafusion.py +2 -1
- airflow/providers/google/cloud/operators/dataplex.py +37 -31
- airflow/providers/google/cloud/operators/dataprep.py +2 -1
- airflow/providers/google/cloud/operators/dataproc.py +3 -633
- airflow/providers/google/cloud/operators/dataproc_metastore.py +2 -1
- airflow/providers/google/cloud/operators/datastore.py +2 -1
- airflow/providers/google/cloud/operators/dlp.py +2 -1
- airflow/providers/google/cloud/operators/functions.py +2 -1
- airflow/providers/google/cloud/operators/gcs.py +5 -4
- airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -11
- airflow/providers/google/cloud/operators/life_sciences.py +2 -1
- airflow/providers/google/cloud/operators/mlengine.py +2 -1
- airflow/providers/google/cloud/operators/natural_language.py +3 -2
- airflow/providers/google/cloud/operators/pubsub.py +2 -1
- airflow/providers/google/cloud/operators/spanner.py +2 -1
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
- airflow/providers/google/cloud/operators/stackdriver.py +2 -1
- airflow/providers/google/cloud/operators/tasks.py +3 -2
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
- airflow/providers/google/cloud/operators/translate.py +622 -32
- airflow/providers/google/cloud/operators/translate_speech.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +2 -93
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +3 -13
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +2 -17
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +3 -13
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +2 -1
- airflow/providers/google/cloud/operators/video_intelligence.py +2 -1
- airflow/providers/google/cloud/operators/vision.py +3 -2
- airflow/providers/google/cloud/operators/workflows.py +3 -2
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -19
- airflow/providers/google/cloud/sensors/bigquery.py +2 -81
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/sensors/bigtable.py +2 -1
- airflow/providers/google/cloud/sensors/cloud_composer.py +8 -94
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/sensors/dataflow.py +2 -1
- airflow/providers/google/cloud/sensors/dataform.py +2 -1
- airflow/providers/google/cloud/sensors/datafusion.py +2 -1
- airflow/providers/google/cloud/sensors/dataplex.py +2 -1
- airflow/providers/google/cloud/sensors/dataprep.py +2 -1
- airflow/providers/google/cloud/sensors/dataproc.py +2 -1
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -1
- airflow/providers/google/cloud/sensors/gcs.py +4 -36
- airflow/providers/google/cloud/sensors/pubsub.py +2 -1
- airflow/providers/google/cloud/sensors/tasks.py +2 -1
- airflow/providers/google/cloud/sensors/workflows.py +2 -1
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +75 -18
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +9 -7
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +1 -1
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +2 -1
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +13 -9
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/gcs_to_local.py +2 -1
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +2 -1
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -1
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +2 -1
- airflow/providers/google/cloud/triggers/bigquery.py +2 -1
- airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_build.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_composer.py +3 -2
- airflow/providers/google/cloud/triggers/cloud_run.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_sql.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/triggers/dataflow.py +2 -1
- airflow/providers/google/cloud/triggers/datafusion.py +2 -1
- airflow/providers/google/cloud/triggers/dataplex.py +1 -1
- airflow/providers/google/cloud/triggers/dataproc.py +2 -1
- airflow/providers/google/cloud/triggers/gcs.py +3 -2
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -1
- airflow/providers/google/cloud/triggers/mlengine.py +2 -1
- airflow/providers/google/cloud/triggers/pubsub.py +2 -1
- airflow/providers/google/cloud/triggers/vertex_ai.py +2 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
- airflow/providers/google/cloud/utils/dataform.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +2 -1
- airflow/providers/google/cloud/utils/mlengine_operator_utils.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +4 -11
- airflow/providers/google/common/hooks/discovery_api.py +1 -6
- airflow/providers/google/firebase/hooks/firestore.py +1 -1
- airflow/providers/google/firebase/operators/firestore.py +2 -1
- airflow/providers/google/get_provider_info.py +7 -22
- airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -1
- airflow/providers/google/marketing_platform/hooks/campaign_manager.py +2 -3
- airflow/providers/google/marketing_platform/hooks/display_video.py +4 -3
- airflow/providers/google/marketing_platform/hooks/search_ads.py +6 -6
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +2 -1
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +2 -42
- airflow/providers/google/marketing_platform/operators/display_video.py +2 -47
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -1
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -7
- airflow/providers/google/marketing_platform/sensors/display_video.py +2 -13
- airflow/providers/google/suite/hooks/calendar.py +2 -8
- airflow/providers/google/suite/hooks/drive.py +2 -6
- airflow/providers/google/suite/hooks/sheets.py +2 -7
- airflow/providers/google/suite/operators/sheets.py +2 -7
- airflow/providers/google/suite/sensors/drive.py +2 -7
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -7
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +2 -7
- airflow/providers/google/suite/transfers/local_to_drive.py +2 -7
- airflow/providers/google/suite/transfers/sql_to_sheets.py +2 -7
- {apache_airflow_providers_google-10.26.0rc1.dist-info → apache_airflow_providers_google-11.0.0.dist-info}/METADATA +17 -17
- apache_airflow_providers_google-11.0.0.dist-info/RECORD +315 -0
- airflow/providers/google/marketing_platform/hooks/analytics.py +0 -211
- airflow/providers/google/marketing_platform/operators/analytics.py +0 -551
- apache_airflow_providers_google-10.26.0rc1.dist-info/RECORD +0 -317
- {apache_airflow_providers_google-10.26.0rc1.dist-info → apache_airflow_providers_google-11.0.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.26.0rc1.dist-info → apache_airflow_providers_google-11.0.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/hooks/bigquery.py

@@ -26,10 +26,10 @@ import logging
 import re
 import time
 import uuid
+from collections.abc import Iterable, Mapping, Sequence
 from copy import deepcopy
 from datetime import datetime, timedelta
-from
-from typing import TYPE_CHECKING, Any, Iterable, Mapping, NoReturn, Sequence, Union, cast
+from typing import TYPE_CHECKING, Any, NoReturn, Union, cast
 
 from aiohttp import ClientSession as ClientSession
 from gcloud.aio.bigquery import Job, Table as Table_async
@@ -37,7 +37,6 @@ from google.cloud.bigquery import (
     DEFAULT_RETRY,
     Client,
     CopyJob,
-    ExternalConfig,
     ExtractJob,
     LoadJob,
     QueryJob,
@@ -47,14 +46,13 @@ from google.cloud.bigquery import (
 from google.cloud.bigquery.dataset import AccessEntry, Dataset, DatasetListItem, DatasetReference
 from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY
 from google.cloud.bigquery.table import (
-    EncryptionConfiguration,
     Row,
     RowIterator,
     Table,
     TableReference,
 )
 from google.cloud.exceptions import NotFound
-from googleapiclient.discovery import
+from googleapiclient.discovery import build
 from pandas_gbq import read_gbq
 from pandas_gbq.gbq import GbqConnector  # noqa: F401 used in ``airflow.contrib.hooks.bigquery``
 from requests import Session
@@ -159,11 +157,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         labels: dict | None = None,
         **kwargs,
     ) -> None:
-        if kwargs.get("delegate_to") is not None:
-            raise RuntimeError(
-                "The `delegate_to` parameter has been deprecated before and finally removed in this version"
-                " of Google Provider. You MUST convert it to `impersonate_chain`"
-            )
         super().__init__(**kwargs)
         self.use_legacy_sql: bool = self._get_field("use_legacy_sql", use_legacy_sql)
         self.location: str | None = self._get_field("location", location)
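Upgrade note: with the explicit guard above removed, a leftover `delegate_to` argument presumably now fails as an ordinary unexpected-keyword `TypeError` from the base hook rather than this targeted `RuntimeError`. The fix is what the old message suggested: switch to service-account impersonation. A minimal sketch, with connection id and service-account email as illustrative placeholders:

```python
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

# Before (now rejected):
# hook = BigQueryHook(delegate_to="user@example.com")

# After: impersonate a service account instead
hook = BigQueryHook(
    gcp_conn_id="google_cloud_default",
    impersonation_chain="workload-sa@my-project.iam.gserviceaccount.com",
)
```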
@@ -173,15 +166,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         self.labels = self._get_field("labels", labels or {})
         self.impersonation_scopes: str | Sequence[str] | None = impersonation_scopes
 
-    @cached_property
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        reason="This property is no longer in actual use. ",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def credentials_path(self) -> str:
-        return "bigquery_hook_credentials.json"
-
     def get_conn(self) -> BigQueryConnection:
         """Get a BigQuery PEP 249 connection object."""
         http_authorized = self._authorize()
@@ -195,16 +179,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
             hook=self,
         )
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_client.",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_service(self) -> Resource:
-        """Get a BigQuery service object. Deprecated."""
-        http_authorized = self._authorize()
-        return build("bigquery", "v2", http=http_authorized, cache_discovery=False)
-
     def get_client(self, project_id: str = PROVIDE_PROJECT_ID, location: str | None = None) -> Client:
         """
         Get an authenticated BigQuery Client.
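Migration sketch for the removed `get_service`: its replacement `get_client` (retained above) returns a `google.cloud.bigquery.Client` instead of a discovery `Resource`, so discovery-style `.execute()` chains become native client calls. Project and dataset names below are placeholders:

```python
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")

# Before (removed): discovery Resource
# service = hook.get_service()
# ds = service.datasets().get(projectId="my-project", datasetId="my_dataset").execute()

# After: the google-cloud-bigquery Client
client = hook.get_client(project_id="my-project", location="US")
ds = client.get_dataset("my-project.my_dataset")
```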
@@ -602,165 +576,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
             not_found_ok=True,
         )
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_table",
-        instructions="Use the replacement method with passing the `table_resource` object. "
-        "This gives more flexibility.",
-        category=AirflowProviderDeprecationWarning,
-    )
-    @GoogleBaseHook.fallback_to_default_project_id
-    def create_external_table(
-        self,
-        external_project_dataset_table: str,
-        schema_fields: list,
-        source_uris: list,
-        source_format: str = "CSV",
-        autodetect: bool = False,
-        compression: str = "NONE",
-        ignore_unknown_values: bool = False,
-        max_bad_records: int = 0,
-        skip_leading_rows: int = 0,
-        field_delimiter: str = ",",
-        quote_character: str | None = None,
-        allow_quoted_newlines: bool = False,
-        allow_jagged_rows: bool = False,
-        encoding: str = "UTF-8",
-        src_fmt_configs: dict | None = None,
-        labels: dict | None = None,
-        description: str | None = None,
-        encryption_configuration: dict | None = None,
-        location: str | None = None,
-        project_id: str = PROVIDE_PROJECT_ID,
-    ) -> Table:
-        """
-        Create an external table in the dataset with data from Google Cloud Storage.
-
-        .. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource
-
-        This method is deprecated. Please use :func:`.create_empty_table` with
-        the ``table_resource`` object. See function documentation for more
-        details about these parameters.
-
-        :param external_project_dataset_table:
-            The dotted ``(<project>.|<project>:)<dataset>.<table>($<partition>)`` BigQuery
-            table name to create external table.
-            If ``<project>`` is not included, project will be the
-            project defined in the connection json.
-        :param schema_fields: The schema field list as defined here:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource
-        :param source_uris: The source Google Cloud
-            Storage URI (e.g. gs://some-bucket/some-file.txt). A single wild
-            per-object name can be used.
-        :param source_format: File format to export.
-        :param autodetect: Try to detect schema and format options automatically.
-            Any option specified explicitly will be honored.
-        :param compression: [Optional] The compression type of the data source.
-            Possible values include GZIP and NONE.
-            The default value is NONE.
-            This setting is ignored for Google Cloud Bigtable,
-            Google Cloud Datastore backups and Avro formats.
-        :param ignore_unknown_values: [Optional] Indicates if BigQuery should allow
-            extra values that are not represented in the table schema.
-            If true, the extra values are ignored. If false, records with extra columns
-            are treated as bad records, and if there are too many bad records, an
-            invalid error is returned in the job result.
-        :param max_bad_records: The maximum number of bad records that BigQuery can
-            ignore when running the job.
-        :param skip_leading_rows: Number of rows to skip when loading from a CSV.
-        :param field_delimiter: The delimiter to use when loading from a CSV.
-        :param quote_character: The value that is used to quote data sections in a CSV
-            file.
-        :param allow_quoted_newlines: Whether to allow quoted newlines (true) or not
-            (false).
-        :param allow_jagged_rows: Accept rows that are missing trailing optional columns.
-            The missing values are treated as nulls. If false, records with missing
-            trailing columns are treated as bad records, and if there are too many bad
-            records, an invalid error is returned in the job result. Only applicable when
-            source_format is CSV.
-        :param encoding: The character encoding of the data. See:
-
-            .. seealso::
-                https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding
-        :param src_fmt_configs: configure optional fields specific to the source format
-        :param labels: A dictionary containing labels for the BiqQuery table.
-        :param description: A string containing the description for the BigQuery table.
-        :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-
-            .. code-block:: python
-
-                encryption_configuration = {
-                    "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
-                }
-        """
-        location = location or self.location
-        src_fmt_configs = src_fmt_configs or {}
-        source_format = source_format.upper()
-        compression = compression.upper()
-
-        external_config_api_repr = {
-            "autodetect": autodetect,
-            "sourceFormat": source_format,
-            "sourceUris": source_uris,
-            "compression": compression,
-            "ignoreUnknownValues": ignore_unknown_values,
-        }
-
-        # if following fields are not specified in src_fmt_configs,
-        # honor the top-level params for backward-compatibility
-        backward_compatibility_configs = {
-            "skipLeadingRows": skip_leading_rows,
-            "fieldDelimiter": field_delimiter,
-            "quote": quote_character,
-            "allowQuotedNewlines": allow_quoted_newlines,
-            "allowJaggedRows": allow_jagged_rows,
-            "encoding": encoding,
-        }
-        src_fmt_to_param_mapping = {"CSV": "csvOptions", "GOOGLE_SHEETS": "googleSheetsOptions"}
-        src_fmt_to_configs_mapping = {
-            "csvOptions": [
-                "allowJaggedRows",
-                "allowQuotedNewlines",
-                "fieldDelimiter",
-                "skipLeadingRows",
-                "quote",
-                "encoding",
-            ],
-            "googleSheetsOptions": ["skipLeadingRows"],
-        }
-        if source_format in src_fmt_to_param_mapping:
-            valid_configs = src_fmt_to_configs_mapping[src_fmt_to_param_mapping[source_format]]
-            src_fmt_configs = _validate_src_fmt_configs(
-                source_format, src_fmt_configs, valid_configs, backward_compatibility_configs
-            )
-            external_config_api_repr[src_fmt_to_param_mapping[source_format]] = src_fmt_configs
-
-        # build external config
-        external_config = ExternalConfig.from_api_repr(external_config_api_repr)
-        if schema_fields:
-            external_config.schema = [SchemaField.from_api_repr(f) for f in schema_fields]
-        if max_bad_records:
-            external_config.max_bad_records = max_bad_records
-
-        # build table definition
-        table = Table(table_ref=TableReference.from_string(external_project_dataset_table, project_id))
-        table.external_data_configuration = external_config
-        if labels:
-            table.labels = labels
-
-        if description:
-            table.description = description
-
-        if encryption_configuration:
-            table.encryption_configuration = EncryptionConfiguration.from_api_repr(encryption_configuration)
-
-        self.log.info("Creating external table: %s", external_project_dataset_table)
-        table_object = self.create_empty_table(
-            table_resource=table.to_api_repr(), project_id=project_id, location=location, exists_ok=True
-        )
-        self.log.info("External table created successfully: %s", external_project_dataset_table)
-        return table_object
-
     @GoogleBaseHook.fallback_to_default_project_id
     def update_table(
         self,
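The removed `create_external_table` was already a shim over `create_empty_table` (it assembled a `Table` API representation and forwarded it), so the migration is to pass the tables-API resource directly, as its deprecation notice instructed. A sketch with placeholder names; the resource shape follows the removed code's own mapping of `sourceUris`, `sourceFormat`, and `csvOptions`:

```python
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")
hook.create_empty_table(
    project_id="my-project",
    table_resource={
        "tableReference": {
            "projectId": "my-project",
            "datasetId": "my_dataset",
            "tableId": "my_external_table",
        },
        "externalDataConfiguration": {
            "sourceFormat": "CSV",
            "sourceUris": ["gs://my-bucket/data/*.csv"],
            "autodetect": True,
            "csvOptions": {"skipLeadingRows": 1, "fieldDelimiter": ","},
        },
    },
    exists_ok=True,
)
```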
@@ -804,113 +619,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         self.log.info("Table %s.%s.%s updated successfully", project_id, dataset_id, table_id)
         return table_object.to_api_repr()
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.update_table",
-        category=AirflowProviderDeprecationWarning,
-    )
-    @GoogleBaseHook.fallback_to_default_project_id
-    def patch_table(
-        self,
-        dataset_id: str,
-        table_id: str,
-        project_id: str = PROVIDE_PROJECT_ID,
-        description: str | None = None,
-        expiration_time: int | None = None,
-        external_data_configuration: dict | None = None,
-        friendly_name: str | None = None,
-        labels: dict | None = None,
-        schema: list | None = None,
-        time_partitioning: dict | None = None,
-        view: dict | None = None,
-        require_partition_filter: bool | None = None,
-        encryption_configuration: dict | None = None,
-    ) -> None:
-        """
-        Patch information in an existing table.
-
-        It only updates fields that are provided in the request object. This
-        method is deprecated. Please use :func:`.update_table` instead.
-
-        Reference: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/patch
-
-        :param dataset_id: The dataset containing the table to be patched.
-        :param table_id: The Name of the table to be patched.
-        :param project_id: The project containing the table to be patched.
-        :param description: [Optional] A user-friendly description of this table.
-        :param expiration_time: [Optional] The time when this table expires,
-            in milliseconds since the epoch.
-        :param external_data_configuration: [Optional] A dictionary containing
-            properties of a table stored outside of BigQuery.
-        :param friendly_name: [Optional] A descriptive name for this table.
-        :param labels: [Optional] A dictionary containing labels associated with this table.
-        :param schema: [Optional] If set, the schema field list as defined here:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
-            The supported schema modifications and unsupported schema modification are listed here:
-            https://cloud.google.com/bigquery/docs/managing-table-schemas
-
-            .. code-block:: python
-
-                schema = [
-                    {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
-                    {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
-                ]
-
-        :param time_partitioning: [Optional] A dictionary containing time-based partitioning
-            definition for the table.
-        :param view: [Optional] A dictionary containing definition for the view.
-            If set, it will patch a view instead of a table:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
-
-            .. code-block:: python
-
-                view = {
-                    "query": "SELECT * FROM `test-project-id.test_dataset_id.test_table_prefix*` LIMIT 500",
-                    "useLegacySql": False,
-                }
-
-        :param require_partition_filter: [Optional] If true, queries over the this table require a
-            partition filter. If false, queries over the table
-        :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-
-            .. code-block:: python
-
-                encryption_configuration = {
-                    "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
-                }
-
-        """
-        table_resource: dict[str, Any] = {}
-
-        if description is not None:
-            table_resource["description"] = description
-        if expiration_time is not None:
-            table_resource["expirationTime"] = expiration_time
-        if external_data_configuration:
-            table_resource["externalDataConfiguration"] = external_data_configuration
-        if friendly_name is not None:
-            table_resource["friendlyName"] = friendly_name
-        if labels:
-            table_resource["labels"] = labels
-        if schema:
-            table_resource["schema"] = {"fields": schema}
-        if time_partitioning:
-            table_resource["timePartitioning"] = time_partitioning
-        if view:
-            table_resource["view"] = view
-        if require_partition_filter is not None:
-            table_resource["requirePartitionFilter"] = require_partition_filter
-        if encryption_configuration:
-            table_resource["encryptionConfiguration"] = encryption_configuration
-
-        self.update_table(
-            table_resource=table_resource,
-            fields=list(table_resource.keys()),
-            project_id=project_id,
-            dataset_id=dataset_id,
-            table_id=table_id,
-        )
-
     @GoogleBaseHook.fallback_to_default_project_id
     def insert_all(
         self,
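The removed `patch_table` translated its keyword arguments into a `table_resource` and called `update_table` with `fields=list(table_resource.keys())`; doing the same directly is the migration. A sketch with placeholder identifiers:

```python
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")
hook.update_table(
    dataset_id="my_dataset",
    table_id="my_table",
    project_id="my-project",
    table_resource={
        "description": "Nightly load target",
        "expirationTime": 1735689600000,  # milliseconds since the epoch
    },
    fields=["description", "expirationTime"],  # patch only these fields
)
```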
@@ -1014,96 +722,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         self.log.info("Dataset successfully updated: %s", dataset)
         return dataset
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.update_dataset",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def patch_dataset(
-        self, dataset_id: str, dataset_resource: dict, project_id: str = PROVIDE_PROJECT_ID
-    ) -> dict:
-        """
-        Patches information in an existing dataset.
-
-        It only replaces fields that are provided in the submitted dataset resource.
-
-        This method is deprecated. Please use :func:`.update_dataset` instead.
-
-        More info:
-        https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/patch
-
-        :param dataset_id: The BigQuery Dataset ID
-        :param dataset_resource: Dataset resource that will be provided
-            in request body.
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource
-        :param project_id: The Google Cloud Project ID
-        """
-        project_id = project_id or self.project_id
-        if not dataset_id or not isinstance(dataset_id, str):
-            raise ValueError(
-                f"dataset_id argument must be provided and has a type 'str'. You provided: {dataset_id}"
-            )
-
-        service = self.get_service()
-        dataset_project_id = project_id or self.project_id
-
-        self.log.info("Start patching dataset: %s:%s", dataset_project_id, dataset_id)
-        dataset = (
-            service.datasets()
-            .patch(
-                datasetId=dataset_id,
-                projectId=dataset_project_id,
-                body=dataset_resource,
-            )
-            .execute(num_retries=self.num_retries)
-        )
-        self.log.info("Dataset successfully patched: %s", dataset)
-
-        return dataset
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_dataset_tables_list(
-        self,
-        dataset_id: str,
-        project_id: str = PROVIDE_PROJECT_ID,
-        table_prefix: str | None = None,
-        max_results: int | None = None,
-    ) -> list[dict[str, Any]]:
-        """
-        List tables of a BigQuery dataset.
-
-        If a table prefix is specified, only tables beginning by it are
-        returned. This method is deprecated. Please use
-        :func:`.get_dataset_tables` instead.
-
-        For more information, see:
-        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list
-
-        :param dataset_id: The BigQuery Dataset ID
-        :param project_id: The Google Cloud Project ID
-        :param table_prefix: Tables must begin by this prefix to be returned (case sensitive)
-        :param max_results: The maximum number of results to return in a single response page.
-            Leverage the page tokens to iterate through the entire collection.
-        :return: List of tables associated with the dataset
-        """
-        project_id = project_id or self.project_id
-        tables = self.get_client().list_tables(
-            dataset=DatasetReference(project=project_id, dataset_id=dataset_id),
-            max_results=max_results,
-        )
-
-        if table_prefix:
-            result = [t.reference.to_api_repr() for t in tables if t.table_id.startswith(table_prefix)]
-        else:
-            result = [t.reference.to_api_repr() for t in tables]
-
-        self.log.info("%s tables found", len(result))
-        return result
-
     @GoogleBaseHook.fallback_to_default_project_id
     def get_datasets_list(
        self,
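Both removals here have direct replacements named in their decorators: `update_dataset` takes the same resource dict plus an explicit field mask, and `get_dataset_tables` covers the listing (prefix filtering, if needed, can be applied to the returned table references). A sketch, placeholder names throughout:

```python
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")

# patch_dataset(...) -> update_dataset(...) with a field mask
hook.update_dataset(
    dataset_id="my_dataset",
    project_id="my-project",
    dataset_resource={"description": "Curated marts"},
    fields=["description"],
)

# get_dataset_tables_list(...) -> get_dataset_tables(...)
tables = hook.get_dataset_tables(dataset_id="my_dataset", project_id="my-project")
prefixed = [t for t in tables if t["tableId"].startswith("stg_")]
```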
@@ -1266,29 +884,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         ).to_api_repr()
         return table
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.delete_table",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_table_delete(self, deletion_dataset_table: str, ignore_if_missing: bool = False) -> None:
-        """
-        Delete an existing table from the dataset.
-
-        If the table does not exist, return an error unless *ignore_if_missing*
-        is set to True.
-
-        This method is deprecated. Please use :func:`.delete_table` instead.
-
-        :param deletion_dataset_table: A dotted
-            ``(<project>.|<project>:)<dataset>.<table>`` that indicates which table
-            will be deleted.
-        :param ignore_if_missing: if True, then return success even if the
-            requested table does not exist.
-        :return:
-        """
-        return self.delete_table(table_id=deletion_dataset_table, not_found_ok=ignore_if_missing)
-
     @GoogleBaseHook.fallback_to_default_project_id
     def delete_table(
         self,
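The removed body was a one-line forward, so this rename is mechanical. A sketch with a placeholder table id:

```python
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")
# run_table_delete("my-project.my_dataset.my_table", ignore_if_missing=True) becomes:
hook.delete_table(table_id="my-project.my_dataset.my_table", not_found_ok=True)
```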
@@ -1314,47 +909,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         )
         self.log.info("Deleted table %s", table_id)
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.list_rows",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_tabledata(
-        self,
-        dataset_id: str,
-        table_id: str,
-        max_results: int | None = None,
-        selected_fields: str | None = None,
-        page_token: str | None = None,
-        start_index: int | None = None,
-    ) -> list[dict]:
-        """
-        Get data from given table.
-
-        This method is deprecated. Please use :func:`.list_rows` instead.
-
-        .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/tabledata/list
-
-        :param dataset_id: the dataset ID of the requested table.
-        :param table_id: the table ID of the requested table.
-        :param max_results: the maximum results to return.
-        :param selected_fields: List of fields to return (comma-separated). If
-            unspecified, all fields are returned.
-        :param page_token: page token, returned from a previous call,
-            identifying the result set.
-        :param start_index: zero based index of the starting row to read.
-        :return: list of rows
-        """
-        rows = self.list_rows(
-            dataset_id=dataset_id,
-            table_id=table_id,
-            max_results=max_results,
-            selected_fields=selected_fields,
-            page_token=page_token,
-            start_index=start_index,
-        )
-        return [dict(r) for r in rows]
-
     @GoogleBaseHook.fallback_to_default_project_id
     def list_rows(
         self,
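As the removed shim shows, `list_rows` is the replacement; it yields `Row` objects rather than plain dicts, so keep the shim's final conversion if callers expect dicts. Placeholder names:

```python
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")
rows = hook.list_rows(
    dataset_id="my_dataset",
    table_id="my_table",
    max_results=100,
    selected_fields="emp_name,salary",
)
records = [dict(r) for r in rows]  # same conversion the old shim applied
```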
@@ -1551,18 +1105,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         job = self.get_client(project_id=project_id, location=location).get_job(job_id=job_id)
         return job.done(retry=retry)
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.cancel_job",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def cancel_query(self) -> None:
-        """Cancel all started queries that have not yet completed."""
-        if self.running_job_id:
-            self.cancel_job(job_id=self.running_job_id)
-        else:
-            self.log.info("No running BigQuery jobs to cancel.")
-
     @GoogleBaseHook.fallback_to_default_project_id
     def cancel_job(
         self,
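`cancel_query` depended on the hook-side `running_job_id` bookkeeping; `cancel_job` takes the job id explicitly, so keep the id returned by `insert_job`. A sketch with a placeholder project:

```python
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")
job = hook.insert_job(
    configuration={"query": {"query": "SELECT 1", "useLegacySql": False}},
    project_id="my-project",
    nowait=True,
)
hook.cancel_job(job_id=job.job_id, project_id="my-project")
```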
@@ -1705,701 +1247,51 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1705
1247
|
job_api_repr.result(timeout=timeout, retry=retry)
|
1706
1248
|
return job_api_repr
|
1707
1249
|
|
1708
|
-
|
1709
|
-
|
1710
|
-
|
1711
|
-
|
1712
|
-
|
1713
|
-
def run_with_configuration(self, configuration: dict) -> str:
|
1714
|
-
"""
|
1715
|
-
Execute a BigQuery SQL query.
|
1716
|
-
|
1717
|
-
.. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
|
1718
|
-
|
1719
|
-
This method is deprecated. Please use :func:`.insert_job` instead.
|
1720
|
-
|
1721
|
-
:param configuration: The configuration parameter maps directly to
|
1722
|
-
BigQuery's configuration field in the job object. See
|
1723
|
-
https://cloud.google.com/bigquery/docs/reference/v2/jobs for
|
1724
|
-
details.
|
1725
|
-
"""
|
1726
|
-
job = self.insert_job(configuration=configuration, project_id=self.project_id)
|
1727
|
-
self.running_job_id = job.job_id
|
1728
|
-
return job.job_id
|
1729
|
-
|
1730
|
-
@deprecated(
|
1731
|
-
planned_removal_date="November 01, 2024",
|
1732
|
-
use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_job",
|
1733
|
-
category=AirflowProviderDeprecationWarning,
|
1734
|
-
)
|
1735
|
-
def run_load(
|
1736
|
-
self,
|
1737
|
-
destination_project_dataset_table: str,
|
1738
|
-
source_uris: list,
|
1739
|
-
schema_fields: list | None = None,
|
1740
|
-
source_format: str = "CSV",
|
1741
|
-
create_disposition: str = "CREATE_IF_NEEDED",
|
1742
|
-
skip_leading_rows: int = 0,
|
1743
|
-
write_disposition: str = "WRITE_EMPTY",
|
1744
|
-
field_delimiter: str = ",",
|
1745
|
-
max_bad_records: int = 0,
|
1746
|
-
quote_character: str | None = None,
|
1747
|
-
ignore_unknown_values: bool = False,
|
1748
|
-
allow_quoted_newlines: bool = False,
|
1749
|
-
allow_jagged_rows: bool = False,
|
1750
|
-
encoding: str = "UTF-8",
|
1751
|
-
schema_update_options: Iterable | None = None,
|
1752
|
-
src_fmt_configs: dict | None = None,
|
1753
|
-
time_partitioning: dict | None = None,
|
1754
|
-
cluster_fields: list | None = None,
|
1755
|
-
autodetect: bool = False,
|
1756
|
-
encryption_configuration: dict | None = None,
|
1757
|
-
labels: dict | None = None,
|
1758
|
-
description: str | None = None,
|
1759
|
-
) -> str:
|
1760
|
-
"""
|
1761
|
-
Load data from Google Cloud Storage to BigQuery.
|
1762
|
-
|
1763
|
-
.. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
|
1764
|
-
|
1765
|
-
This method is deprecated. Please use :func:`.insert_job` instead.
|
1766
|
-
|
1767
|
-
:param destination_project_dataset_table:
|
1768
|
-
The dotted ``(<project>.|<project>:)<dataset>.<table>($<partition>)`` BigQuery
|
1769
|
-
table to load data into. If ``<project>`` is not included, project will be the
|
1770
|
-
project defined in the connection json. If a partition is specified the
|
1771
|
-
operator will automatically append the data, create a new partition or create
|
1772
|
-
a new DAY partitioned table.
|
1773
|
-
:param schema_fields: The schema field list as defined here:
|
1774
|
-
https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load
|
1775
|
-
Required if autodetect=False; optional if autodetect=True.
|
1776
|
-
:param autodetect: Attempt to autodetect the schema for CSV and JSON
|
1777
|
-
source files.
|
1778
|
-
:param source_uris: The source Google Cloud
|
1779
|
-
Storage URI (e.g. gs://some-bucket/some-file.txt). A single wild
|
1780
|
-
per-object name can be used.
|
1781
|
-
:param source_format: File format to export.
|
1782
|
-
:param create_disposition: The create disposition if the table doesn't exist.
|
1783
|
-
:param skip_leading_rows: Number of rows to skip when loading from a CSV.
|
1784
|
-
:param write_disposition: The write disposition if the table already exists.
|
1785
|
-
:param field_delimiter: The delimiter to use when loading from a CSV.
|
1786
|
-
:param max_bad_records: The maximum number of bad records that BigQuery can
|
1787
|
-
ignore when running the job.
|
1788
|
-
:param quote_character: The value that is used to quote data sections in a CSV
|
1789
|
-
file.
|
1790
|
-
:param ignore_unknown_values: [Optional] Indicates if BigQuery should allow
|
1791
|
-
extra values that are not represented in the table schema.
|
1792
|
-
If true, the extra values are ignored. If false, records with extra columns
|
1793
|
-
are treated as bad records, and if there are too many bad records, an
|
1794
|
-
invalid error is returned in the job result.
|
1795
|
-
:param allow_quoted_newlines: Whether to allow quoted newlines (true) or not
|
1796
|
-
(false).
|
1797
|
-
:param allow_jagged_rows: Accept rows that are missing trailing optional columns.
|
1798
|
-
The missing values are treated as nulls. If false, records with missing
|
1799
|
-
trailing columns are treated as bad records, and if there are too many bad
|
1800
|
-
records, an invalid error is returned in the job result. Only applicable when
|
1801
|
-
source_format is CSV.
|
1802
|
-
:param encoding: The character encoding of the data.
|
1250
|
+
def generate_job_id(self, job_id, dag_id, task_id, logical_date, configuration, force_rerun=False) -> str:
|
1251
|
+
if force_rerun:
|
1252
|
+
hash_base = str(uuid.uuid4())
|
1253
|
+
else:
|
1254
|
+
hash_base = json.dumps(configuration, sort_keys=True)
|
1803
1255
|
|
1804
|
-
|
1805
|
-
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding
|
1806
|
-
:param schema_update_options: Allows the schema of the destination
|
1807
|
-
table to be updated as a side effect of the load job.
|
1808
|
-
:param src_fmt_configs: configure optional fields specific to the source format
|
1809
|
-
:param time_partitioning: configure optional time partitioning fields i.e.
|
1810
|
-
partition by field, type and expiration as per API specifications.
|
1811
|
-
:param cluster_fields: Request that the result of this load be stored sorted
|
1812
|
-
by one or more columns. BigQuery supports clustering for both partitioned and
|
1813
|
-
non-partitioned tables. The order of columns given determines the sort order.
|
1814
|
-
:param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
|
1256
|
+
uniqueness_suffix = md5(hash_base.encode()).hexdigest()
|
1815
1257
|
|
1816
|
-
|
1258
|
+
if job_id:
|
1259
|
+
return f"{job_id}_{uniqueness_suffix}"
|
1817
1260
|
|
1818
|
-
|
1819
|
-
|
1820
|
-
|
1261
|
+
exec_date = logical_date.isoformat()
|
1262
|
+
job_id = f"airflow_{dag_id}_{task_id}_{exec_date}_{uniqueness_suffix}"
|
1263
|
+
return re.sub(r"[:\-+.]", "_", job_id)
|
1821
1264
|
|
1822
|
-
|
1823
|
-
|
1824
|
-
|
1825
|
-
if not
|
1826
|
-
raise ValueError("
|
1265
|
+
def split_tablename(
|
1266
|
+
self, table_input: str, default_project_id: str, var_name: str | None = None
|
1267
|
+
) -> tuple[str, str, str]:
|
1268
|
+
if "." not in table_input:
|
1269
|
+
raise ValueError(f"Expected table name in the format of <dataset>.<table>. Got: {table_input}")
|
1827
1270
|
|
1828
|
-
|
1829
|
-
|
1271
|
+
if not default_project_id:
|
1272
|
+
raise ValueError("INTERNAL: No default project is specified")
|
1830
1273
|
|
1831
|
-
|
1832
|
-
|
1833
|
-
|
1834
|
-
|
1835
|
-
|
1836
|
-
|
1837
|
-
if schema_fields is None and not autodetect:
|
1838
|
-
raise ValueError("You must either pass a schema or autodetect=True.")
|
1839
|
-
|
1840
|
-
if src_fmt_configs is None:
|
1841
|
-
src_fmt_configs = {}
|
1842
|
-
|
1843
|
-
source_format = source_format.upper()
|
1844
|
-
allowed_formats = [
|
1845
|
-
"CSV",
|
1846
|
-
"NEWLINE_DELIMITED_JSON",
|
1847
|
-
"AVRO",
|
1848
|
-
"GOOGLE_SHEETS",
|
1849
|
-
"DATASTORE_BACKUP",
|
1850
|
-
"PARQUET",
|
1851
|
-
]
|
1852
|
-
if source_format not in allowed_formats:
|
1853
|
-
raise ValueError(
|
1854
|
-
f"{source_format} is not a valid source format. "
|
1855
|
-
f"Please use one of the following types: {allowed_formats}."
|
1856
|
-
)
|
1274
|
+
def var_print(var_name):
|
1275
|
+
if var_name is None:
|
1276
|
+
return ""
|
1277
|
+
else:
|
1278
|
+
return f"Format exception for {var_name}: "
|
1857
1279
|
|
1858
|
-
|
1859
|
-
|
1860
|
-
|
1861
|
-
|
1862
|
-
|
1863
|
-
if
|
1280
|
+
if table_input.count(".") + table_input.count(":") > 3:
|
1281
|
+
raise ValueError(f"{var_print(var_name)}Use either : or . to specify project got {table_input}")
|
1282
|
+
cmpt = table_input.rsplit(":", 1)
|
1283
|
+
project_id = None
|
1284
|
+
rest = table_input
|
1285
|
+
if len(cmpt) == 1:
|
1286
|
+
project_id = None
|
1287
|
+
rest = cmpt[0]
|
1288
|
+
elif len(cmpt) == 2 and cmpt[0].count(":") <= 1:
|
1289
|
+
if cmpt[-1].count(".") != 2:
|
1290
|
+
project_id = cmpt[0]
|
1291
|
+
rest = cmpt[1]
|
1292
|
+
else:
|
1864
1293
|
raise ValueError(
|
1865
|
-
f"{
|
1866
|
-
f"Please only use one or more of the following options: {allowed_schema_update_options}"
|
1867
|
-
)
|
1868
|
-
|
1869
|
-
destination_project, destination_dataset, destination_table = self.split_tablename(
|
1870
|
-
table_input=destination_project_dataset_table,
|
1871
|
-
default_project_id=self.project_id,
|
1872
|
-
var_name="destination_project_dataset_table",
|
1873
|
-
)
|
1874
|
-
|
1875
|
-
configuration: dict[str, Any] = {
|
1876
|
-
"load": {
|
1877
|
-
"autodetect": autodetect,
|
1878
|
-
"createDisposition": create_disposition,
|
1879
|
-
"destinationTable": {
|
1880
|
-
"projectId": destination_project,
|
1881
|
-
"datasetId": destination_dataset,
|
1882
|
-
"tableId": destination_table,
|
1883
|
-
},
|
1884
|
-
"sourceFormat": source_format,
|
1885
|
-
"sourceUris": source_uris,
|
1886
|
-
"writeDisposition": write_disposition,
|
1887
|
-
"ignoreUnknownValues": ignore_unknown_values,
|
1888
|
-
}
|
1889
|
-
}
|
1890
|
-
|
1891
|
-
time_partitioning = _cleanse_time_partitioning(destination_project_dataset_table, time_partitioning)
|
1892
|
-
if time_partitioning:
|
1893
|
-
configuration["load"].update({"timePartitioning": time_partitioning})
|
1894
|
-
|
1895
|
-
if cluster_fields:
|
1896
|
-
configuration["load"].update({"clustering": {"fields": cluster_fields}})
|
1897
|
-
|
1898
|
-
if schema_fields:
|
1899
|
-
configuration["load"]["schema"] = {"fields": schema_fields}
|
1900
|
-
|
1901
|
-
if schema_update_options:
|
1902
|
-
if write_disposition not in ["WRITE_APPEND", "WRITE_TRUNCATE"]:
|
1903
|
-
raise ValueError(
|
1904
|
-
"schema_update_options is only "
|
1905
|
-
"allowed if write_disposition is "
|
1906
|
-
"'WRITE_APPEND' or 'WRITE_TRUNCATE'."
|
1907
|
-
)
|
1908
|
-
else:
|
1909
|
-
self.log.info("Adding experimental 'schemaUpdateOptions': %s", schema_update_options)
|
1910
|
-
configuration["load"]["schemaUpdateOptions"] = schema_update_options
|
1911
|
-
|
1912
|
-
if max_bad_records:
|
1913
|
-
configuration["load"]["maxBadRecords"] = max_bad_records
|
1914
|
-
|
1915
|
-
if encryption_configuration:
|
1916
|
-
configuration["load"]["destinationEncryptionConfiguration"] = encryption_configuration
|
1917
|
-
|
1918
|
-
if labels or description:
|
1919
|
-
configuration["load"].update({"destinationTableProperties": {}})
|
1920
|
-
|
1921
|
-
if labels:
|
1922
|
-
configuration["load"]["destinationTableProperties"]["labels"] = labels
|
1923
|
-
|
1924
|
-
if description:
|
1925
|
-
configuration["load"]["destinationTableProperties"]["description"] = description
|
1926
|
-
|
1927
|
-
src_fmt_to_configs_mapping = {
|
1928
|
-
"CSV": [
|
1929
|
-
"allowJaggedRows",
|
1930
|
-
"allowQuotedNewlines",
|
1931
|
-
"autodetect",
|
1932
|
-
"fieldDelimiter",
|
1933
|
-
"skipLeadingRows",
|
1934
|
-
"ignoreUnknownValues",
|
1935
|
-
"nullMarker",
|
1936
|
-
"quote",
|
1937
|
-
"encoding",
|
1938
|
-
"preserveAsciiControlCharacters",
|
1939
|
-
],
|
1940
|
-
"DATASTORE_BACKUP": ["projectionFields"],
|
1941
|
-
"NEWLINE_DELIMITED_JSON": ["autodetect", "ignoreUnknownValues"],
|
1942
|
-
"PARQUET": ["autodetect", "ignoreUnknownValues"],
|
1943
|
-
"AVRO": ["useAvroLogicalTypes"],
|
1944
|
-
}
|
1945
|
-
|
1946
|
-
valid_configs = src_fmt_to_configs_mapping[source_format]
|
1947
|
-
|
1948
|
-
# if following fields are not specified in src_fmt_configs,
|
1949
|
-
# honor the top-level params for backward-compatibility
|
1950
|
-
backward_compatibility_configs = {
|
1951
|
-
"skipLeadingRows": skip_leading_rows,
|
1952
|
-
"fieldDelimiter": field_delimiter,
|
1953
|
-
"ignoreUnknownValues": ignore_unknown_values,
|
1954
|
-
"quote": quote_character,
|
1955
|
-
"allowQuotedNewlines": allow_quoted_newlines,
|
1956
|
-
"encoding": encoding,
|
1957
|
-
}
|
1958
|
-
|
1959
|
-
src_fmt_configs = _validate_src_fmt_configs(
|
1960
|
-
source_format, src_fmt_configs, valid_configs, backward_compatibility_configs
|
1961
|
-
)
|
1962
|
-
|
1963
|
-
configuration["load"].update(src_fmt_configs)
|
1964
|
-
|
1965
|
-
if allow_jagged_rows:
|
1966
|
-
configuration["load"]["allowJaggedRows"] = allow_jagged_rows
|
1967
|
-
|
1968
|
-
job = self.insert_job(configuration=configuration, project_id=self.project_id)
|
1969
|
-
self.running_job_id = job.job_id
|
1970
|
-
return job.job_id
|
1971
|
-
|
1972
|
-
@deprecated(
|
1973
|
-
planned_removal_date="November 01, 2024",
|
1974
|
-
use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_job",
|
1975
|
-
category=AirflowProviderDeprecationWarning,
|
1976
|
-
)
|
1977
|
-
def run_copy(
|
1978
|
-
self,
|
1979
|
-
source_project_dataset_tables: list | str,
|
1980
|
-
destination_project_dataset_table: str,
|
1981
|
-
write_disposition: str = "WRITE_EMPTY",
|
1982
|
-
create_disposition: str = "CREATE_IF_NEEDED",
|
1983
|
-
labels: dict | None = None,
|
1984
|
-
encryption_configuration: dict | None = None,
|
1985
|
-
) -> str:
|
1986
|
-
"""
|
1987
|
-
Copy data from one BigQuery table to another.
|
1988
|
-
|
1989
|
-
.. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy
|
1990
|
-
|
1991
|
-
This method is deprecated. Please use :func:`.insert_job` instead.
|
1992
|
-
|
1993
|
-
:param source_project_dataset_tables: One or more dotted
|
1994
|
-
``(project:|project.)<dataset>.<table>``
|
1995
|
-
BigQuery tables to use as the source data. Use a list if there are
|
1996
|
-
multiple source tables.
|
1997
|
-
If ``<project>`` is not included, project will be the project defined
|
1998
|
-
in the connection json.
|
1999
|
-
:param destination_project_dataset_table: The destination BigQuery
|
2000
|
-
table. Format is: ``(project:|project.)<dataset>.<table>``
|
2001
|
-
:param write_disposition: The write disposition if the table already exists.
|
2002
|
-
:param create_disposition: The create disposition if the table doesn't exist.
|
2003
|
-
:param labels: a dictionary containing labels for the job/query,
|
2004
|
-
passed to BigQuery
|
2005
|
-
:param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
|
2006
|
-
|
2007
|
-
.. code-block:: python
|
2008
|
-
|
2009
|
-
encryption_configuration = {
|
2010
|
-
"kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
|
2011
|
-
}
|
2012
|
-
"""
|
2013
|
-
if not self.project_id:
|
2014
|
-
raise ValueError("The project_id should be set")
|
2015
|
-
|
2016
|
-
source_project_dataset_tables = (
|
2017
|
-
[source_project_dataset_tables]
|
2018
|
-
if not isinstance(source_project_dataset_tables, list)
|
2019
|
-
else source_project_dataset_tables
|
2020
|
-
)
|
2021
|
-
|
2022
|
-
source_project_dataset_tables_fixup = []
|
2023
|
-
for source_project_dataset_table in source_project_dataset_tables:
|
2024
|
-
source_project, source_dataset, source_table = self.split_tablename(
|
2025
|
-
table_input=source_project_dataset_table,
|
2026
|
-
default_project_id=self.project_id,
|
2027
|
-
var_name="source_project_dataset_table",
|
2028
|
-
)
|
2029
|
-
source_project_dataset_tables_fixup.append(
|
2030
|
-
{"projectId": source_project, "datasetId": source_dataset, "tableId": source_table}
|
2031
|
-
)
|
2032
|
-
|
2033
|
-
destination_project, destination_dataset, destination_table = self.split_tablename(
|
2034
|
-
table_input=destination_project_dataset_table, default_project_id=self.project_id
|
2035
|
-
)
|
2036
|
-
configuration = {
|
2037
|
-
"copy": {
|
2038
|
-
"createDisposition": create_disposition,
|
2039
|
-
"writeDisposition": write_disposition,
|
2040
|
-
"sourceTables": source_project_dataset_tables_fixup,
|
2041
|
-
"destinationTable": {
|
2042
|
-
"projectId": destination_project,
|
2043
|
-
"datasetId": destination_dataset,
|
2044
|
-
"tableId": destination_table,
|
2045
|
-
},
|
2046
|
-
}
|
2047
|
-
}
|
2048
|
-
|
2049
|
-
if labels:
|
2050
|
-
configuration["labels"] = labels
|
2051
|
-
|
2052
|
-
if encryption_configuration:
|
2053
|
-
configuration["copy"]["destinationEncryptionConfiguration"] = encryption_configuration
|
2054
|
-
|
2055
|
-
job = self.insert_job(configuration=configuration, project_id=self.project_id)
|
2056
|
-
self.running_job_id = job.job_id
|
2057
|
-
return job.job_id
|
2058
|
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_job",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_extract(
-        self,
-        source_project_dataset_table: str,
-        destination_cloud_storage_uris: list[str],
-        compression: str = "NONE",
-        export_format: str = "CSV",
-        field_delimiter: str = ",",
-        print_header: bool = True,
-        labels: dict | None = None,
-        return_full_job: bool = False,
-    ) -> str | BigQueryJob:
-        """
-        Copy data from BigQuery to Google Cloud Storage.
-
-        .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
-
-        This method is deprecated. Please use :func:`.insert_job` instead.
-
-        :param source_project_dataset_table: The dotted ``<dataset>.<table>``
-            BigQuery table to use as the source data.
-        :param destination_cloud_storage_uris: The destination Google Cloud
-            Storage URI (e.g. gs://some-bucket/some-file.txt). Follows
-            convention defined here:
-            https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple
-        :param compression: Type of compression to use.
-        :param export_format: File format to export.
-        :param field_delimiter: The delimiter to use when extracting to a CSV.
-        :param print_header: Whether to print a header for a CSV file extract.
-        :param labels: a dictionary containing labels for the job/query,
-            passed to BigQuery
-        :param return_full_job: return full job instead of job id only
-        """
-        if not self.project_id:
-            raise ValueError("The project_id should be set")
-
-        source_project, source_dataset, source_table = self.split_tablename(
-            table_input=source_project_dataset_table,
-            default_project_id=self.project_id,
-            var_name="source_project_dataset_table",
-        )
-
-        configuration: dict[str, Any] = {
-            "extract": {
-                "sourceTable": {
-                    "projectId": source_project,
-                    "datasetId": source_dataset,
-                    "tableId": source_table,
-                },
-                "compression": compression,
-                "destinationUris": destination_cloud_storage_uris,
-                "destinationFormat": export_format,
-            }
-        }
-
-        if labels:
-            configuration["labels"] = labels
-
-        if export_format == "CSV":
-            # Only set fieldDelimiter and printHeader fields if using CSV.
-            # Google does not like it if you set these fields for other export
-            # formats.
-            configuration["extract"]["fieldDelimiter"] = field_delimiter
-            configuration["extract"]["printHeader"] = print_header
-
-        job = self.insert_job(configuration=configuration, project_id=self.project_id)
-        self.running_job_id = job.job_id
-        if return_full_job:
-            return job
-        return job.job_id
-
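As with the copy path above, the extract configuration that run_extract produced can be handed straight to insert_job. A hedged sketch with placeholder names; fieldDelimiter and printHeader stay CSV-only, mirroring the guard in the removed body:

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook(gcp_conn_id="google_cloud_default")  # placeholder connection id
    configuration = {
        "extract": {
            "sourceTable": {
                "projectId": "my-project",
                "datasetId": "my_dataset",
                "tableId": "my_table",
            },
            "compression": "NONE",
            "destinationUris": ["gs://some-bucket/some-file.csv"],
            "destinationFormat": "CSV",
            "fieldDelimiter": ",",  # only valid for CSV exports
            "printHeader": True,  # only valid for CSV exports
        }
    }
    job = hook.insert_job(configuration=configuration, project_id="my-project")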
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_job",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_query(
-        self,
-        sql: str,
-        destination_dataset_table: str | None = None,
-        write_disposition: str = "WRITE_EMPTY",
-        allow_large_results: bool = False,
-        flatten_results: bool | None = None,
-        udf_config: list | None = None,
-        use_legacy_sql: bool | None = None,
-        maximum_billing_tier: int | None = None,
-        maximum_bytes_billed: float | None = None,
-        create_disposition: str = "CREATE_IF_NEEDED",
-        query_params: list | None = None,
-        labels: dict | None = None,
-        schema_update_options: Iterable | None = None,
-        priority: str | None = None,
-        time_partitioning: dict | None = None,
-        api_resource_configs: dict | None = None,
-        cluster_fields: list[str] | None = None,
-        location: str | None = None,
-        encryption_configuration: dict | None = None,
-    ) -> str:
-        """
-        Execute a BigQuery SQL query.
-
-        Optionally persists results in a BigQuery table.
-
-        .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
-
-        This method is deprecated. Please use :func:`.insert_job` instead.
-
-        For more details about these parameters.
-
-        :param sql: The BigQuery SQL to execute.
-        :param destination_dataset_table: The dotted ``<dataset>.<table>``
-            BigQuery table to save the query results.
-        :param write_disposition: What to do if the table already exists in
-            BigQuery.
-        :param allow_large_results: Whether to allow large results.
-        :param flatten_results: If true and query uses legacy SQL dialect, flattens
-            all nested and repeated fields in the query results. ``allowLargeResults``
-            must be true if this is set to false. For standard SQL queries, this
-            flag is ignored and results are never flattened.
-        :param udf_config: The User Defined Function configuration for the query.
-            See https://cloud.google.com/bigquery/user-defined-functions for details.
-        :param use_legacy_sql: Whether to use legacy SQL (true) or standard SQL (false).
-            If `None`, defaults to `self.use_legacy_sql`.
-        :param api_resource_configs: a dictionary that contain params
-            'configuration' applied for Google BigQuery Jobs API:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs
-            for example, {'query': {'useQueryCache': False}}. You could use it
-            if you need to provide some params that are not supported by the
-            BigQueryHook like args.
-        :param maximum_billing_tier: Positive integer that serves as a
-            multiplier of the basic price.
-        :param maximum_bytes_billed: Limits the bytes billed for this job.
-            Queries that will have bytes billed beyond this limit will fail
-            (without incurring a charge). If unspecified, this will be
-            set to your project default.
-        :param create_disposition: Specifies whether the job is allowed to
-            create new tables.
-        :param query_params: a list of dictionary containing query parameter types and
-            values, passed to BigQuery
-        :param labels: a dictionary containing labels for the job/query,
-            passed to BigQuery
-        :param schema_update_options: Allows the schema of the destination
-            table to be updated as a side effect of the query job.
-        :param priority: Specifies a priority for the query.
-            Possible values include INTERACTIVE and BATCH.
-            If `None`, defaults to `self.priority`.
-        :param time_partitioning: configure optional time partitioning fields i.e.
-            partition by field, type and expiration as per API specifications.
-        :param cluster_fields: Request that the result of this query be stored sorted
-            by one or more columns. BigQuery supports clustering for both partitioned and
-            non-partitioned tables. The order of columns given determines the sort order.
-        :param location: The geographic location of the job. Required except for
-            US and EU. See details at
-            https://cloud.google.com/bigquery/docs/locations#specifying_your_location
-        :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-
-            .. code-block:: python
-
-                encryption_configuration = {
-                    "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
-                }
-        """
-        if not self.project_id:
-            raise ValueError("The project_id should be set")
-
-        labels = labels or self.labels
-        schema_update_options = list(schema_update_options or [])
-
-        priority = priority or self.priority
-
-        if time_partitioning is None:
-            time_partitioning = {}
-
-        if not api_resource_configs:
-            api_resource_configs = self.api_resource_configs
-        else:
-            _validate_value("api_resource_configs", api_resource_configs, dict)
-        configuration = deepcopy(api_resource_configs)
-        if "query" not in configuration:
-            configuration["query"] = {}
-
-        else:
-            _validate_value("api_resource_configs['query']", configuration["query"], dict)
-
-        if sql is None and not configuration["query"].get("query", None):
-            raise TypeError("`BigQueryBaseCursor.run_query` missing 1 required positional argument: `sql`")
-
-        # BigQuery also allows you to define how you want a table's schema to change
-        # as a side effect of a query job
-        # for more details:
-        # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.schemaUpdateOptions
-
-        allowed_schema_update_options = ["ALLOW_FIELD_ADDITION", "ALLOW_FIELD_RELAXATION"]
-
-        if not set(allowed_schema_update_options).issuperset(set(schema_update_options)):
-            raise ValueError(
-                f"{schema_update_options} contains invalid schema update options."
-                f" Please only use one or more of the following options: {allowed_schema_update_options}"
-            )
-
-        if schema_update_options:
-            if write_disposition not in ["WRITE_APPEND", "WRITE_TRUNCATE"]:
-                raise ValueError(
-                    "schema_update_options is only "
-                    "allowed if write_disposition is "
-                    "'WRITE_APPEND' or 'WRITE_TRUNCATE'."
-                )
-
-        if destination_dataset_table:
-            destination_project, destination_dataset, destination_table = self.split_tablename(
-                table_input=destination_dataset_table, default_project_id=self.project_id
-            )
-
-            destination_dataset_table = {  # type: ignore
-                "projectId": destination_project,
-                "datasetId": destination_dataset,
-                "tableId": destination_table,
-            }
-
-        if cluster_fields:
-            cluster_fields = {"fields": cluster_fields}  # type: ignore
-
-        query_param_list: list[tuple[Any, str, str | bool | None | dict, type | tuple[type]]] = [
-            (sql, "query", None, (str,)),
-            (priority, "priority", priority, (str,)),
-            (use_legacy_sql, "useLegacySql", self.use_legacy_sql, bool),
-            (query_params, "queryParameters", None, list),
-            (udf_config, "userDefinedFunctionResources", None, list),
-            (maximum_billing_tier, "maximumBillingTier", None, int),
-            (maximum_bytes_billed, "maximumBytesBilled", None, float),
-            (time_partitioning, "timePartitioning", {}, dict),
-            (schema_update_options, "schemaUpdateOptions", None, list),
-            (destination_dataset_table, "destinationTable", None, dict),
-            (cluster_fields, "clustering", None, dict),
-        ]
-
-        for param, param_name, param_default, param_type in query_param_list:
-            if param_name not in configuration["query"] and param in [None, {}, ()]:
-                if param_name == "timePartitioning":
-                    param_default = _cleanse_time_partitioning(destination_dataset_table, time_partitioning)
-                param = param_default
-
-            if param in [None, {}, ()]:
-                continue
-
-            _api_resource_configs_duplication_check(param_name, param, configuration["query"])
-
-            configuration["query"][param_name] = param
-
-            # check valid type of provided param,
-            # it last step because we can get param from 2 sources,
-            # and first of all need to find it
-
-            _validate_value(param_name, configuration["query"][param_name], param_type)
-
-            if param_name == "schemaUpdateOptions" and param:
-                self.log.info("Adding experimental 'schemaUpdateOptions': %s", schema_update_options)
-
-            if param_name == "destinationTable":
-                for key in ["projectId", "datasetId", "tableId"]:
-                    if key not in configuration["query"]["destinationTable"]:
-                        raise ValueError(
-                            "Not correct 'destinationTable' in "
-                            "api_resource_configs. 'destinationTable' "
-                            "must be a dict with {'projectId':'', "
-                            "'datasetId':'', 'tableId':''}"
-                        )
-                else:
-                    configuration["query"].update(
-                        {
-                            "allowLargeResults": allow_large_results,
-                            "flattenResults": flatten_results,
-                            "writeDisposition": write_disposition,
-                            "createDisposition": create_disposition,
-                        }
-                    )
-
-        if (
-            "useLegacySql" in configuration["query"]
-            and configuration["query"]["useLegacySql"]
-            and "queryParameters" in configuration["query"]
-        ):
-            raise ValueError("Query parameters are not allowed when using legacy SQL")
-
-        if labels:
-            _api_resource_configs_duplication_check("labels", labels, configuration)
-            configuration["labels"] = labels
-
-        if encryption_configuration:
-            configuration["query"]["destinationEncryptionConfiguration"] = encryption_configuration
-
-        job = self.insert_job(configuration=configuration, project_id=self.project_id, location=location)
-        self.running_job_id = job.job_id
-        return job.job_id
-
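The run_query wrapper ultimately reduced to a "query" configuration plus insert_job. Its validation above reflects BigQuery API constraints that also hold for hand-built configurations: schema update options require WRITE_APPEND or WRITE_TRUNCATE, and query parameters are not supported under legacy SQL. An illustrative equivalent, placeholder names throughout:

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook(gcp_conn_id="google_cloud_default")  # placeholder connection id
    configuration = {
        "query": {
            "query": "SELECT 1",
            "useLegacySql": False,
            "writeDisposition": "WRITE_EMPTY",
            "createDisposition": "CREATE_IF_NEEDED",
            "destinationTable": {
                "projectId": "my-project",
                "datasetId": "my_dataset",
                "tableId": "results",
            },
        }
    }
    job = hook.insert_job(configuration=configuration, project_id="my-project", location="US")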
-    def generate_job_id(self, job_id, dag_id, task_id, logical_date, configuration, force_rerun=False) -> str:
-        if force_rerun:
-            hash_base = str(uuid.uuid4())
-        else:
-            hash_base = json.dumps(configuration, sort_keys=True)
-
-        uniqueness_suffix = md5(hash_base.encode()).hexdigest()
-
-        if job_id:
-            return f"{job_id}_{uniqueness_suffix}"
-
-        exec_date = logical_date.isoformat()
-        job_id = f"airflow_{dag_id}_{task_id}_{exec_date}_{uniqueness_suffix}"
-        return re.sub(r"[:\-+.]", "_", job_id)
-
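A standalone re-implementation of the id scheme shown above can be handy for predicting the job ids a task will produce; this sketch assumes the same md5-over-sorted-JSON behaviour as the removed helper:

    import json
    import re
    import uuid
    from hashlib import md5

    def sketch_generate_job_id(job_id, dag_id, task_id, logical_date, configuration, force_rerun=False):
        # force_rerun swaps the deterministic config hash for a random UUID;
        # otherwise identical configurations always hash to the same suffix.
        hash_base = str(uuid.uuid4()) if force_rerun else json.dumps(configuration, sort_keys=True)
        uniqueness_suffix = md5(hash_base.encode()).hexdigest()
        if job_id:
            return f"{job_id}_{uniqueness_suffix}"
        exec_date = logical_date.isoformat()
        # normalise the ISO-timestamp separators (":", "-", "+", ".") into underscores
        return re.sub(r"[:\-+.]", "_", f"airflow_{dag_id}_{task_id}_{exec_date}_{uniqueness_suffix}")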
-    def split_tablename(
-        self, table_input: str, default_project_id: str, var_name: str | None = None
-    ) -> tuple[str, str, str]:
-        if "." not in table_input:
-            raise ValueError(f"Expected table name in the format of <dataset>.<table>. Got: {table_input}")
-
-        if not default_project_id:
-            raise ValueError("INTERNAL: No default project is specified")
-
-        def var_print(var_name):
-            if var_name is None:
-                return ""
-            else:
-                return f"Format exception for {var_name}: "
-
-        if table_input.count(".") + table_input.count(":") > 3:
-            raise ValueError(f"{var_print(var_name)}Use either : or . to specify project got {table_input}")
-        cmpt = table_input.rsplit(":", 1)
-        project_id = None
-        rest = table_input
-        if len(cmpt) == 1:
-            project_id = None
-            rest = cmpt[0]
-        elif len(cmpt) == 2 and cmpt[0].count(":") <= 1:
-            if cmpt[-1].count(".") != 2:
-                project_id = cmpt[0]
-                rest = cmpt[1]
-        else:
-            raise ValueError(
-                f"{var_print(var_name)}Expect format of (<project:)<dataset>.<table>, got {table_input}"
+                f"{var_print(var_name)}Expect format of (<project:)<dataset>.<table>, got {table_input}"
             )
 
         cmpt = rest.split(".")
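For illustration, the parsing above accepts either ":" or "." between project and dataset, and the docstrings earlier in the file note that an omitted project falls back to the connection's default. A hypothetical call against the helper shown above, with hook a BigQueryHook instance and all names placeholders:

    project, dataset, table = hook.split_tablename(
        table_input="my-project.my_dataset.my_table",
        default_project_id="fallback-project",
    )
    # -> ("my-project", "my_dataset", "my_table")
    # "my-project:my_dataset.my_table" parses identically, and a bare
    # "my_dataset.my_table" falls back to default_project_id.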
@@ -2548,343 +1440,6 @@ class BigQueryBaseCursor(LoggingMixin):
         self.labels = labels
         self.hook = hook
 
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_table",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def create_empty_table(self, *args, **kwargs):
-        """
-        Create empty table. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_table`
-        instead.
-        """
-        return self.hook.create_empty_table(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_dataset",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def create_empty_dataset(self, *args, **kwargs) -> dict[str, Any]:
-        """
-        Create empty dataset. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_dataset`
-        instead.
-        """
-        return self.hook.create_empty_dataset(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_dataset_tables(self, *args, **kwargs) -> list[dict[str, Any]]:
-        """
-        Get dataset tables. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables`
-        instead.
-        """
-        return self.hook.get_dataset_tables(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.delete_dataset",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def delete_dataset(self, *args, **kwargs) -> None:
-        """
-        Delete dataset. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.delete_dataset`
-        instead.
-        """
-        return self.hook.delete_dataset(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_external_table",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def create_external_table(self, *args, **kwargs):
-        """
-        Create external table. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_external_table`
-        instead.
-        """
-        return self.hook.create_external_table(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_table",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def patch_table(self, *args, **kwargs) -> None:
-        """
-        Patch table. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_table`
-        instead.
-        """
-        return self.hook.patch_table(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_all",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def insert_all(self, *args, **kwargs) -> None:
-        """
-        Insert all. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_all`
-        instead.
-        """
-        return self.hook.insert_all(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.update_dataset",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def update_dataset(self, *args, **kwargs) -> dict:
-        """
-        Update dataset. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.update_dataset`
-        instead.
-        """
-        return Dataset.to_api_repr(self.hook.update_dataset(*args, **kwargs))
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_dataset",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def patch_dataset(self, *args, **kwargs) -> dict:
-        """
-        Patch dataset. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_dataset`
-        instead.
-        """
-        return self.hook.patch_dataset(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables_list",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_dataset_tables_list(self, *args, **kwargs) -> list[dict[str, Any]]:
-        """
-        Get dataset tables list. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables_list`
-        instead.
-        """
-        return self.hook.get_dataset_tables_list(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_datasets_list",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_datasets_list(self, *args, **kwargs) -> list | HTTPIterator:
-        """
-        Get datasets list. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_datasets_list`
-        instead.
-        """
-        return self.hook.get_datasets_list(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_dataset(self, *args, **kwargs) -> Dataset:
-        """
-        Get dataset. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset`
-        instead.
-        """
-        return self.hook.get_dataset(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_grant_dataset_view_access",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_grant_dataset_view_access(self, *args, **kwargs) -> dict:
-        """
-        Grant view access to dataset. DEPRECATED.
-
-        Please use
-        :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_grant_dataset_view_access`
-        instead.
-        """
-        return self.hook.run_grant_dataset_view_access(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_upsert",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_table_upsert(self, *args, **kwargs) -> dict:
-        """
-        Upsert table. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_upsert`
-        instead.
-        """
-        return self.hook.run_table_upsert(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_delete",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_table_delete(self, *args, **kwargs) -> None:
-        """
-        Delete table. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_delete`
-        instead.
-        """
-        return self.hook.run_table_delete(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_tabledata",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_tabledata(self, *args, **kwargs) -> list[dict]:
-        """
-        Get table data. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_tabledata`
-        instead.
-        """
-        return self.hook.get_tabledata(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_schema",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_schema(self, *args, **kwargs) -> dict:
-        """
-        Get Schema. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_schema`
-        instead.
-        """
-        return self.hook.get_schema(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.poll_job_complete",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def poll_job_complete(self, *args, **kwargs) -> bool:
-        """
-        Poll for job completion.DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.poll_job_complete`
-        instead.
-        """
-        return self.hook.poll_job_complete(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.cancel_query",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def cancel_query(self, *args, **kwargs) -> None:
-        """
-        Cancel query. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.cancel_query`
-        instead.
-        """
-        return self.hook.cancel_query(*args, **kwargs)  # type: ignore
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_with_configuration",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_with_configuration(self, *args, **kwargs) -> str:
-        """
-        Run with configuration. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_with_configuration`
-        instead.
-        """
-        return self.hook.run_with_configuration(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_load",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_load(self, *args, **kwargs) -> str:
-        """
-        Run load. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_load`
-        instead.
-        """
-        return self.hook.run_load(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_copy",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_copy(self, *args, **kwargs) -> str:
-        """
-        Run copy. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_copy`
-        instead.
-        """
-        return self.hook.run_copy(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_extract",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_extract(self, *args, **kwargs) -> str | BigQueryJob:
-        """
-        Run extraction. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_extract`
-        instead.
-        """
-        return self.hook.run_extract(*args, **kwargs)
-
-    @deprecated(
-        planned_removal_date="November 01, 2024",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_query",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_query(self, *args, **kwargs) -> str:
-        """
-        Run query. DEPRECATED.
-
-        Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_query`
-        instead.
-        """
-        return self.hook.run_query(*args, **kwargs)
-
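Every wrapper in the block above forwarded *args/**kwargs unchanged to the underlying hook, so migration is largely mechanical: call the same-named method on BigQueryHook directly. The one wrinkle is update_dataset, where the hook returns a google.cloud.bigquery Dataset object rather than its dict representation. A few illustrative calls, placeholder names throughout:

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook(gcp_conn_id="google_cloud_default")  # placeholder connection id

    # formerly cursor.create_empty_dataset(...)
    hook.create_empty_dataset(dataset_id="my_dataset", project_id="my-project")

    # formerly cursor.get_dataset_tables(...)
    tables = hook.get_dataset_tables(dataset_id="my_dataset", project_id="my-project")

    # formerly cursor.delete_dataset(...)
    hook.delete_dataset(dataset_id="my_dataset", project_id="my-project", delete_contents=True)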
 
 class BigQueryCursor(BigQueryBaseCursor):
     """
@@ -3541,7 +2096,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
         query_request = {
             "query": "SELECT partition_id "
             f"FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.PARTITIONS`"
-            + (f" WHERE
+            + (f" WHERE table_name='{table_id}'" if table_id else ""),
             "useLegacySql": False,
         }
         job_query_resp = await job_client.query(query_request, cast(Session, session))
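The INFORMATION_SCHEMA.PARTITIONS view keys its rows by a table_name column, which is what the one-line fix above filters on. For reference, the corrected f-string renders a query of the following shape; all names are placeholders:

    project_id, dataset_id, table_id = "my-project", "my_dataset", "my_table"
    query = (
        "SELECT partition_id "
        f"FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.PARTITIONS`"
        + (f" WHERE table_name='{table_id}'" if table_id else "")
    )
    # -> SELECT partition_id FROM `my-project.my_dataset.INFORMATION_SCHEMA.PARTITIONS`
    #    WHERE table_name='my_table'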