apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/ads/hooks/ads.py +39 -6
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/bundles/__init__.py +16 -0
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/alloy_db.py +1 -1
- airflow/providers/google/cloud/hooks/bigquery.py +176 -293
- airflow/providers/google/cloud/hooks/cloud_batch.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_build.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_composer.py +288 -15
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_run.py +18 -10
- airflow/providers/google/cloud/hooks/cloud_sql.py +102 -23
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +29 -7
- airflow/providers/google/cloud/hooks/compute.py +1 -1
- airflow/providers/google/cloud/hooks/compute_ssh.py +6 -2
- airflow/providers/google/cloud/hooks/datacatalog.py +10 -1
- airflow/providers/google/cloud/hooks/dataflow.py +72 -95
- airflow/providers/google/cloud/hooks/dataform.py +1 -1
- airflow/providers/google/cloud/hooks/datafusion.py +21 -19
- airflow/providers/google/cloud/hooks/dataplex.py +2 -2
- airflow/providers/google/cloud/hooks/dataprep.py +1 -1
- airflow/providers/google/cloud/hooks/dataproc.py +73 -72
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +1 -1
- airflow/providers/google/cloud/hooks/dlp.py +1 -1
- airflow/providers/google/cloud/hooks/functions.py +1 -1
- airflow/providers/google/cloud/hooks/gcs.py +112 -15
- airflow/providers/google/cloud/hooks/gdm.py +1 -1
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +3 -3
- airflow/providers/google/cloud/hooks/looker.py +6 -2
- airflow/providers/google/cloud/hooks/managed_kafka.py +1 -1
- airflow/providers/google/cloud/hooks/mlengine.py +4 -3
- airflow/providers/google/cloud/hooks/pubsub.py +3 -0
- airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
- airflow/providers/google/cloud/hooks/spanner.py +74 -9
- airflow/providers/google/cloud/hooks/stackdriver.py +11 -9
- airflow/providers/google/cloud/hooks/tasks.py +1 -1
- airflow/providers/google/cloud/hooks/translate.py +2 -2
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +2 -210
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -3
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +28 -2
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +308 -8
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/vision.py +3 -3
- airflow/providers/google/cloud/hooks/workflows.py +1 -1
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -13
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -96
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -95
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
- airflow/providers/google/cloud/links/managed_kafka.py +0 -70
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +58 -22
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +102 -1
- airflow/providers/google/cloud/openlineage/mixins.py +10 -8
- airflow/providers/google/cloud/openlineage/utils.py +15 -1
- airflow/providers/google/cloud/operators/alloy_db.py +71 -56
- airflow/providers/google/cloud/operators/bigquery.py +73 -636
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -6
- airflow/providers/google/cloud/operators/bigtable.py +37 -8
- airflow/providers/google/cloud/operators/cloud_base.py +21 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +3 -3
- airflow/providers/google/cloud/operators/cloud_build.py +76 -33
- airflow/providers/google/cloud/operators/cloud_composer.py +129 -41
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
- airflow/providers/google/cloud/operators/cloud_run.py +24 -6
- airflow/providers/google/cloud/operators/cloud_sql.py +8 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +93 -12
- airflow/providers/google/cloud/operators/compute.py +9 -41
- airflow/providers/google/cloud/operators/datacatalog.py +157 -21
- airflow/providers/google/cloud/operators/dataflow.py +40 -16
- airflow/providers/google/cloud/operators/dataform.py +15 -5
- airflow/providers/google/cloud/operators/datafusion.py +42 -21
- airflow/providers/google/cloud/operators/dataplex.py +194 -110
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +80 -36
- airflow/providers/google/cloud/operators/dataproc_metastore.py +97 -89
- airflow/providers/google/cloud/operators/datastore.py +23 -7
- airflow/providers/google/cloud/operators/dlp.py +6 -29
- airflow/providers/google/cloud/operators/functions.py +17 -8
- airflow/providers/google/cloud/operators/gcs.py +12 -9
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +62 -100
- airflow/providers/google/cloud/operators/looker.py +2 -2
- airflow/providers/google/cloud/operators/managed_kafka.py +108 -53
- airflow/providers/google/cloud/operators/natural_language.py +1 -1
- airflow/providers/google/cloud/operators/pubsub.py +68 -15
- airflow/providers/google/cloud/operators/spanner.py +26 -13
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -3
- airflow/providers/google/cloud/operators/stackdriver.py +1 -9
- airflow/providers/google/cloud/operators/tasks.py +1 -12
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -3
- airflow/providers/google/cloud/operators/translate.py +41 -17
- airflow/providers/google/cloud/operators/translate_speech.py +2 -3
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +30 -10
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +55 -27
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -115
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +12 -10
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +31 -8
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
- airflow/providers/google/cloud/operators/vision.py +2 -2
- airflow/providers/google/cloud/operators/workflows.py +18 -15
- airflow/providers/google/cloud/secrets/secret_manager.py +3 -2
- airflow/providers/google/cloud/sensors/bigquery.py +3 -3
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -3
- airflow/providers/google/cloud/sensors/bigtable.py +11 -4
- airflow/providers/google/cloud/sensors/cloud_composer.py +533 -30
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -3
- airflow/providers/google/cloud/sensors/dataflow.py +26 -10
- airflow/providers/google/cloud/sensors/dataform.py +2 -3
- airflow/providers/google/cloud/sensors/datafusion.py +4 -5
- airflow/providers/google/cloud/sensors/dataplex.py +2 -3
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +2 -3
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -3
- airflow/providers/google/cloud/sensors/gcs.py +4 -5
- airflow/providers/google/cloud/sensors/looker.py +2 -3
- airflow/providers/google/cloud/sensors/pubsub.py +4 -5
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -3
- airflow/providers/google/cloud/sensors/workflows.py +2 -3
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +4 -3
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +10 -5
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +21 -13
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +4 -3
- airflow/providers/google/cloud/transfers/gcs_to_local.py +6 -4
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +11 -5
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +13 -7
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +14 -5
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +76 -35
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
- airflow/providers/google/cloud/triggers/cloud_run.py +3 -3
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +92 -2
- airflow/providers/google/cloud/triggers/dataflow.py +122 -0
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +14 -2
- airflow/providers/google/cloud/triggers/dataproc.py +123 -53
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +47 -28
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +15 -19
- airflow/providers/google/cloud/triggers/vertex_ai.py +1 -1
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +2 -2
- airflow/providers/google/cloud/utils/field_sanitizer.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +2 -3
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +27 -9
- airflow/providers/google/common/hooks/operation_helpers.py +1 -1
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +3 -4
- airflow/providers/google/firebase/hooks/firestore.py +1 -1
- airflow/providers/google/firebase/operators/firestore.py +3 -3
- airflow/providers/google/get_provider_info.py +56 -52
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +27 -2
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/campaign_manager.py +1 -1
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +2 -3
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +6 -6
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +3 -64
- airflow/providers/google/suite/hooks/calendar.py +2 -2
- airflow/providers/google/suite/hooks/sheets.py +16 -2
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +3 -3
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/METADATA +90 -46
- apache_airflow_providers_google-19.3.0.dist-info/RECORD +331 -0
- apache_airflow_providers_google-19.3.0.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/hooks/automl.py +0 -673
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1362
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -112
- apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.3.0.dist-info/licenses}/LICENSE +0 -0
--- a/airflow/providers/google/cloud/hooks/bigquery.py
+++ b/airflow/providers/google/cloud/hooks/bigquery.py
@@ -20,17 +20,18 @@
 
 from __future__ import annotations
 
-import asyncio
 import json
 import logging
 import re
 import time
 import uuid
+import warnings
 from collections.abc import Iterable, Mapping, Sequence
 from copy import deepcopy
 from datetime import datetime, timedelta
-from typing import TYPE_CHECKING, Any, NoReturn,
+from typing import TYPE_CHECKING, Any, Literal, NoReturn, cast, overload
 
+import pendulum
 from aiohttp import ClientSession as ClientSession
 from gcloud.aio.bigquery import Job, Table as Table_async
 from google.cloud.bigquery import (
@@ -58,32 +59,39 @@ from pandas_gbq import read_gbq
 from pandas_gbq.gbq import GbqConnector  # noqa: F401 used in ``airflow.contrib.hooks.bigquery``
 from sqlalchemy import create_engine
 
-from airflow.exceptions import
+from airflow.exceptions import AirflowOptionalProviderFeatureException, AirflowProviderDeprecationWarning
 from airflow.providers.common.compat.lineage.hook import get_hook_lineage_collector
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.providers.common.sql.hooks.sql import DbApiHook
 from airflow.providers.google.cloud.utils.bigquery import bq_cast
 from airflow.providers.google.cloud.utils.credentials_provider import _get_scopes
 from airflow.providers.google.common.consts import CLIENT_INFO
 from airflow.providers.google.common.deprecated import deprecated
 from airflow.providers.google.common.hooks.base_google import (
+    _UNSET,
     PROVIDE_PROJECT_ID,
     GoogleBaseAsyncHook,
     GoogleBaseHook,
     get_field,
 )
+from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
 from airflow.utils.hashlib_wrapper import md5
 from airflow.utils.helpers import convert_camel_to_snake
 from airflow.utils.log.logging_mixin import LoggingMixin
+from airflow.utils.types import DagRunType
 
 if TYPE_CHECKING:
     import pandas as pd
+    import polars as pl
     from google.api_core.page_iterator import HTTPIterator
     from google.api_core.retry import Retry
     from requests import Session
 
+    from airflow.sdk import Context
+
 log = logging.getLogger(__name__)
 
-BigQueryJob =
+BigQueryJob = CopyJob | QueryJob | LoadJob | ExtractJob
 
 
 class BigQueryHook(GoogleBaseHook, DbApiHook):
@@ -151,21 +159,47 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
 
     def __init__(
         self,
-        use_legacy_sql: bool =
-        location: str | None =
-        priority: str =
-        api_resource_configs: dict | None =
+        use_legacy_sql: bool | object = _UNSET,
+        location: str | None | object = _UNSET,
+        priority: str | object = _UNSET,
+        api_resource_configs: dict | None | object = _UNSET,
         impersonation_scopes: str | Sequence[str] | None = None,
-        labels: dict | None =
+        labels: dict | None | object = _UNSET,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
-
-
-
+        # Use sentinel pattern to distinguish "not provided" from "explicitly provided"
+        if use_legacy_sql is _UNSET:
+            value = self._get_field("use_legacy_sql", _UNSET)
+            self.use_legacy_sql: bool = value if value is not None else True
+        else:
+            self.use_legacy_sql = use_legacy_sql  # type: ignore[assignment]
+
+        if location is _UNSET:
+            self.location: str | None = self._get_field("location", _UNSET)
+        else:
+            self.location = location  # type: ignore[assignment]
+
+        if priority is _UNSET:
+            value = self._get_field("priority", _UNSET)
+            self.priority: str = value if value is not None else "INTERACTIVE"
+        else:
+            self.priority = priority  # type: ignore[assignment]
+
         self.running_job_id: str | None = None
-
-
+
+        if api_resource_configs is _UNSET:
+            value = self._get_field("api_resource_configs", _UNSET)
+            self.api_resource_configs: dict = value if value is not None else {}
+        else:
+            self.api_resource_configs = api_resource_configs or {}  # type: ignore[assignment]
+
+        if labels is _UNSET:
+            value = self._get_field("labels", _UNSET)
+            self.labels = value if value is not None else {}
+        else:
+            self.labels = labels or {}  # type: ignore[assignment]
+
         self.impersonation_scopes: str | Sequence[str] | None = impersonation_scopes
 
     def get_conn(self) -> BigQueryConnection:
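
Note (illustrative, not part of the diff): with the _UNSET sentinel above, an argument passed explicitly to BigQueryHook always wins over the connection extras, and the documented defaults (use_legacy_sql=True, priority="INTERACTIVE", empty labels/api_resource_configs) apply only when neither is provided. A minimal sketch, assuming a configured `google_cloud_default` connection:

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    # Explicit keyword argument: overrides any value stored in the connection extras.
    hook = BigQueryHook(gcp_conn_id="google_cloud_default", use_legacy_sql=False)

    # Nothing passed: the hook reads the connection extras first and only then
    # falls back to the documented defaults (use_legacy_sql=True, priority="INTERACTIVE").
    hook_with_defaults = BigQueryHook(gcp_conn_id="google_cloud_default")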
@@ -275,15 +309,57 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         """
         raise NotImplementedError()
 
-    def
+    def _get_pandas_df(
         self,
         sql: str,
         parameters: Iterable | Mapping[str, Any] | None = None,
         dialect: str | None = None,
         **kwargs,
     ) -> pd.DataFrame:
+        if dialect is None:
+            dialect = "legacy" if self.use_legacy_sql else "standard"
+
+        credentials, project_id = self.get_credentials_and_project_id()
+
+        return read_gbq(sql, project_id=project_id, dialect=dialect, credentials=credentials, **kwargs)
+
+    def _get_polars_df(self, sql, parameters=None, dialect=None, **kwargs) -> pl.DataFrame:
+        try:
+            import polars as pl
+        except ImportError:
+            raise AirflowOptionalProviderFeatureException(
+                "Polars is not installed. Please install it with `pip install polars`."
+            )
+
+        if dialect is None:
+            dialect = "legacy" if self.use_legacy_sql else "standard"
+
+        credentials, project_id = self.get_credentials_and_project_id()
+
+        pandas_df = read_gbq(sql, project_id=project_id, dialect=dialect, credentials=credentials, **kwargs)
+        return pl.from_pandas(pandas_df)
+
+    @overload
+    def get_df(
+        self, sql, parameters=None, dialect=None, *, df_type: Literal["pandas"] = "pandas", **kwargs
+    ) -> pd.DataFrame: ...
+
+    @overload
+    def get_df(
+        self, sql, parameters=None, dialect=None, *, df_type: Literal["polars"], **kwargs
+    ) -> pl.DataFrame: ...
+
+    def get_df(
+        self,
+        sql,
+        parameters=None,
+        dialect=None,
+        *,
+        df_type: Literal["pandas", "polars"] = "pandas",
+        **kwargs,
+    ) -> pd.DataFrame | pl.DataFrame:
         """
-        Get a
+        Get a DataFrame for the BigQuery results.
 
         The DbApiHook method must be overridden because Pandas doesn't support
         PEP 249 connections, except for SQLite.
@@ -299,12 +375,19 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
             defaults to use `self.use_legacy_sql` if not specified
         :param kwargs: (optional) passed into pandas_gbq.read_gbq method
         """
-        if
-
+        if df_type == "polars":
+            return self._get_polars_df(sql, parameters, dialect, **kwargs)
 
-
+        if df_type == "pandas":
+            return self._get_pandas_df(sql, parameters, dialect, **kwargs)
 
-
+    @deprecated(
+        planned_removal_date="November 30, 2025",
+        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_df",
+        category=AirflowProviderDeprecationWarning,
+    )
+    def get_pandas_df(self, sql, parameters=None, dialect=None, **kwargs):
+        return self._get_pandas_df(sql, parameters, dialect, **kwargs)
 
     @GoogleBaseHook.fallback_to_default_project_id
     def table_exists(self, dataset_id: str, table_id: str, project_id: str) -> bool:
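
Note (illustrative, not part of the diff): get_pandas_df is deprecated in favour of get_df, which returns a pandas DataFrame by default and a polars DataFrame when df_type="polars" is passed (polars is an optional dependency). A minimal sketch, assuming a configured `google_cloud_default` connection:

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook(gcp_conn_id="google_cloud_default", use_legacy_sql=False)

    # Default: a pandas DataFrame, replacing the deprecated get_pandas_df().
    pandas_df = hook.get_df("SELECT 1 AS x", dialect="standard")

    # Opt in to polars; requires `pip install polars`, otherwise
    # AirflowOptionalProviderFeatureException is raised.
    polars_df = hook.get_df("SELECT 1 AS x", dialect="standard", df_type="polars")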
@@ -346,135 +429,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         except NotFound:
             return False
 
-    @deprecated(
-        planned_removal_date="July 30, 2025",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_table",
-        category=AirflowProviderDeprecationWarning,
-    )
-    @GoogleBaseHook.fallback_to_default_project_id
-    def create_empty_table(
-        self,
-        project_id: str = PROVIDE_PROJECT_ID,
-        dataset_id: str | None = None,
-        table_id: str | None = None,
-        table_resource: dict[str, Any] | None = None,
-        schema_fields: list | None = None,
-        time_partitioning: dict | None = None,
-        cluster_fields: list[str] | None = None,
-        labels: dict | None = None,
-        view: dict | None = None,
-        materialized_view: dict | None = None,
-        encryption_configuration: dict | None = None,
-        retry: Retry = DEFAULT_RETRY,
-        location: str | None = None,
-        exists_ok: bool = True,
-    ) -> Table:
-        """
-        Create a new, empty table in the dataset.
-
-        To create a view, which is defined by a SQL query, parse a dictionary to
-        the *view* argument.
-
-        :param project_id: The project to create the table into.
-        :param dataset_id: The dataset to create the table into.
-        :param table_id: The Name of the table to be created.
-        :param table_resource: Table resource as described in documentation:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table
-            If provided all other parameters are ignored.
-        :param schema_fields: If set, the schema field list as defined here:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
-
-            .. code-block:: python
-
-                schema_fields = [
-                    {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
-                    {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
-                ]
-
-        :param labels: a dictionary containing labels for the table, passed to BigQuery
-        :param retry: Optional. How to retry the RPC.
-        :param time_partitioning: configure optional time partitioning fields i.e.
-            partition by field, type and expiration as per API specifications.
-
-            .. seealso::
-                https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning
-        :param cluster_fields: [Optional] The fields used for clustering.
-            BigQuery supports clustering for both partitioned and
-            non-partitioned tables.
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clustering.fields
-        :param view: [Optional] A dictionary containing definition for the view.
-            If set, it will create a view instead of a table:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
-
-            .. code-block:: python
-
-                view = {
-                    "query": "SELECT * FROM `test-project-id.test_dataset_id.test_table_prefix*` LIMIT 1000",
-                    "useLegacySql": False,
-                }
-
-        :param materialized_view: [Optional] The materialized view definition.
-        :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-
-            .. code-block:: python
-
-                encryption_configuration = {
-                    "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
-                }
-
-        :param num_retries: Maximum number of retries in case of connection problems.
-        :param location: (Optional) The geographic location where the table should reside.
-        :param exists_ok: If ``True``, ignore "already exists" errors when creating the table.
-        :return: Created table
-        """
-        _table_resource: dict[str, Any] = {}
-
-        if self.location:
-            _table_resource["location"] = self.location
-
-        if schema_fields:
-            _table_resource["schema"] = {"fields": schema_fields}
-
-        if time_partitioning:
-            _table_resource["timePartitioning"] = time_partitioning
-
-        if cluster_fields:
-            _table_resource["clustering"] = {"fields": cluster_fields}
-
-        if labels:
-            _table_resource["labels"] = labels
-
-        if view:
-            _table_resource["view"] = view
-
-        if materialized_view:
-            _table_resource["materializedView"] = materialized_view
-
-        if encryption_configuration:
-            _table_resource["encryptionConfiguration"] = encryption_configuration
-
-        table_resource = table_resource or _table_resource
-        table_resource = self._resolve_table_reference(
-            table_resource=table_resource,
-            project_id=project_id,
-            dataset_id=dataset_id,
-            table_id=table_id,
-        )
-        table = Table.from_api_repr(table_resource)
-        result = self.get_client(project_id=project_id, location=location).create_table(
-            table=table, exists_ok=exists_ok, retry=retry
-        )
-        get_hook_lineage_collector().add_output_asset(
-            context=self,
-            scheme="bigquery",
-            asset_kwargs={
-                "project_id": result.project,
-                "dataset_id": result.dataset_id,
-                "table_id": result.table_id,
-            },
-        )
-        return result
-
     @GoogleBaseHook.fallback_to_default_project_id
     def create_table(
         self,
@@ -861,7 +815,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         if return_iterator:
             # The iterator returned by list_datasets() is a HTTPIterator but annotated
             # as Iterator
-            return iterator  #
+            return iterator  # type: ignore
 
         datasets_list = list(iterator)
         self.log.info("Datasets List: %s", len(datasets_list))
@@ -1349,7 +1303,16 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         job_api_repr.result(timeout=timeout, retry=retry)
         return job_api_repr
 
-    def generate_job_id(
+    def generate_job_id(
+        self,
+        job_id: str | None,
+        dag_id: str,
+        task_id: str,
+        logical_date: datetime | None,
+        configuration: dict,
+        run_after: pendulum.DateTime | datetime | None = None,
+        force_rerun: bool = False,
+    ) -> str:
         if force_rerun:
             hash_base = str(uuid.uuid4())
         else:
@@ -1360,10 +1323,31 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         if job_id:
             return f"{job_id}_{uniqueness_suffix}"
 
-
-
+        if logical_date is not None:
+            if AIRFLOW_V_3_0_PLUS:
+                warnings.warn(
+                    "The 'logical_date' parameter is deprecated. Please use 'run_after' instead.",
+                    AirflowProviderDeprecationWarning,
+                    stacklevel=1,
+                )
+            job_id_timestamp = logical_date
+        elif run_after is not None:
+            job_id_timestamp = run_after
+        else:
+            job_id_timestamp = pendulum.now("UTC")
+
+        job_id = f"airflow_{dag_id}_{task_id}_{job_id_timestamp.isoformat()}_{uniqueness_suffix}"
         return re.sub(r"[:\-+.]", "_", job_id)
 
+    def get_run_after_or_logical_date(self, context: Context) -> pendulum.DateTime | datetime | None:
+        dag_run = context.get("dag_run")
+        if not dag_run:
+            return pendulum.now("UTC")
+
+        if AIRFLOW_V_3_0_PLUS:
+            return dag_run.start_date
+        return dag_run.start_date if dag_run.run_type == DagRunType.SCHEDULED else context.get("logical_date")
+
     def split_tablename(
         self, table_input: str, default_project_id: str, var_name: str | None = None
     ) -> tuple[str, str, str]:
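
Note (illustrative, not part of the diff): generate_job_id now takes run_after as the timestamp source, and on Airflow 3 passing logical_date emits a deprecation warning. A minimal sketch of the new call shape, assuming a configured `google_cloud_default` connection:

    import pendulum

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook(gcp_conn_id="google_cloud_default")
    job_id = hook.generate_job_id(
        job_id=None,
        dag_id="example_dag",
        task_id="run_query",
        logical_date=None,  # deprecated on Airflow 3; prefer run_after
        configuration={"query": {"query": "SELECT 1", "useLegacySql": False}},
        run_after=pendulum.now("UTC"),
    )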
@@ -1777,6 +1761,7 @@ class BigQueryCursor(BigQueryBaseCursor):
         schema_update_options: Iterable | None = None,
         priority: str | None = None,
         time_partitioning: dict | None = None,
+        range_partitioning: dict | None = None,
         api_resource_configs: dict | None = None,
         cluster_fields: list[str] | None = None,
         encryption_configuration: dict | None = None,
@@ -1789,6 +1774,10 @@ class BigQueryCursor(BigQueryBaseCursor):
 
         if time_partitioning is None:
             time_partitioning = {}
+        if range_partitioning is None:
+            range_partitioning = {}
+        if time_partitioning and range_partitioning:
+            raise ValueError("Only one of time_partitioning or range_partitioning can be set.")
 
         if not api_resource_configs:
             api_resource_configs = self.hook.api_resource_configs
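
Note (illustrative, not part of the diff): the new range_partitioning argument is forwarded to the job's rangePartitioning configuration and is mutually exclusive with time_partitioning. The payload shape below is an assumed example mirroring the public BigQuery tables.rangePartitioning REST resource:

    # Hypothetical integer-range partitioning spec (assumed field names follow the
    # BigQuery rangePartitioning REST resource).
    range_partitioning = {
        "field": "customer_id",
        "range": {"start": "0", "end": "1000", "interval": "10"},
    }
    # Passing both time_partitioning and range_partitioning now raises ValueError.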
@@ -1818,14 +1807,6 @@ class BigQueryCursor(BigQueryBaseCursor):
                 f" Please only use one or more of the following options: {allowed_schema_update_options}"
             )
 
-        if schema_update_options:
-            if write_disposition not in ["WRITE_APPEND", "WRITE_TRUNCATE"]:
-                raise ValueError(
-                    "schema_update_options is only "
-                    "allowed if write_disposition is "
-                    "'WRITE_APPEND' or 'WRITE_TRUNCATE'."
-                )
-
         if destination_dataset_table:
             destination_project, destination_dataset, destination_table = self.hook.split_tablename(
                 table_input=destination_dataset_table, default_project_id=self.project_id
@@ -1849,16 +1830,21 @@ class BigQueryCursor(BigQueryBaseCursor):
             (maximum_billing_tier, "maximumBillingTier", None, int),
             (maximum_bytes_billed, "maximumBytesBilled", None, float),
             (time_partitioning, "timePartitioning", {}, dict),
+            (range_partitioning, "rangePartitioning", {}, dict),
             (schema_update_options, "schemaUpdateOptions", None, list),
             (destination_dataset_table, "destinationTable", None, dict),
             (cluster_fields, "clustering", None, dict),
         ]
 
-        for
-
+        for param_raw, param_name, param_default, param_type in query_param_list:
+            param: Any
+            if param_name not in configuration["query"] and param_raw in [None, {}, ()]:
                 if param_name == "timePartitioning":
-
-
+                    param = _cleanse_time_partitioning(destination_dataset_table, time_partitioning)
+                else:
+                    param = param_default
+            else:
+                param = param_raw
 
             if param in [None, {}, ()]:
                 continue
@@ -1885,15 +1871,14 @@ class BigQueryCursor(BigQueryBaseCursor):
                 "must be a dict with {'projectId':'', "
                 "'datasetId':'', 'tableId':''}"
             )
-
-
-
-
-
-
-
-
-        )
+        configuration["query"].update(
+            {
+                "allowLargeResults": allow_large_results,
+                "flattenResults": flatten_results,
+                "writeDisposition": write_disposition,
+                "createDisposition": create_disposition,
+            }
+        )
 
         if (
             "useLegacySql" in configuration["query"]
@@ -1937,74 +1922,6 @@ def _escape(s: str) -> str:
     return e
 
 
-@deprecated(
-    planned_removal_date="April 01, 2025",
-    use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.split_tablename",
-    category=AirflowProviderDeprecationWarning,
-)
-def split_tablename(
-    table_input: str, default_project_id: str, var_name: str | None = None
-) -> tuple[str, str, str]:
-    if "." not in table_input:
-        raise ValueError(f"Expected table name in the format of <dataset>.<table>. Got: {table_input}")
-
-    if not default_project_id:
-        raise ValueError("INTERNAL: No default project is specified")
-
-    def var_print(var_name):
-        if var_name is None:
-            return ""
-        return f"Format exception for {var_name}: "
-
-    if table_input.count(".") + table_input.count(":") > 3:
-        raise ValueError(f"{var_print(var_name)}Use either : or . to specify project got {table_input}")
-    cmpt = table_input.rsplit(":", 1)
-    project_id = None
-    rest = table_input
-    if len(cmpt) == 1:
-        project_id = None
-        rest = cmpt[0]
-    elif len(cmpt) == 2 and cmpt[0].count(":") <= 1:
-        if cmpt[-1].count(".") != 2:
-            project_id = cmpt[0]
-            rest = cmpt[1]
-        else:
-            raise ValueError(
-                f"{var_print(var_name)}Expect format of (<project:)<dataset>.<table>, got {table_input}"
-            )
-
-    cmpt = rest.split(".")
-    if len(cmpt) == 3:
-        if project_id:
-            raise ValueError(f"{var_print(var_name)}Use either : or . to specify project")
-        project_id = cmpt[0]
-        dataset_id = cmpt[1]
-        table_id = cmpt[2]
-
-    elif len(cmpt) == 2:
-        dataset_id = cmpt[0]
-        table_id = cmpt[1]
-    else:
-        raise ValueError(
-            f"{var_print(var_name)}Expect format of (<project.|<project:)<dataset>.<table>, got {table_input}"
-        )
-
-    # Exclude partition from the table name
-    table_id = table_id.split("$")[0]
-
-    if project_id is None:
-        if var_name is not None:
-            log.info(
-                'Project is not included in %s: %s; using project "%s"',
-                var_name,
-                table_input,
-                default_project_id,
-            )
-        project_id = default_project_id
-
-    return project_id, dataset_id, table_id
-
-
 def _cleanse_time_partitioning(
     destination_dataset_table: str | None, time_partitioning_in: dict | None
 ) -> dict:  # if it is a partitioned table ($ is in the table name) add partition load option
@@ -2073,18 +1990,19 @@ def _format_schema_for_description(schema: dict) -> list:
     internal_size, precision, scale, null_ok.
     """
     description = []
-
-
-
-
-
-
-
-
-
-
-
-
+    if "fields" in schema:
+        for field in schema["fields"]:
+            mode = field.get("mode", "NULLABLE")
+            field_description = (
+                field["name"],
+                field["type"],
+                None,
+                None,
+                None,
+                None,
+                mode == "NULLABLE",
+            )
+            description.append(field_description)
     return description
 
 
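
Note (illustrative, not part of the diff): the rewritten _format_schema_for_description maps each schema field to a DB-API style 7-tuple, with null_ok derived from the field mode (a missing mode is treated as NULLABLE). A worked example based on the code above:

    schema = {
        "fields": [
            {"name": "id", "type": "INTEGER", "mode": "REQUIRED"},
            {"name": "name", "type": "STRING"},  # no mode -> treated as NULLABLE
        ]
    }
    # _format_schema_for_description(schema) would return:
    # [("id", "INTEGER", None, None, None, None, False),
    #  ("name", "STRING", None, None, None, None, True)]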
@@ -2120,46 +2038,11 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
     async def _get_job(
         self, job_id: str | None, project_id: str = PROVIDE_PROJECT_ID, location: str | None = None
     ) -> BigQueryJob | UnknownJob:
-        """
-
-
-        WARNING.
-        This is a temporary workaround for issues below, and it's not intended to be used elsewhere!
-        https://github.com/apache/airflow/issues/35833
-        https://github.com/talkiq/gcloud-aio/issues/584
-
-        This method was developed, because neither the `google-cloud-bigquery` nor the `gcloud-aio-bigquery`
-        provides asynchronous access to a BigQuery jobs with location parameter. That's why this method wraps
-        synchronous client call with the event loop's run_in_executor() method.
-
-        This workaround must be deleted along with the method _get_job_sync() and replaced by more robust and
-        cleaner solution in one of two cases:
-        1. The `google-cloud-bigquery` library provides async client with get_job method, that supports
-        optional parameter `location`
-        2. The `gcloud-aio-bigquery` library supports the `location` parameter in get_job() method.
-        """
-        loop = asyncio.get_event_loop()
-        job = await loop.run_in_executor(None, self._get_job_sync, job_id, project_id, location)
+        """Get BigQuery job by its ID, project ID and location."""
+        sync_hook = await self.get_sync_hook()
+        job = sync_hook.get_job(job_id=job_id, project_id=project_id, location=location)
         return job
 
-    def _get_job_sync(self, job_id, project_id, location):
-        """
-        Get BigQuery job by its ID, project ID and location synchronously.
-
-        WARNING
-        This is a temporary workaround for issues below, and it's not intended to be used elsewhere!
-        https://github.com/apache/airflow/issues/35833
-        https://github.com/talkiq/gcloud-aio/issues/584
-
-        This workaround must be deleted along with the method _get_job() and replaced by more robust and
-        cleaner solution in one of two cases:
-        1. The `google-cloud-bigquery` library provides async client with get_job method, that supports
-        optional parameter `location`
-        2. The `gcloud-aio-bigquery` library supports the `location` parameter in get_job() method.
-        """
-        hook = BigQueryHook(**self._hook_kwargs)
-        return hook.get_job(job_id=job_id, project_id=project_id, location=location)
-
     async def get_job_status(
         self, job_id: str | None, project_id: str = PROVIDE_PROJECT_ID, location: str | None = None
     ) -> dict[str, str]:
@@ -2262,7 +2145,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
         self,
         sql: str,
         pass_value: Any,
-        records: list[Any],
+        records: list[Any] | None = None,
         tolerance: float | None = None,
     ) -> None:
         """
--- a/airflow/providers/google/cloud/hooks/cloud_batch.py
+++ b/airflow/providers/google/cloud/hooks/cloud_batch.py
@@ -33,7 +33,7 @@ from google.cloud.batch_v1 import (
     Task,
 )
 
-from airflow.
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.providers.google.common.consts import CLIENT_INFO
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
 
--- a/airflow/providers/google/cloud/hooks/cloud_build.py
+++ b/airflow/providers/google/cloud/hooks/cloud_build.py
@@ -27,7 +27,7 @@ from google.api_core.exceptions import AlreadyExists
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
 from google.cloud.devtools.cloudbuild_v1 import CloudBuildAsyncClient, CloudBuildClient, GetBuildRequest
 
-from airflow.
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.providers.google.common.consts import CLIENT_INFO
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
 from airflow.providers.google.common.hooks.operation_helpers import OperationHelper