apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.3.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/ads/hooks/ads.py +39 -6
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/bundles/__init__.py +16 -0
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/alloy_db.py +1 -1
- airflow/providers/google/cloud/hooks/bigquery.py +176 -293
- airflow/providers/google/cloud/hooks/cloud_batch.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_build.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_composer.py +288 -15
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_run.py +18 -10
- airflow/providers/google/cloud/hooks/cloud_sql.py +102 -23
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +29 -7
- airflow/providers/google/cloud/hooks/compute.py +1 -1
- airflow/providers/google/cloud/hooks/compute_ssh.py +6 -2
- airflow/providers/google/cloud/hooks/datacatalog.py +10 -1
- airflow/providers/google/cloud/hooks/dataflow.py +72 -95
- airflow/providers/google/cloud/hooks/dataform.py +1 -1
- airflow/providers/google/cloud/hooks/datafusion.py +21 -19
- airflow/providers/google/cloud/hooks/dataplex.py +2 -2
- airflow/providers/google/cloud/hooks/dataprep.py +1 -1
- airflow/providers/google/cloud/hooks/dataproc.py +73 -72
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +1 -1
- airflow/providers/google/cloud/hooks/dlp.py +1 -1
- airflow/providers/google/cloud/hooks/functions.py +1 -1
- airflow/providers/google/cloud/hooks/gcs.py +112 -15
- airflow/providers/google/cloud/hooks/gdm.py +1 -1
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +3 -3
- airflow/providers/google/cloud/hooks/looker.py +6 -2
- airflow/providers/google/cloud/hooks/managed_kafka.py +1 -1
- airflow/providers/google/cloud/hooks/mlengine.py +4 -3
- airflow/providers/google/cloud/hooks/pubsub.py +3 -0
- airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
- airflow/providers/google/cloud/hooks/spanner.py +74 -9
- airflow/providers/google/cloud/hooks/stackdriver.py +11 -9
- airflow/providers/google/cloud/hooks/tasks.py +1 -1
- airflow/providers/google/cloud/hooks/translate.py +2 -2
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +2 -210
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -3
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +28 -2
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +308 -8
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/vision.py +3 -3
- airflow/providers/google/cloud/hooks/workflows.py +1 -1
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -13
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -96
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -95
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
- airflow/providers/google/cloud/links/managed_kafka.py +0 -70
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +58 -22
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +102 -1
- airflow/providers/google/cloud/openlineage/mixins.py +10 -8
- airflow/providers/google/cloud/openlineage/utils.py +15 -1
- airflow/providers/google/cloud/operators/alloy_db.py +71 -56
- airflow/providers/google/cloud/operators/bigquery.py +73 -636
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -6
- airflow/providers/google/cloud/operators/bigtable.py +37 -8
- airflow/providers/google/cloud/operators/cloud_base.py +21 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +3 -3
- airflow/providers/google/cloud/operators/cloud_build.py +76 -33
- airflow/providers/google/cloud/operators/cloud_composer.py +129 -41
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
- airflow/providers/google/cloud/operators/cloud_run.py +24 -6
- airflow/providers/google/cloud/operators/cloud_sql.py +8 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +93 -12
- airflow/providers/google/cloud/operators/compute.py +9 -41
- airflow/providers/google/cloud/operators/datacatalog.py +157 -21
- airflow/providers/google/cloud/operators/dataflow.py +40 -16
- airflow/providers/google/cloud/operators/dataform.py +15 -5
- airflow/providers/google/cloud/operators/datafusion.py +42 -21
- airflow/providers/google/cloud/operators/dataplex.py +194 -110
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +80 -36
- airflow/providers/google/cloud/operators/dataproc_metastore.py +97 -89
- airflow/providers/google/cloud/operators/datastore.py +23 -7
- airflow/providers/google/cloud/operators/dlp.py +6 -29
- airflow/providers/google/cloud/operators/functions.py +17 -8
- airflow/providers/google/cloud/operators/gcs.py +12 -9
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +62 -100
- airflow/providers/google/cloud/operators/looker.py +2 -2
- airflow/providers/google/cloud/operators/managed_kafka.py +108 -53
- airflow/providers/google/cloud/operators/natural_language.py +1 -1
- airflow/providers/google/cloud/operators/pubsub.py +68 -15
- airflow/providers/google/cloud/operators/spanner.py +26 -13
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -3
- airflow/providers/google/cloud/operators/stackdriver.py +1 -9
- airflow/providers/google/cloud/operators/tasks.py +1 -12
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -3
- airflow/providers/google/cloud/operators/translate.py +41 -17
- airflow/providers/google/cloud/operators/translate_speech.py +2 -3
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +30 -10
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +55 -27
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -115
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +12 -10
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +31 -8
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
- airflow/providers/google/cloud/operators/vision.py +2 -2
- airflow/providers/google/cloud/operators/workflows.py +18 -15
- airflow/providers/google/cloud/secrets/secret_manager.py +3 -2
- airflow/providers/google/cloud/sensors/bigquery.py +3 -3
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -3
- airflow/providers/google/cloud/sensors/bigtable.py +11 -4
- airflow/providers/google/cloud/sensors/cloud_composer.py +533 -30
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -3
- airflow/providers/google/cloud/sensors/dataflow.py +26 -10
- airflow/providers/google/cloud/sensors/dataform.py +2 -3
- airflow/providers/google/cloud/sensors/datafusion.py +4 -5
- airflow/providers/google/cloud/sensors/dataplex.py +2 -3
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +2 -3
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -3
- airflow/providers/google/cloud/sensors/gcs.py +4 -5
- airflow/providers/google/cloud/sensors/looker.py +2 -3
- airflow/providers/google/cloud/sensors/pubsub.py +4 -5
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -3
- airflow/providers/google/cloud/sensors/workflows.py +2 -3
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +4 -3
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +10 -5
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +21 -13
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +4 -3
- airflow/providers/google/cloud/transfers/gcs_to_local.py +6 -4
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +11 -5
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +13 -7
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +14 -5
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +76 -35
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
- airflow/providers/google/cloud/triggers/cloud_run.py +3 -3
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +92 -2
- airflow/providers/google/cloud/triggers/dataflow.py +122 -0
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +14 -2
- airflow/providers/google/cloud/triggers/dataproc.py +123 -53
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +47 -28
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +15 -19
- airflow/providers/google/cloud/triggers/vertex_ai.py +1 -1
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +2 -2
- airflow/providers/google/cloud/utils/field_sanitizer.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +2 -3
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +27 -9
- airflow/providers/google/common/hooks/operation_helpers.py +1 -1
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +3 -4
- airflow/providers/google/firebase/hooks/firestore.py +1 -1
- airflow/providers/google/firebase/operators/firestore.py +3 -3
- airflow/providers/google/get_provider_info.py +56 -52
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +27 -2
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/campaign_manager.py +1 -1
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +2 -3
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +6 -6
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +3 -64
- airflow/providers/google/suite/hooks/calendar.py +2 -2
- airflow/providers/google/suite/hooks/sheets.py +16 -2
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +3 -3
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/METADATA +90 -46
- apache_airflow_providers_google-19.3.0.dist-info/RECORD +331 -0
- apache_airflow_providers_google-19.3.0.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/hooks/automl.py +0 -673
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1362
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -112
- apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.3.0.dist-info/licenses}/LICENSE +0 -0
@@ -33,8 +33,9 @@ from google.cloud.bigquery import DEFAULT_RETRY, CopyJob, ExtractJob, LoadJob, Q
 from google.cloud.bigquery.table import RowIterator, Table, TableListItem, TableReference

 from airflow.configuration import conf
-from airflow.exceptions import …
-from airflow.providers.common.…
+from airflow.exceptions import AirflowProviderDeprecationWarning
+from airflow.providers.common.compat.sdk import AirflowException, AirflowSkipException
+from airflow.providers.common.sql.operators.sql import (  # for _parse_boolean
     SQLCheckOperator,
     SQLColumnCheckOperator,
     SQLIntervalCheckOperator,
@@ -59,7 +60,6 @@ from airflow.providers.google.cloud.triggers.bigquery import (
     BigQueryValueCheckTrigger,
 )
 from airflow.providers.google.cloud.utils.bigquery import convert_job_id
-from airflow.providers.google.common.deprecated import deprecated
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils.helpers import exactly_one

@@ -67,7 +67,7 @@ if TYPE_CHECKING:
     from google.api_core.retry import Retry
     from google.cloud.bigquery import UnknownJob

-    from airflow.…
+    from airflow.providers.common.compat.sdk import Context


 BIGQUERY_JOB_DETAILS_LINK_FMT = "https://console.cloud.google.com/bigquery?j={job_id}"
@@ -93,10 +93,23 @@ class IfExistAction(enum.Enum):
     SKIP = "skip"


+class _BigQueryHookWithFlexibleProjectId(BigQueryHook):
+    @property
+    def project_id(self) -> str:
+        _, project_id = self.get_credentials_and_project_id()
+        return project_id or PROVIDE_PROJECT_ID
+
+    @project_id.setter
+    def project_id(self, value: str) -> None:
+        cached_creds, _ = self.get_credentials_and_project_id()
+        self._cached_project_id = value or PROVIDE_PROJECT_ID
+        self._cached_credntials = cached_creds
+
+
 class _BigQueryDbHookMixin:
-    def get_db_hook(self: BigQueryCheckOperator) -> …
+    def get_db_hook(self: BigQueryCheckOperator) -> _BigQueryHookWithFlexibleProjectId:  # type:ignore[misc]
         """Get BigQuery DB Hook."""
-        …
+        hook = _BigQueryHookWithFlexibleProjectId(
             gcp_conn_id=self.gcp_conn_id,
             use_legacy_sql=self.use_legacy_sql,
             location=self.location,
@@ -104,6 +117,11 @@ class _BigQueryDbHookMixin:
             labels=self.labels,
         )

+        # mypy assuming project_id is read only, as project_id is a property in GoogleBaseHook.
+        if self.project_id:
+            hook.project_id = self.project_id  # type:ignore[misc]
+        return hook
+

 class _BigQueryOperatorsEncryptionConfigurationMixin:
     """A class to handle the configuration for BigQueryHook.insert_job method."""
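The two hunks above introduce `_BigQueryHookWithFlexibleProjectId`, which re-declares the hook's `project_id` property so it gains a setter (on `GoogleBaseHook` the property is read-only, hence the `type:ignore[misc]` hints). A minimal, self-contained sketch of the same Python pattern — class names and values here are illustrative, not provider code:

    class Base:
        @property
        def project_id(self) -> str:
            # On the real GoogleBaseHook this is derived from credentials and is read-only.
            return "project-from-credentials"


    class WithSetter(Base):
        # Re-declaring the property in the subclass lets us attach a setter.
        @property
        def project_id(self) -> str:
            return getattr(self, "_cached_project_id", None) or super().project_id

        @project_id.setter
        def project_id(self, value: str) -> None:
            self._cached_project_id = value


    hook = WithSetter()
    hook.project_id = "my-explicit-project"  # assignable now; on Base this would raise AttributeError
    print(hook.project_id)  # -> my-explicit-project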
@@ -190,6 +208,7 @@ class BigQueryCheckOperator(
         https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs.
         For example, [{ 'name': 'corpus', 'parameterType': { 'type': 'STRING' },
         'parameterValue': { 'value': 'romeoandjuliet' } }]. (templated)
+    :param project_id: Google Cloud Project where the job is running
     """

     template_fields: Sequence[str] = (
@@ -208,6 +227,7 @@ class BigQueryCheckOperator(
         *,
         sql: str,
         gcp_conn_id: str = "google_cloud_default",
+        project_id: str = PROVIDE_PROJECT_ID,
         use_legacy_sql: bool = True,
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
@@ -228,6 +248,7 @@ class BigQueryCheckOperator(
         self.deferrable = deferrable
         self.poll_interval = poll_interval
         self.query_params = query_params
+        self.project_id = project_id

     def _submit_job(
         self,
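With `project_id` now accepted by `__init__` and threaded through `_submit_job` (hunks above and below), the check job can run in a project other than the connection's default; when omitted it still falls back to `hook.project_id`. A hedged usage sketch — the IDs are placeholders:

    from airflow.providers.google.cloud.operators.bigquery import BigQueryCheckOperator

    check = BigQueryCheckOperator(
        task_id="check_rows_exist",
        sql="SELECT COUNT(*) FROM `my-project.my_dataset.my_table`",
        project_id="my-billing-project",  # new parameter in this release
        use_legacy_sql=False,
        location="EU",
    )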
@@ -243,7 +264,7 @@ class BigQueryCheckOperator(

         return hook.insert_job(
             configuration=configuration,
-            project_id=…
+            project_id=self.project_id,
             location=self.location,
             job_id=job_id,
             nowait=True,
@@ -257,6 +278,8 @@ class BigQueryCheckOperator(
                 gcp_conn_id=self.gcp_conn_id,
                 impersonation_chain=self.impersonation_chain,
             )
+            if self.project_id is None:
+                self.project_id = hook.project_id
             job = self._submit_job(hook, job_id="")
             context["ti"].xcom_push(key="job_id", value=job.job_id)
             if job.running():
@@ -265,7 +288,7 @@ class BigQueryCheckOperator(
                 trigger=BigQueryCheckTrigger(
                     conn_id=self.gcp_conn_id,
                     job_id=job.job_id,
-                    project_id=…
+                    project_id=self.project_id,
                     location=self.location or hook.location,
                     poll_interval=self.poll_interval,
                     impersonation_chain=self.impersonation_chain,
@@ -288,9 +311,7 @@ class BigQueryCheckOperator(
         if not records:
             raise AirflowException(f"The following query returned zero rows: {self.sql}")
         if not all(records):
-            self._raise_exception(
-                f"Test failed.\nQuery:\n{self.sql}\nResults:\n{records!s}"
-            )
+            self._raise_exception(f"Test failed.\nQuery:\n{self.sql}\nResults:\n{records!s}")

     def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
         """
@@ -342,6 +363,7 @@ class BigQueryValueCheckOperator(
     :param deferrable: Run operator in the deferrable mode.
     :param poll_interval: (Deferrable mode only) polling period in seconds to
         check for the status of job.
+    :param project_id: Google Cloud Project where the job is running
     """

     template_fields: Sequence[str] = (
@@ -363,6 +385,7 @@ class BigQueryValueCheckOperator(
         tolerance: Any = None,
         encryption_configuration: dict | None = None,
         gcp_conn_id: str = "google_cloud_default",
+        project_id: str = PROVIDE_PROJECT_ID,
         use_legacy_sql: bool = True,
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
@@ -380,6 +403,7 @@ class BigQueryValueCheckOperator(
         self.labels = labels
         self.deferrable = deferrable
         self.poll_interval = poll_interval
+        self.project_id = project_id

     def _submit_job(
         self,
@@ -398,18 +422,19 @@ class BigQueryValueCheckOperator(

         return hook.insert_job(
             configuration=configuration,
-            project_id=…
+            project_id=self.project_id,
             location=self.location,
             job_id=job_id,
             nowait=True,
         )

-    def execute(self, context: Context) -> None:
+    def execute(self, context: Context) -> None:
         if not self.deferrable:
             super().execute(context=context)
         else:
             hook = BigQueryHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
-
+            if self.project_id is None:
+                self.project_id = hook.project_id
             job = self._submit_job(hook, job_id="")
             context["ti"].xcom_push(key="job_id", value=job.job_id)
             if job.running():
@@ -418,7 +443,7 @@ class BigQueryValueCheckOperator(
                 trigger=BigQueryValueCheckTrigger(
                     conn_id=self.gcp_conn_id,
                     job_id=job.job_id,
-                    project_id=…
+                    project_id=self.project_id,
                     location=self.location or hook.location,
                     sql=self.sql,
                     pass_value=self.pass_value,
@@ -575,6 +600,9 @@ class BigQueryIntervalCheckOperator(
             hook = BigQueryHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
             self.log.info("Using ratio formula: %s", self.ratio_formula)

+            if self.project_id is None:
+                self.project_id = hook.project_id
+
             self.log.info("Executing SQL check: %s", self.sql1)
             job_1 = self._submit_job(hook, sql=self.sql1, job_id="")
             context["ti"].xcom_push(key="job_id", value=job_1.job_id)
@@ -587,7 +615,7 @@ class BigQueryIntervalCheckOperator(
                     conn_id=self.gcp_conn_id,
                     first_job_id=job_1.job_id,
                     second_job_id=job_2.job_id,
-                    project_id=…
+                    project_id=self.project_id,
                     table=self.table,
                     location=self.location or hook.location,
                     metrics_thresholds=self.metrics_thresholds,
@@ -654,6 +682,7 @@ class BigQueryColumnCheckOperator(
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
     :param labels: a dictionary containing labels for the table, passed to BigQuery
+    :param project_id: Google Cloud Project where the job is running
     """

     template_fields: Sequence[str] = tuple(set(SQLColumnCheckOperator.template_fields) | {"gcp_conn_id"})
@@ -670,6 +699,7 @@ class BigQueryColumnCheckOperator(
         accept_none: bool = True,
         encryption_configuration: dict | None = None,
         gcp_conn_id: str = "google_cloud_default",
+        project_id: str = PROVIDE_PROJECT_ID,
         use_legacy_sql: bool = True,
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
@@ -695,6 +725,7 @@ class BigQueryColumnCheckOperator(
         self.location = location
         self.impersonation_chain = impersonation_chain
         self.labels = labels
+        self.project_id = project_id

     def _submit_job(
         self,
@@ -706,7 +737,7 @@ class BigQueryColumnCheckOperator(
         self.include_encryption_configuration(configuration, "query")
         return hook.insert_job(
             configuration=configuration,
-            project_id=…
+            project_id=self.project_id,
             location=self.location,
             job_id=job_id,
             nowait=False,
@@ -715,6 +746,9 @@ class BigQueryColumnCheckOperator(
     def execute(self, context=None):
         """Perform checks on the given columns."""
         hook = self.get_db_hook()
+
+        if self.project_id is None:
+            self.project_id = hook.project_id
         failed_tests = []

         job = self._submit_job(hook, job_id="")
@@ -786,6 +820,7 @@ class BigQueryTableCheckOperator(
         account from the list granting this role to the originating account (templated).
     :param labels: a dictionary containing labels for the table, passed to BigQuery
     :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+    :param project_id: Google Cloud Project where the job is running

     .. code-block:: python

@@ -805,6 +840,7 @@ class BigQueryTableCheckOperator(
         checks: dict,
         partition_clause: str | None = None,
         gcp_conn_id: str = "google_cloud_default",
+        project_id: str = PROVIDE_PROJECT_ID,
         use_legacy_sql: bool = True,
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
@@ -819,6 +855,7 @@ class BigQueryTableCheckOperator(
         self.impersonation_chain = impersonation_chain
         self.labels = labels
         self.encryption_configuration = encryption_configuration
+        self.project_id = project_id

     def _submit_job(
         self,
@@ -832,7 +869,7 @@ class BigQueryTableCheckOperator(

         return hook.insert_job(
             configuration=configuration,
-            project_id=…
+            project_id=self.project_id,
             location=self.location,
             job_id=job_id,
             nowait=False,
@@ -841,6 +878,8 @@ class BigQueryTableCheckOperator(
     def execute(self, context=None):
         """Execute the given checks on the table."""
         hook = self.get_db_hook()
+        if self.project_id is None:
+            self.project_id = hook.project_id
         job = self._submit_job(hook, job_id="")
         context["ti"].xcom_push(key="job_id", value=job.job_id)
         records = job.result().to_dataframe()
@@ -974,6 +1013,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator, _BigQueryOperatorsEncrypt
         "project_id",
         "max_results",
         "selected_fields",
+        "gcp_conn_id",
         "impersonation_chain",
     )
     ui_color = BigQueryUIColors.QUERY.value
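Adding `"gcp_conn_id"` to `template_fields` makes the connection id itself Jinja-templated, so it can be resolved at runtime, for example from an Airflow Variable. An illustrative sketch — the variable name is a placeholder:

    from airflow.providers.google.cloud.operators.bigquery import BigQueryGetDataOperator

    get_data = BigQueryGetDataOperator(
        task_id="get_data",
        dataset_id="my_dataset",
        table_id="my_table",
        max_results=10,
        gcp_conn_id="{{ var.value.bq_conn_id }}",  # rendered per run now that the field is templated
    )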
@@ -1117,7 +1157,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator, _BigQueryOperatorsEncrypt
                 "BigQueryHook.list_rows() returns iterator when return_iterator is False (default)"
             )
         self.log.info("Total extracted rows: %s", len(rows))
-
+        table_data: list[dict[str, Any]] | list[Any]
         if self.as_dict:
             table_data = [dict(row) for row in rows]
         else:
@@ -1215,6 +1255,7 @@ class BigQueryCreateTableOperator(GoogleCloudBaseOperator):
         "table_resource",
         "project_id",
         "gcs_schema_object",
+        "gcp_conn_id",
         "impersonation_chain",
     )
     template_fields_renderers = {"table_resource": "json"}
@@ -1285,291 +1326,6 @@ class BigQueryCreateTableOperator(GoogleCloudBaseOperator):
             if self._table:
                 persist_kwargs = {
                     "context": context,
-                    "task_instance": self,
-                    "project_id": self._table.to_api_repr()["tableReference"]["projectId"],
-                    "dataset_id": self._table.to_api_repr()["tableReference"]["datasetId"],
-                    "table_id": self._table.to_api_repr()["tableReference"]["tableId"],
-                }
-                self.log.info(
-                    "Table %s.%s.%s created successfully",
-                    self._table.project,
-                    self._table.dataset_id,
-                    self._table.table_id,
-                )
-            else:
-                raise AirflowException("Table creation failed.")
-        except Conflict:
-            error_msg = f"Table {self.dataset_id}.{self.table_id} already exists."
-            if self.if_exists == IfExistAction.LOG:
-                self.log.info(error_msg)
-                persist_kwargs = {
-                    "context": context,
-                    "task_instance": self,
-                    "project_id": self.project_id or bq_hook.project_id,
-                    "dataset_id": self.dataset_id,
-                    "table_id": self.table_id,
-                }
-            elif self.if_exists == IfExistAction.FAIL:
-                raise AirflowException(error_msg)
-            else:
-                raise AirflowSkipException(error_msg)
-
-        BigQueryTableLink.persist(**persist_kwargs)
-
-    def get_openlineage_facets_on_complete(self, _):
-        """Implement _on_complete as we will use table resource returned by create method."""
-        from airflow.providers.common.compat.openlineage.facet import Dataset
-        from airflow.providers.google.cloud.openlineage.utils import (
-            BIGQUERY_NAMESPACE,
-            get_facets_from_bq_table,
-        )
-        from airflow.providers.openlineage.extractors import OperatorLineage
-
-        if not self._table:
-            self.log.debug("OpenLineage did not find `self._table` attribute.")
-            return OperatorLineage()
-
-        output_dataset = Dataset(
-            namespace=BIGQUERY_NAMESPACE,
-            name=f"{self._table.project}.{self._table.dataset_id}.{self._table.table_id}",
-            facets=get_facets_from_bq_table(self._table),
-        )
-
-        return OperatorLineage(outputs=[output_dataset])
-
-
-@deprecated(
-    planned_removal_date="July 30, 2025",
-    use_instead="airflow.providers.google.cloud.operators.bigquery.BigQueryCreateTableOperator",
-    category=AirflowProviderDeprecationWarning,
-)
-class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
-    """
-    Creates a new table in the specified BigQuery dataset, optionally with schema.
-
-    The schema to be used for the BigQuery table may be specified in one of
-    two ways. You may either directly pass the schema fields in, or you may
-    point the operator to a Google Cloud Storage object name. The object in
-    Google Cloud Storage must be a JSON file with the schema fields in it.
-    You can also create a table without schema.
-
-    .. seealso::
-        For more information on how to use this operator, take a look at the guide:
-        :ref:`howto/operator:BigQueryCreateEmptyTableOperator`
-
-    :param project_id: The project to create the table into. (templated)
-    :param dataset_id: The dataset to create the table into. (templated)
-    :param table_id: The Name of the table to be created. (templated)
-    :param table_resource: Table resource as described in documentation:
-        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table
-        If provided all other parameters are ignored. (templated)
-    :param schema_fields: If set, the schema field list as defined here:
-        https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
-
-        **Example**::
-
-            schema_fields = [
-                {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
-                {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
-            ]
-
-    :param gcs_schema_object: Full path to the JSON file containing
-        schema (templated). For
-        example: ``gs://test-bucket/dir1/dir2/employee_schema.json``
-    :param time_partitioning: configure optional time partitioning fields i.e.
-        partition by field, type and expiration as per API specifications.
-
-        .. seealso::
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning
-    :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud and
-        interact with the Bigquery service.
-    :param google_cloud_storage_conn_id: (Optional) The connection ID used to connect to Google Cloud.
-        and interact with the Google Cloud Storage service.
-    :param labels: a dictionary containing labels for the table, passed to BigQuery
-
-    **Example (with schema JSON in GCS)**::
-
-        CreateTable = BigQueryCreateEmptyTableOperator(
-            task_id="BigQueryCreateEmptyTableOperator_task",
-            dataset_id="ODS",
-            table_id="Employees",
-            project_id="internal-gcp-project",
-            gcs_schema_object="gs://schema-bucket/employee_schema.json",
-            gcp_conn_id="airflow-conn-id",
-            google_cloud_storage_conn_id="airflow-conn-id",
-        )
-
-    **Corresponding Schema file** (``employee_schema.json``)::
-
-        [
-            {"mode": "NULLABLE", "name": "emp_name", "type": "STRING"},
-            {"mode": "REQUIRED", "name": "salary", "type": "INTEGER"},
-        ]
-
-    **Example (with schema in the DAG)**::
-
-        CreateTable = BigQueryCreateEmptyTableOperator(
-            task_id="BigQueryCreateEmptyTableOperator_task",
-            dataset_id="ODS",
-            table_id="Employees",
-            project_id="internal-gcp-project",
-            schema_fields=[
-                {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
-                {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
-            ],
-            gcp_conn_id="airflow-conn-id-account",
-            google_cloud_storage_conn_id="airflow-conn-id",
-        )
-
-    :param view: (Optional) A dictionary containing definition for the view.
-        If set, it will create a view instead of a table:
-
-        .. seealso::
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
-    :param materialized_view: (Optional) The materialized view definition.
-    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
-
-        .. code-block:: python
-
-            encryption_configuration = {
-                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
-            }
-    :param location: The location used for the operation.
-    :param cluster_fields: (Optional) The fields used for clustering.
-        BigQuery supports clustering for both partitioned and
-        non-partitioned tables.
-
-        .. seealso::
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clustering.fields
-    :param impersonation_chain: Optional service account to impersonate using short-term
-        credentials, or chained list of accounts required to get the access_token
-        of the last account in the list, which will be impersonated in the request.
-        If set as a string, the account must grant the originating account
-        the Service Account Token Creator IAM role.
-        If set as a sequence, the identities from the list must grant
-        Service Account Token Creator IAM role to the directly preceding identity, with first
-        account from the list granting this role to the originating account (templated).
-    :param if_exists: What should Airflow do if the table exists. If set to `log`, the TI will be passed to
-        success and an error message will be logged. Set to `ignore` to ignore the error, set to `fail` to
-        fail the TI, and set to `skip` to skip it.
-    :param exists_ok: Deprecated - use `if_exists="ignore"` instead.
-    """
-
-    template_fields: Sequence[str] = (
-        "dataset_id",
-        "table_id",
-        "table_resource",
-        "project_id",
-        "gcs_schema_object",
-        "labels",
-        "view",
-        "materialized_view",
-        "impersonation_chain",
-    )
-    template_fields_renderers = {"table_resource": "json", "materialized_view": "json"}
-    ui_color = BigQueryUIColors.TABLE.value
-    operator_extra_links = (BigQueryTableLink(),)
-
-    def __init__(
-        self,
-        *,
-        dataset_id: str,
-        table_id: str,
-        table_resource: dict[str, Any] | None = None,
-        project_id: str = PROVIDE_PROJECT_ID,
-        schema_fields: list | None = None,
-        gcs_schema_object: str | None = None,
-        time_partitioning: dict | None = None,
-        gcp_conn_id: str = "google_cloud_default",
-        google_cloud_storage_conn_id: str = "google_cloud_default",
-        labels: dict | None = None,
-        view: dict | None = None,
-        materialized_view: dict | None = None,
-        encryption_configuration: dict | None = None,
-        location: str | None = None,
-        cluster_fields: list[str] | None = None,
-        impersonation_chain: str | Sequence[str] | None = None,
-        if_exists: str = "log",
-        bigquery_conn_id: str | None = None,
-        exists_ok: bool | None = None,
-        **kwargs,
-    ) -> None:
-        if bigquery_conn_id:
-            warnings.warn(
-                "The bigquery_conn_id parameter has been deprecated. Use the gcp_conn_id parameter instead.",
-                AirflowProviderDeprecationWarning,
-                stacklevel=2,
-            )
-            gcp_conn_id = bigquery_conn_id
-
-        super().__init__(**kwargs)
-
-        self.project_id = project_id
-        self.dataset_id = dataset_id
-        self.table_id = table_id
-        self.schema_fields = schema_fields
-        self.gcs_schema_object = gcs_schema_object
-        self.gcp_conn_id = gcp_conn_id
-        self.google_cloud_storage_conn_id = google_cloud_storage_conn_id
-        self.time_partitioning = time_partitioning or {}
-        self.labels = labels
-        self.view = view
-        self.materialized_view = materialized_view
-        self.encryption_configuration = encryption_configuration
-        self.location = location
-        self.cluster_fields = cluster_fields
-        self.table_resource = table_resource
-        self.impersonation_chain = impersonation_chain
-        self._table: Table | None = None
-        if exists_ok is not None:
-            warnings.warn(
-                "`exists_ok` parameter is deprecated, please use `if_exists`",
-                AirflowProviderDeprecationWarning,
-                stacklevel=2,
-            )
-            self.if_exists = IfExistAction.IGNORE if exists_ok else IfExistAction.LOG
-        else:
-            self.if_exists = IfExistAction(if_exists)
-
-    def execute(self, context: Context) -> None:
-        bq_hook = BigQueryHook(
-            gcp_conn_id=self.gcp_conn_id,
-            location=self.location,
-            impersonation_chain=self.impersonation_chain,
-        )
-
-        if not self.schema_fields and self.gcs_schema_object:
-            gcs_bucket, gcs_object = _parse_gcs_url(self.gcs_schema_object)
-            gcs_hook = GCSHook(
-                gcp_conn_id=self.google_cloud_storage_conn_id,
-                impersonation_chain=self.impersonation_chain,
-            )
-            schema_fields_string = gcs_hook.download_as_byte_array(gcs_bucket, gcs_object).decode("utf-8")
-            schema_fields = json.loads(schema_fields_string)
-        else:
-            schema_fields = self.schema_fields
-
-        try:
-            self.log.info("Creating table")
-            # Save table as attribute for further use by OpenLineage
-            self._table = bq_hook.create_empty_table(
-                project_id=self.project_id,
-                dataset_id=self.dataset_id,
-                table_id=self.table_id,
-                schema_fields=schema_fields,
-                time_partitioning=self.time_partitioning,
-                cluster_fields=self.cluster_fields,
-                labels=self.labels,
-                view=self.view,
-                materialized_view=self.materialized_view,
-                encryption_configuration=self.encryption_configuration,
-                table_resource=self.table_resource,
-                exists_ok=self.if_exists == IfExistAction.IGNORE,
-            )
-            if self._table:
-                persist_kwargs = {
-                    "context": context,
-                    "task_instance": self,
                     "project_id": self._table.to_api_repr()["tableReference"]["projectId"],
                     "dataset_id": self._table.to_api_repr()["tableReference"]["datasetId"],
                     "table_id": self._table.to_api_repr()["tableReference"]["tableId"],
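The hunk above (and the final hunk below) delete the long-deprecated `BigQueryCreateEmptyTableOperator` and `BigQueryCreateExternalTableOperator`; their `@deprecated(use_instead=...)` decorators pointed to `BigQueryCreateTableOperator`. A hedged migration sketch built from the examples in the deleted docstrings — the `table_resource` payloads follow the BigQuery Table REST resource, and the IDs are placeholders:

    from airflow.providers.google.cloud.operators.bigquery import BigQueryCreateTableOperator

    # Formerly BigQueryCreateEmptyTableOperator with schema_fields:
    create_table = BigQueryCreateTableOperator(
        task_id="create_employees_table",
        project_id="internal-gcp-project",
        dataset_id="ODS",
        table_id="Employees",
        table_resource={
            "schema": {
                "fields": [
                    {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
                    {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
                ]
            }
        },
    )

    # Formerly BigQueryCreateExternalTableOperator with bucket/source_objects:
    create_external_table = BigQueryCreateTableOperator(
        task_id="create_external_table",
        project_id="internal-gcp-project",
        dataset_id="ODS",
        table_id="EmployeesExternal",
        table_resource={
            "externalDataConfiguration": {
                "sourceUris": ["gs://test-bucket/dir1/dir2/*.csv"],
                "sourceFormat": "CSV",
                "autodetect": True,
                "csvOptions": {"fieldDelimiter": ",", "skipLeadingRows": 1},
            }
        },
    )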
@@ -1588,7 +1344,6 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
                 self.log.info(error_msg)
                 persist_kwargs = {
                     "context": context,
-                    "task_instance": self,
                     "project_id": self.project_id or bq_hook.project_id,
                     "dataset_id": self.dataset_id,
                     "table_id": self.table_id,
@@ -1622,326 +1377,6 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
         return OperatorLineage(outputs=[output_dataset])


-@deprecated(
-    planned_removal_date="July 30, 2025",
-    use_instead="airflow.providers.google.cloud.operators.bigquery.BigQueryCreateTableOperator",
-    category=AirflowProviderDeprecationWarning,
-)
-class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
-    """
-    Create a new external table with data from Google Cloud Storage.
-
-    The schema to be used for the BigQuery table may be specified in one of
-    two ways. You may either directly pass the schema fields in, or you may
-    point the operator to a Google Cloud Storage object name. The object in
-    Google Cloud Storage must be a JSON file with the schema fields in it.
-
-    .. seealso::
-        For more information on how to use this operator, take a look at the guide:
-        :ref:`howto/operator:BigQueryCreateExternalTableOperator`
-
-    :param bucket: The bucket to point the external table to. (templated)
-    :param source_objects: List of Google Cloud Storage URIs to point
-        table to. If source_format is 'DATASTORE_BACKUP', the list must only contain a single URI.
-    :param destination_project_dataset_table: The dotted ``(<project>.)<dataset>.<table>``
-        BigQuery table to load data into (templated). If ``<project>`` is not included,
-        project will be the project defined in the connection json.
-    :param schema_fields: If set, the schema field list as defined here:
-        https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
-
-        **Example**::
-
-            schema_fields = [
-                {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
-                {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
-            ]
-
-        Should not be set when source_format is 'DATASTORE_BACKUP'.
-    :param table_resource: Table resource as described in documentation:
-        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table
-        If provided all other parameters are ignored. External schema from object will be resolved.
-    :param schema_object: If set, a GCS object path pointing to a .json file that
-        contains the schema for the table. (templated)
-    :param gcs_schema_bucket: GCS bucket name where the schema JSON is stored (templated).
-        The default value is self.bucket.
-    :param source_format: File format of the data.
-    :param autodetect: Try to detect schema and format options automatically.
-        The schema_fields and schema_object options will be honored when specified explicitly.
-        https://cloud.google.com/bigquery/docs/schema-detect#schema_auto-detection_for_external_data_sources
-    :param compression: (Optional) The compression type of the data source.
-        Possible values include GZIP and NONE.
-        The default value is NONE.
-        This setting is ignored for Google Cloud Bigtable,
-        Google Cloud Datastore backups and Avro formats.
-    :param skip_leading_rows: Number of rows to skip when loading from a CSV.
-    :param field_delimiter: The delimiter to use for the CSV.
-    :param max_bad_records: The maximum number of bad records that BigQuery can
-        ignore when running the job.
-    :param quote_character: The value that is used to quote data sections in a CSV file.
-    :param allow_quoted_newlines: Whether to allow quoted newlines (true) or not (false).
-    :param allow_jagged_rows: Accept rows that are missing trailing optional columns.
-        The missing values are treated as nulls. If false, records with missing trailing
-        columns are treated as bad records, and if there are too many bad records, an
-        invalid error is returned in the job result. Only applicable to CSV, ignored
-        for other formats.
-    :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud and
-        interact with the Bigquery service.
-    :param google_cloud_storage_conn_id: (Optional) The connection ID used to connect to Google Cloud
-        and interact with the Google Cloud Storage service.
-    :param src_fmt_configs: configure optional fields specific to the source format
-    :param labels: a dictionary containing labels for the table, passed to BigQuery
-    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
-
-        .. code-block:: python
-
-            encryption_configuration = {
-                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
-            }
-    :param location: The location used for the operation.
-    :param impersonation_chain: Optional service account to impersonate using short-term
-        credentials, or chained list of accounts required to get the access_token
-        of the last account in the list, which will be impersonated in the request.
-        If set as a string, the account must grant the originating account
-        the Service Account Token Creator IAM role.
-        If set as a sequence, the identities from the list must grant
-        Service Account Token Creator IAM role to the directly preceding identity, with first
-        account from the list granting this role to the originating account (templated).
-    """
-
-    template_fields: Sequence[str] = (
-        "bucket",
-        "source_objects",
-        "schema_object",
-        "gcs_schema_bucket",
-        "destination_project_dataset_table",
-        "labels",
-        "table_resource",
-        "impersonation_chain",
-    )
-    template_fields_renderers = {"table_resource": "json"}
-    ui_color = BigQueryUIColors.TABLE.value
-    operator_extra_links = (BigQueryTableLink(),)
-
-    def __init__(
-        self,
-        *,
-        bucket: str | None = None,
-        source_objects: list[str] | None = None,
-        destination_project_dataset_table: str | None = None,
-        table_resource: dict[str, Any] | None = None,
-        schema_fields: list | None = None,
-        schema_object: str | None = None,
-        gcs_schema_bucket: str | None = None,
-        source_format: str | None = None,
-        autodetect: bool = False,
-        compression: str | None = None,
-        skip_leading_rows: int | None = None,
-        field_delimiter: str | None = None,
-        max_bad_records: int = 0,
-        quote_character: str | None = None,
-        allow_quoted_newlines: bool = False,
-        allow_jagged_rows: bool = False,
-        gcp_conn_id: str = "google_cloud_default",
-        google_cloud_storage_conn_id: str = "google_cloud_default",
-        src_fmt_configs: dict | None = None,
-        labels: dict | None = None,
-        encryption_configuration: dict | None = None,
-        location: str | None = None,
-        impersonation_chain: str | Sequence[str] | None = None,
-        bigquery_conn_id: str | None = None,
-        **kwargs,
-    ) -> None:
-        if bigquery_conn_id:
-            warnings.warn(
-                "The bigquery_conn_id parameter has been deprecated. Use the gcp_conn_id parameter instead.",
-                AirflowProviderDeprecationWarning,
-                stacklevel=2,
-            )
-            gcp_conn_id = bigquery_conn_id
-
-        super().__init__(**kwargs)
-
-        self.table_resource = table_resource
-        self.bucket = bucket or ""
-        self.source_objects = source_objects or []
-        self.schema_object = schema_object or None
-        self.gcs_schema_bucket = gcs_schema_bucket or ""
-        self.destination_project_dataset_table = destination_project_dataset_table or ""
-
-        # BQ config
-        kwargs_passed = any(
-            [
-                destination_project_dataset_table,
-                schema_fields,
-                source_format,
-                compression,
-                skip_leading_rows,
-                field_delimiter,
-                max_bad_records,
-                autodetect,
-                quote_character,
-                allow_quoted_newlines,
-                allow_jagged_rows,
-                src_fmt_configs,
-                labels,
-                encryption_configuration,
-            ]
-        )
-
-        if not table_resource:
-            warnings.warn(
-                "Passing table parameters via keywords arguments will be deprecated. "
-                "Please provide table definition using `table_resource` parameter.",
-                AirflowProviderDeprecationWarning,
-                stacklevel=2,
-            )
-            if not bucket:
-                raise ValueError("`bucket` is required when not using `table_resource`.")
-            if not gcs_schema_bucket:
-                gcs_schema_bucket = bucket
-            if not source_objects:
-                raise ValueError("`source_objects` is required when not using `table_resource`.")
-            if not source_format:
-                source_format = "CSV"
-            if not compression:
-                compression = "NONE"
-            if not skip_leading_rows:
-                skip_leading_rows = 0
-            if not field_delimiter:
-                field_delimiter = ","
-            if not destination_project_dataset_table:
-                raise ValueError(
-                    "`destination_project_dataset_table` is required when not using `table_resource`."
-                )
-            self.bucket = bucket
-            self.source_objects = source_objects
-            self.schema_object = schema_object
-            self.gcs_schema_bucket = gcs_schema_bucket
-            self.destination_project_dataset_table = destination_project_dataset_table
-            self.schema_fields = schema_fields
-            self.source_format = source_format
-            self.compression = compression
-            self.skip_leading_rows = skip_leading_rows
-            self.field_delimiter = field_delimiter
-            self.table_resource = None
-        else:
-            pass
-
-        if table_resource and kwargs_passed:
-            raise ValueError("You provided both `table_resource` and exclusive keywords arguments.")
-
-        self.max_bad_records = max_bad_records
-        self.quote_character = quote_character
-        self.allow_quoted_newlines = allow_quoted_newlines
-        self.allow_jagged_rows = allow_jagged_rows
-        self.gcp_conn_id = gcp_conn_id
-        self.google_cloud_storage_conn_id = google_cloud_storage_conn_id
-        self.autodetect = autodetect
-
-        self.src_fmt_configs = src_fmt_configs or {}
-        self.labels = labels
-        self.encryption_configuration = encryption_configuration
-        self.location = location
-        self.impersonation_chain = impersonation_chain
-        self._table: Table | None = None
-
-    def execute(self, context: Context) -> None:
-        bq_hook = BigQueryHook(
-            gcp_conn_id=self.gcp_conn_id,
-            location=self.location,
-            impersonation_chain=self.impersonation_chain,
-        )
-        if self.table_resource:
-            # Save table as attribute for further use by OpenLineage
-            self._table = bq_hook.create_empty_table(
-                table_resource=self.table_resource,
-            )
-            if self._table:
-                BigQueryTableLink.persist(
-                    context=context,
-                    task_instance=self,
-                    dataset_id=self._table.dataset_id,
-                    project_id=self._table.project,
-                    table_id=self._table.table_id,
-                )
-            return
-
-        if not self.schema_fields and self.schema_object and self.source_format != "DATASTORE_BACKUP":
-            gcs_hook = GCSHook(
-                gcp_conn_id=self.google_cloud_storage_conn_id,
-                impersonation_chain=self.impersonation_chain,
-            )
-            schema_fields = json.loads(
-                gcs_hook.download(self.gcs_schema_bucket, self.schema_object).decode("utf-8")
-            )
-        else:
-            schema_fields = self.schema_fields
-
-        source_uris = [f"gs://{self.bucket}/{source_object}" for source_object in self.source_objects]
-
-        project_id, dataset_id, table_id = bq_hook.split_tablename(
-            table_input=self.destination_project_dataset_table,
-            default_project_id=bq_hook.project_id or "",
-        )
-
-        external_data_configuration = {
-            "source_uris": source_uris,
-            "source_format": self.source_format,
-            "autodetect": self.autodetect,
-            "compression": self.compression,
-            "maxBadRecords": self.max_bad_records,
-        }
-        if self.source_format == "CSV":
-            external_data_configuration["csvOptions"] = {
-                "fieldDelimiter": self.field_delimiter,
-                "skipLeadingRows": self.skip_leading_rows,
-                "quote": self.quote_character,
|
|
1899
|
-
"allowQuotedNewlines": self.allow_quoted_newlines,
|
|
1900
|
-
"allowJaggedRows": self.allow_jagged_rows,
|
|
1901
|
-
}
|
|
1902
|
-
|
|
1903
|
-
table_resource = {
|
|
1904
|
-
"tableReference": {
|
|
1905
|
-
"projectId": project_id,
|
|
1906
|
-
"datasetId": dataset_id,
|
|
1907
|
-
"tableId": table_id,
|
|
1908
|
-
},
|
|
1909
|
-
"labels": self.labels,
|
|
1910
|
-
"schema": {"fields": schema_fields},
|
|
1911
|
-
"externalDataConfiguration": external_data_configuration,
|
|
1912
|
-
"location": self.location,
|
|
1913
|
-
"encryptionConfiguration": self.encryption_configuration,
|
|
1914
|
-
}
|
|
1915
|
-
|
|
1916
|
-
# Save table as attribute for further use by OpenLineage
|
|
1917
|
-
self._table = bq_hook.create_empty_table(table_resource=table_resource)
|
|
1918
|
-
if self._table:
|
|
1919
|
-
BigQueryTableLink.persist(
|
|
1920
|
-
context=context,
|
|
1921
|
-
task_instance=self,
|
|
1922
|
-
dataset_id=self._table.dataset_id,
|
|
1923
|
-
project_id=self._table.project,
|
|
1924
|
-
table_id=self._table.table_id,
|
|
1925
|
-
)
|
|
1926
|
-
|
|
1927
|
-
def get_openlineage_facets_on_complete(self, _):
|
|
1928
|
-
"""Implement _on_complete as we will use table resource returned by create method."""
|
|
1929
|
-
from airflow.providers.common.compat.openlineage.facet import Dataset
|
|
1930
|
-
from airflow.providers.google.cloud.openlineage.utils import (
|
|
1931
|
-
BIGQUERY_NAMESPACE,
|
|
1932
|
-
get_facets_from_bq_table,
|
|
1933
|
-
)
|
|
1934
|
-
from airflow.providers.openlineage.extractors import OperatorLineage
|
|
1935
|
-
|
|
1936
|
-
output_dataset = Dataset(
|
|
1937
|
-
namespace=BIGQUERY_NAMESPACE,
|
|
1938
|
-
name=f"{self._table.project}.{self._table.dataset_id}.{self._table.table_id}",
|
|
1939
|
-
facets=get_facets_from_bq_table(self._table),
|
|
1940
|
-
)
|
|
1941
|
-
|
|
1942
|
-
return OperatorLineage(outputs=[output_dataset])
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
1380
|
class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):
|
|
1946
1381
|
"""
|
|
1947
1382
|
Delete an existing dataset from your Project in BigQuery.
|
|
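The removal above drops the deprecated keyword-argument path of `BigQueryCreateExternalTableOperator` (`bucket`, `source_objects`, `schema_object`, and friends) together with its `execute` and OpenLineage methods, so the external-table definition the old code assembled internally is now the caller's responsibility. A minimal migration sketch, assuming the replacement is `BigQueryCreateTableOperator` taking a `table_resource` in the BigQuery REST representation (camelCase keys); all project, bucket, and schema values are placeholders:

```python
# Hedged migration sketch: the table definition that the removed __init__ built
# from bucket/source_objects/schema_object is now passed in explicitly.
from airflow.providers.google.cloud.operators.bigquery import BigQueryCreateTableOperator

create_external_table = BigQueryCreateTableOperator(
    task_id="create_external_table",
    project_id="my-project",          # placeholder
    dataset_id="my_dataset",          # placeholder
    table_id="my_table",              # placeholder
    table_resource={
        "schema": {"fields": [{"name": "id", "type": "INTEGER"}]},
        "externalDataConfiguration": {
            # Same options the removed code derived from its kwargs defaults.
            "sourceUris": ["gs://my-bucket/data.csv"],
            "sourceFormat": "CSV",
            "autodetect": False,
            "compression": "NONE",
            "maxBadRecords": 0,
            "csvOptions": {"fieldDelimiter": ",", "skipLeadingRows": 0},
        },
    },
    gcp_conn_id="google_cloud_default",
)
```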
@@ -1983,6 +1418,7 @@ class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):
     template_fields: Sequence[str] = (
         "dataset_id",
         "project_id",
+        "gcp_conn_id",
         "impersonation_chain",
     )
     ui_color = BigQueryUIColors.DATASET.value
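Here and in the parallel hunks that follow, `gcp_conn_id` joins `template_fields`, so the connection id is rendered through Jinja before execution and can be chosen at run time. A minimal sketch of what that enables, with the Variable name as a placeholder:

```python
# Hedged sketch: with "gcp_conn_id" templated, the connection can come from an
# Airflow Variable (or any other template source) instead of a hardcoded string.
from airflow.providers.google.cloud.operators.bigquery import BigQueryDeleteDatasetOperator

delete_dataset = BigQueryDeleteDatasetOperator(
    task_id="delete_dataset",
    project_id="my-project",                  # placeholder
    dataset_id="my_dataset",                  # placeholder
    delete_contents=True,
    gcp_conn_id="{{ var.value.bq_conn_id }}",  # rendered per task instance
)
```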
@@ -2062,6 +1498,7 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
         "dataset_id",
         "project_id",
         "dataset_reference",
+        "gcp_conn_id",
         "impersonation_chain",
     )
     template_fields_renderers = {"dataset_reference": "json"}
@@ -2116,7 +1553,6 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
             )
             persist_kwargs = {
                 "context": context,
-                "task_instance": self,
                 "project_id": dataset["datasetReference"]["projectId"],
                 "dataset_id": dataset["datasetReference"]["datasetId"],
             }
@@ -2128,7 +1564,6 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
             )
             persist_kwargs = {
                 "context": context,
-                "task_instance": self,
                 "project_id": project_id,
                 "dataset_id": dataset_id,
             }
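The two `persist_kwargs` hunks above, like the matching `.persist(...)` edits below, drop the `task_instance=self` argument: the provider's link classes now resolve the task instance from `context` themselves. A sketch of the new call shape, assuming a custom operator that exposes the dataset link; the class name and placeholder values are illustrative:

```python
# Hedged sketch (assumed API shape): under the new signature, link classes read
# the task instance from `context`, so callers pass only the link parameters.
from airflow.providers.google.cloud.links.bigquery import BigQueryDatasetLink
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator


class MyDatasetOperator(GoogleCloudBaseOperator):  # hypothetical example operator
    operator_extra_links = (BigQueryDatasetLink(),)

    def execute(self, context):
        BigQueryDatasetLink.persist(
            context=context,          # no task_instance argument anymore
            dataset_id="my_dataset",  # placeholder
            project_id="my-project",  # placeholder
        )
```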
@@ -2168,6 +1603,7 @@ class BigQueryGetDatasetOperator(GoogleCloudBaseOperator):
     template_fields: Sequence[str] = (
         "dataset_id",
         "project_id",
+        "gcp_conn_id",
         "impersonation_chain",
     )
     ui_color = BigQueryUIColors.DATASET.value
@@ -2200,7 +1636,6 @@ class BigQueryGetDatasetOperator(GoogleCloudBaseOperator):
         dataset_api_repr = dataset.to_api_repr()
         BigQueryDatasetLink.persist(
             context=context,
-            task_instance=self,
             dataset_id=dataset_api_repr["datasetReference"]["datasetId"],
             project_id=dataset_api_repr["datasetReference"]["projectId"],
         )
@@ -2233,6 +1668,7 @@ class BigQueryGetDatasetTablesOperator(GoogleCloudBaseOperator):
     template_fields: Sequence[str] = (
         "dataset_id",
         "project_id",
+        "gcp_conn_id",
         "impersonation_chain",
     )
     ui_color = BigQueryUIColors.DATASET.value
@@ -2303,6 +1739,7 @@ class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
         "dataset_id",
         "table_id",
         "project_id",
+        "gcp_conn_id",
         "impersonation_chain",
     )
     template_fields_renderers = {"table_resource": "json"}
@@ -2349,7 +1786,6 @@ class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
         if self._table:
             BigQueryTableLink.persist(
                 context=context,
-                task_instance=self,
                 dataset_id=self._table["tableReference"]["datasetId"],
                 project_id=self._table["tableReference"]["projectId"],
                 table_id=self._table["tableReference"]["tableId"],
@@ -2410,6 +1846,7 @@ class BigQueryUpdateDatasetOperator(GoogleCloudBaseOperator):
     template_fields: Sequence[str] = (
         "dataset_id",
         "project_id",
+        "gcp_conn_id",
         "impersonation_chain",
     )
     template_fields_renderers = {"dataset_resource": "json"}
@@ -2452,7 +1889,6 @@ class BigQueryUpdateDatasetOperator(GoogleCloudBaseOperator):
         dataset_api_repr = dataset.to_api_repr()
         BigQueryDatasetLink.persist(
             context=context,
-            task_instance=self,
             dataset_id=dataset_api_repr["datasetReference"]["datasetId"],
             project_id=dataset_api_repr["datasetReference"]["projectId"],
         )
@@ -2486,6 +1922,7 @@ class BigQueryDeleteTableOperator(GoogleCloudBaseOperator):
 
     template_fields: Sequence[str] = (
         "deletion_dataset_table",
+        "gcp_conn_id",
         "impersonation_chain",
     )
     ui_color = BigQueryUIColors.TABLE.value
@@ -2580,6 +2017,7 @@ class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
     template_fields: Sequence[str] = (
         "dataset_id",
         "table_resource",
+        "gcp_conn_id",
         "impersonation_chain",
         "project_id",
     )
@@ -2624,7 +2062,6 @@ class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
         if self._table:
             BigQueryTableLink.persist(
                 context=context,
-                task_instance=self,
                 dataset_id=self._table["tableReference"]["datasetId"],
                 project_id=self._table["tableReference"]["projectId"],
                 table_id=self._table["tableReference"]["tableId"],
@@ -2708,6 +2145,7 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
         "dataset_id",
         "table_id",
         "project_id",
+        "gcp_conn_id",
         "impersonation_chain",
     )
     template_fields_renderers = {"schema_fields_updates": "json"}
@@ -2754,7 +2192,6 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
         if self._table:
             BigQueryTableLink.persist(
                 context=context,
-                task_instance=self,
                 dataset_id=self._table["tableReference"]["datasetId"],
                 project_id=self._table["tableReference"]["projectId"],
                 table_id=self._table["tableReference"]["tableId"],
@@ -2838,6 +2275,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera
     template_fields: Sequence[str] = (
         "configuration",
         "job_id",
+        "gcp_conn_id",
         "impersonation_chain",
         "project_id",
     )
@@ -2949,8 +2387,9 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera
             job_id=self.job_id,
             dag_id=self.dag_id,
             task_id=self.task_id,
-            logical_date=
+            logical_date=None,
             configuration=self.configuration,
+            run_after=hook.get_run_after_or_logical_date(context),
             force_rerun=self.force_rerun,
         )
 
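This hunk stops keying BigQuery job-id deduplication on the run's logical date: `logical_date` is pinned to `None` and a new `run_after` value, obtained from `get_run_after_or_logical_date(context)`, takes over (presumably because manually- or asset-triggered runs on Airflow 3 carry no logical date, and the hook method falls back to the logical date when one exists). A hedged sketch of the resulting call, with `_job_id_for_run` a hypothetical helper standing in for the surrounding `execute` code:

```python
# Hedged sketch: identifiers mirror the diff above; only the two changed
# keyword arguments differ from the pre-19.x call site.
def _job_id_for_run(self, hook, context):
    return hook.generate_job_id(
        job_id=self.job_id,
        dag_id=self.dag_id,
        task_id=self.task_id,
        logical_date=None,  # no longer used as the dedup key
        configuration=self.configuration,
        run_after=hook.get_run_after_or_logical_date(context),
        force_rerun=self.force_rerun,
    )
```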
@@ -3000,7 +2439,6 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera
                     table = job_configuration[job_type][table_prop]
                     persist_kwargs = {
                         "context": context,
-                        "task_instance": self,
                         "project_id": self.project_id,
                         "table_id": table,
                     }
@@ -3014,7 +2452,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera
 
         if self.project_id:
             job_id_path = convert_job_id(
-                job_id=self.job_id,
+                job_id=self.job_id,
                 project_id=self.project_id,
                 location=self.location,
             )
@@ -3022,7 +2460,6 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera
 
         persist_kwargs = {
             "context": context,
-            "task_instance": self,
            "project_id": self.project_id,
            "location": self.location,
            "job_id": self.job_id,