apache-airflow-providers-google 12.0.0rc2__py3-none-any.whl → 13.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/LICENSE +0 -52
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +27 -13
- airflow/providers/google/ads/transfers/ads_to_gcs.py +18 -4
- airflow/providers/google/assets/bigquery.py +17 -0
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +2 -3
- airflow/providers/google/cloud/hooks/alloy_db.py +736 -8
- airflow/providers/google/cloud/hooks/automl.py +10 -4
- airflow/providers/google/cloud/hooks/bigquery.py +125 -22
- airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
- airflow/providers/google/cloud/hooks/bigtable.py +2 -3
- airflow/providers/google/cloud/hooks/cloud_batch.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_build.py +4 -5
- airflow/providers/google/cloud/hooks/cloud_composer.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_run.py +3 -4
- airflow/providers/google/cloud/hooks/cloud_sql.py +7 -3
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +119 -7
- airflow/providers/google/cloud/hooks/compute.py +3 -3
- airflow/providers/google/cloud/hooks/datacatalog.py +3 -4
- airflow/providers/google/cloud/hooks/dataflow.py +12 -12
- airflow/providers/google/cloud/hooks/dataform.py +2 -3
- airflow/providers/google/cloud/hooks/datafusion.py +2 -2
- airflow/providers/google/cloud/hooks/dataplex.py +1032 -11
- airflow/providers/google/cloud/hooks/dataproc.py +4 -5
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +3 -4
- airflow/providers/google/cloud/hooks/dlp.py +3 -4
- airflow/providers/google/cloud/hooks/gcs.py +7 -6
- airflow/providers/google/cloud/hooks/kms.py +2 -3
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +8 -8
- airflow/providers/google/cloud/hooks/life_sciences.py +1 -1
- airflow/providers/google/cloud/hooks/managed_kafka.py +482 -0
- airflow/providers/google/cloud/hooks/natural_language.py +2 -3
- airflow/providers/google/cloud/hooks/os_login.py +2 -3
- airflow/providers/google/cloud/hooks/pubsub.py +6 -6
- airflow/providers/google/cloud/hooks/secret_manager.py +2 -3
- airflow/providers/google/cloud/hooks/spanner.py +2 -2
- airflow/providers/google/cloud/hooks/speech_to_text.py +2 -3
- airflow/providers/google/cloud/hooks/stackdriver.py +4 -4
- airflow/providers/google/cloud/hooks/tasks.py +3 -4
- airflow/providers/google/cloud/hooks/text_to_speech.py +2 -3
- airflow/providers/google/cloud/hooks/translate.py +236 -5
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +9 -4
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +4 -5
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -3
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -181
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -3
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +3 -4
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +2 -3
- airflow/providers/google/cloud/hooks/video_intelligence.py +2 -3
- airflow/providers/google/cloud/hooks/vision.py +3 -4
- airflow/providers/google/cloud/hooks/workflows.py +2 -3
- airflow/providers/google/cloud/links/alloy_db.py +46 -0
- airflow/providers/google/cloud/links/bigquery.py +25 -0
- airflow/providers/google/cloud/links/dataplex.py +172 -2
- airflow/providers/google/cloud/links/kubernetes_engine.py +1 -2
- airflow/providers/google/cloud/links/managed_kafka.py +104 -0
- airflow/providers/google/cloud/links/translate.py +28 -0
- airflow/providers/google/cloud/log/gcs_task_handler.py +3 -3
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +11 -10
- airflow/providers/google/cloud/openlineage/facets.py +67 -0
- airflow/providers/google/cloud/openlineage/mixins.py +438 -173
- airflow/providers/google/cloud/openlineage/utils.py +394 -61
- airflow/providers/google/cloud/operators/alloy_db.py +980 -69
- airflow/providers/google/cloud/operators/automl.py +83 -245
- airflow/providers/google/cloud/operators/bigquery.py +377 -74
- airflow/providers/google/cloud/operators/bigquery_dts.py +126 -13
- airflow/providers/google/cloud/operators/bigtable.py +1 -3
- airflow/providers/google/cloud/operators/cloud_base.py +1 -2
- airflow/providers/google/cloud/operators/cloud_batch.py +2 -4
- airflow/providers/google/cloud/operators/cloud_build.py +3 -5
- airflow/providers/google/cloud/operators/cloud_composer.py +5 -7
- airflow/providers/google/cloud/operators/cloud_memorystore.py +4 -6
- airflow/providers/google/cloud/operators/cloud_run.py +6 -5
- airflow/providers/google/cloud/operators/cloud_sql.py +20 -8
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +62 -8
- airflow/providers/google/cloud/operators/compute.py +3 -4
- airflow/providers/google/cloud/operators/datacatalog.py +9 -11
- airflow/providers/google/cloud/operators/dataflow.py +1 -112
- airflow/providers/google/cloud/operators/dataform.py +3 -5
- airflow/providers/google/cloud/operators/datafusion.py +1 -1
- airflow/providers/google/cloud/operators/dataplex.py +2046 -7
- airflow/providers/google/cloud/operators/dataproc.py +102 -17
- airflow/providers/google/cloud/operators/dataproc_metastore.py +7 -9
- airflow/providers/google/cloud/operators/dlp.py +17 -19
- airflow/providers/google/cloud/operators/gcs.py +14 -17
- airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/operators/managed_kafka.py +788 -0
- airflow/providers/google/cloud/operators/natural_language.py +3 -5
- airflow/providers/google/cloud/operators/pubsub.py +39 -7
- airflow/providers/google/cloud/operators/speech_to_text.py +3 -5
- airflow/providers/google/cloud/operators/stackdriver.py +3 -5
- airflow/providers/google/cloud/operators/tasks.py +4 -6
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -4
- airflow/providers/google/cloud/operators/translate.py +414 -5
- airflow/providers/google/cloud/operators/translate_speech.py +2 -4
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +9 -8
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +6 -8
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +0 -322
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +4 -6
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +4 -6
- airflow/providers/google/cloud/operators/video_intelligence.py +3 -5
- airflow/providers/google/cloud/operators/vision.py +4 -6
- airflow/providers/google/cloud/operators/workflows.py +5 -7
- airflow/providers/google/cloud/secrets/secret_manager.py +1 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +3 -5
- airflow/providers/google/cloud/sensors/bigtable.py +2 -3
- airflow/providers/google/cloud/sensors/cloud_composer.py +32 -8
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +39 -1
- airflow/providers/google/cloud/sensors/dataplex.py +4 -6
- airflow/providers/google/cloud/sensors/dataproc.py +2 -3
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -2
- airflow/providers/google/cloud/sensors/gcs.py +2 -4
- airflow/providers/google/cloud/sensors/pubsub.py +2 -3
- airflow/providers/google/cloud/sensors/workflows.py +3 -5
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +5 -5
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +10 -12
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +36 -4
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +27 -2
- airflow/providers/google/cloud/transfers/mysql_to_gcs.py +27 -2
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +27 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +34 -5
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +15 -0
- airflow/providers/google/cloud/transfers/trino_to_gcs.py +25 -2
- airflow/providers/google/cloud/triggers/bigquery_dts.py +1 -2
- airflow/providers/google/cloud/triggers/cloud_batch.py +1 -2
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -2
- airflow/providers/google/cloud/triggers/cloud_composer.py +13 -3
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +102 -4
- airflow/providers/google/cloud/triggers/dataflow.py +2 -3
- airflow/providers/google/cloud/triggers/dataplex.py +1 -2
- airflow/providers/google/cloud/triggers/dataproc.py +2 -3
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +1 -2
- airflow/providers/google/cloud/triggers/vertex_ai.py +7 -8
- airflow/providers/google/cloud/utils/credentials_provider.py +15 -8
- airflow/providers/google/cloud/utils/external_token_supplier.py +1 -0
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/consts.py +1 -2
- airflow/providers/google/common/hooks/base_google.py +8 -7
- airflow/providers/google/get_provider_info.py +186 -134
- airflow/providers/google/marketing_platform/hooks/analytics_admin.py +2 -3
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +5 -7
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/METADATA +41 -58
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/RECORD +157 -159
- airflow/providers/google/cloud/example_dags/example_facebook_ads_to_gcs.py +0 -141
- airflow/providers/google/cloud/example_dags/example_looker.py +0 -64
- airflow/providers/google/cloud/example_dags/example_presto_to_gcs.py +0 -194
- airflow/providers/google/cloud/example_dags/example_salesforce_to_gcs.py +0 -129
- airflow/providers/google/marketing_platform/example_dags/__init__.py +0 -16
- airflow/providers/google/marketing_platform/example_dags/example_display_video.py +0 -213
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-12.0.0rc2.dist-info → apache_airflow_providers_google-13.0.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/bigquery.py

@@ -27,10 +27,6 @@ from collections.abc import Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any, SupportsAbs
 
-from google.api_core.exceptions import Conflict
-from google.cloud.bigquery import DEFAULT_RETRY, CopyJob, ExtractJob, LoadJob, QueryJob, Row
-from google.cloud.bigquery.table import RowIterator
-
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
 from airflow.providers.common.sql.operators.sql import (  # type: ignore[attr-defined] # for _parse_boolean
@@ -43,8 +39,12 @@ from airflow.providers.common.sql.operators.sql import (  # type: ignore[attr-defined] # for _parse_boolean
 )
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
 from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url
-from airflow.providers.google.cloud.links.bigquery import BigQueryDatasetLink, BigQueryTableLink
-from airflow.providers.google.cloud.openlineage.mixins import _BigQueryOpenLineageMixin
+from airflow.providers.google.cloud.links.bigquery import (
+    BigQueryDatasetLink,
+    BigQueryJobDetailLink,
+    BigQueryTableLink,
+)
+from airflow.providers.google.cloud.openlineage.mixins import _BigQueryInsertJobOperatorOpenLineageMixin
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.bigquery import (
     BigQueryCheckTrigger,
@@ -54,15 +54,19 @@ from airflow.providers.google.cloud.triggers.bigquery import (
     BigQueryValueCheckTrigger,
 )
 from airflow.providers.google.cloud.utils.bigquery import convert_job_id
+from airflow.providers.google.common.deprecated import deprecated
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils.helpers import exactly_one
+from google.api_core.exceptions import Conflict
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.cloud.bigquery import DEFAULT_RETRY, CopyJob, ExtractJob, LoadJob, QueryJob, Row
+from google.cloud.bigquery.table import RowIterator, Table, TableListItem, TableReference
 
 if TYPE_CHECKING:
+    from airflow.utils.context import Context
     from google.api_core.retry import Retry
     from google.cloud.bigquery import UnknownJob
 
-    from airflow.utils.context import Context
-
 
 BIGQUERY_JOB_DETAILS_LINK_FMT = "https://console.cloud.google.com/bigquery?j={job_id}"
 
@@ -1158,6 +1162,185 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator, _BigQueryOperatorsEncryptionConfigurationMixin):
         return event["records"]
 
 
+class BigQueryCreateTableOperator(GoogleCloudBaseOperator):
+    """
+    Creates a new table in the specified BigQuery dataset, optionally with schema.
+
+    The schema to be used for the BigQuery table may be specified in one of
+    two ways. You may either directly pass the schema fields in, or you may
+    point the operator to a Google Cloud Storage object name. The object in
+    Google Cloud Storage must be a JSON file with the schema fields in it.
+    You can also create a table without schema.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:BigQueryCreateTableOperator`
+
+    :param project_id: Optional. The project to create the table into.
+    :param dataset_id: Required. The dataset to create the table into.
+    :param table_id: Required. The Name of the table to be created.
+    :param table_resource: Required. Table resource as described in documentation:
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table
+        If ``table`` is a reference, an empty table is created with the specified ID. The dataset that
+        the table belongs to must already exist.
+    :param if_exists: Optional. What should Airflow do if the table exists. If set to `log`,
+        the TI will be passed to success and an error message will be logged. Set to `ignore` to ignore
+        the error, set to `fail` to fail the TI, and set to `skip` to skip it.
+    :param gcs_schema_object: Optional. Full path to the JSON file containing schema. For
+        example: ``gs://test-bucket/dir1/dir2/employee_schema.json``
+    :param gcp_conn_id: Optional. The connection ID used to connect to Google Cloud and
+        interact with the Bigquery service.
+    :param google_cloud_storage_conn_id: Optional. The connection ID used to connect to Google Cloud.
+        and interact with the Google Cloud Storage service.
+    :param location: Optional. The location used for the operation.
+    :param retry: Optional. A retry object used to retry requests. If `None` is specified, requests
+        will not be retried.
+    :param timeout: Optional. The amount of time, in seconds, to wait for the request to complete.
+        Note that if `retry` is specified, the timeout applies to each individual attempt.
+    :param impersonation_chain: Optional. Service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account.
+    """
+
+    template_fields: Sequence[str] = (
+        "dataset_id",
+        "table_id",
+        "table_resource",
+        "project_id",
+        "gcs_schema_object",
+        "impersonation_chain",
+    )
+    template_fields_renderers = {"table_resource": "json"}
+    ui_color = BigQueryUIColors.TABLE.value
+    operator_extra_links = (BigQueryTableLink(),)
+
+    def __init__(
+        self,
+        *,
+        dataset_id: str,
+        table_id: str,
+        table_resource: dict[str, Any] | Table | TableReference | TableListItem,
+        project_id: str = PROVIDE_PROJECT_ID,
+        location: str | None = None,
+        gcs_schema_object: str | None = None,
+        gcp_conn_id: str = "google_cloud_default",
+        google_cloud_storage_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        if_exists: str = "log",
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.project_id = project_id
+        self.location = location
+        self.dataset_id = dataset_id
+        self.table_id = table_id
+        self.table_resource = table_resource
+        self.if_exists = IfExistAction(if_exists)
+        self.gcs_schema_object = gcs_schema_object
+        self.gcp_conn_id = gcp_conn_id
+        self.google_cloud_storage_conn_id = google_cloud_storage_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.retry = retry
+        self.timeout = timeout
+        self._table: Table | None = None
+
+    def execute(self, context: Context) -> None:
+        bq_hook = BigQueryHook(
+            gcp_conn_id=self.gcp_conn_id,
+            location=self.location,
+            impersonation_chain=self.impersonation_chain,
+        )
+        if self.gcs_schema_object:
+            gcs_bucket, gcs_object = _parse_gcs_url(self.gcs_schema_object)
+            gcs_hook = GCSHook(
+                gcp_conn_id=self.google_cloud_storage_conn_id,
+                impersonation_chain=self.impersonation_chain,
+            )
+            schema_fields_string = gcs_hook.download_as_byte_array(gcs_bucket, gcs_object).decode("utf-8")
+            schema_fields = json.loads(schema_fields_string)
+        else:
+            schema_fields = None
+
+        try:
+            self.log.info("Creating table...")
+            self._table = bq_hook.create_table(
+                project_id=self.project_id,
+                dataset_id=self.dataset_id,
+                table_id=self.table_id,
+                schema_fields=schema_fields,
+                table_resource=self.table_resource,
+                exists_ok=self.if_exists == IfExistAction.IGNORE,
+                timeout=self.timeout,
+                location=self.location,
+            )
+            if self._table:
+                persist_kwargs = {
+                    "context": context,
+                    "task_instance": self,
+                    "project_id": self._table.to_api_repr()["tableReference"]["projectId"],
+                    "dataset_id": self._table.to_api_repr()["tableReference"]["datasetId"],
+                    "table_id": self._table.to_api_repr()["tableReference"]["tableId"],
+                }
+                self.log.info(
+                    "Table %s.%s.%s created successfully",
+                    self._table.project,
+                    self._table.dataset_id,
+                    self._table.table_id,
+                )
+            else:
+                raise AirflowException("Table creation failed.")
+        except Conflict:
+            error_msg = f"Table {self.dataset_id}.{self.table_id} already exists."
+            if self.if_exists == IfExistAction.LOG:
+                self.log.info(error_msg)
+                persist_kwargs = {
+                    "context": context,
+                    "task_instance": self,
+                    "project_id": self.project_id or bq_hook.project_id,
+                    "dataset_id": self.dataset_id,
+                    "table_id": self.table_id,
+                }
+            elif self.if_exists == IfExistAction.FAIL:
+                raise AirflowException(error_msg)
+            else:
+                raise AirflowSkipException(error_msg)
+
+        BigQueryTableLink.persist(**persist_kwargs)
+
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we will use table resource returned by create method."""
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            get_facets_from_bq_table,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        if not self._table:
+            self.log.debug("OpenLineage did not find `self._table` attribute.")
+            return OperatorLineage()
+
+        output_dataset = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=f"{self._table.project}.{self._table.dataset_id}.{self._table.table_id}",
+            facets=get_facets_from_bq_table(self._table),
+        )
+
+        return OperatorLineage(outputs=[output_dataset])
+
+
+@deprecated(
+    planned_removal_date="July 30, 2025",
+    use_instead="airflow.providers.google.cloud.operators.bigquery.BigQueryCreateTableOperator",
+    category=AirflowProviderDeprecationWarning,
+)
 class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
     """
     Creates a new table in the specified BigQuery dataset, optionally with schema.
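For orientation, here is a minimal sketch of how the new operator could be wired into a DAG (not part of this diff): the DAG id, project, dataset, and table names are hypothetical, while the operator name and parameters come from the class added above. `table_resource` uses the standard BigQuery Table resource shape.

```python
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.bigquery import BigQueryCreateTableOperator

with DAG(
    dag_id="example_bigquery_create_table",  # hypothetical DAG id
    start_date=datetime(2025, 1, 1),
    schedule=None,
) as dag:
    create_table = BigQueryCreateTableOperator(
        task_id="create_table",
        project_id="my-project",  # hypothetical project
        dataset_id="my_dataset",  # the dataset must already exist
        table_id="my_table",
        # Standard BigQuery Table resource; here only a schema is provided.
        table_resource={
            "schema": {
                "fields": [
                    {"name": "id", "type": "INTEGER", "mode": "REQUIRED"},
                    {"name": "name", "type": "STRING", "mode": "NULLABLE"},
                ]
            }
        },
        if_exists="ignore",  # reuse an existing table instead of failing
    )
```

Unlike the now-deprecated `BigQueryCreateEmptyTableOperator`, schema, partitioning, and similar settings travel inside `table_resource` (or via `gcs_schema_object`) rather than as separate constructor arguments.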
@@ -1335,6 +1518,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
         self.cluster_fields = cluster_fields
         self.table_resource = table_resource
         self.impersonation_chain = impersonation_chain
+        self._table: Table | None = None
         if exists_ok is not None:
             warnings.warn(
                 "`exists_ok` parameter is deprecated, please use `if_exists`",
@@ -1365,6 +1549,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
 
         try:
             self.log.info("Creating table")
+            # Save table as attribute for further use by OpenLineage
             self._table = bq_hook.create_empty_table(
                 project_id=self.project_id,
                 dataset_id=self.dataset_id,
@@ -1379,19 +1564,22 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
                 table_resource=self.table_resource,
                 exists_ok=self.if_exists == IfExistAction.IGNORE,
             )
-            persist_kwargs = {
-                "context": context,
-                "task_instance": self,
-                "project_id": self._table.to_api_repr()["tableReference"]["projectId"],
-                "dataset_id": self._table.to_api_repr()["tableReference"]["datasetId"],
-                "table_id": self._table.to_api_repr()["tableReference"]["tableId"],
-            }
-            self.log.info(
-                "Table %s.%s.%s created successfully",
-                self._table.project,
-                self._table.dataset_id,
-                self._table.table_id,
-            )
+            if self._table:
+                persist_kwargs = {
+                    "context": context,
+                    "task_instance": self,
+                    "project_id": self._table.to_api_repr()["tableReference"]["projectId"],
+                    "dataset_id": self._table.to_api_repr()["tableReference"]["datasetId"],
+                    "table_id": self._table.to_api_repr()["tableReference"]["tableId"],
+                }
+                self.log.info(
+                    "Table %s.%s.%s created successfully",
+                    self._table.project,
+                    self._table.dataset_id,
+                    self._table.table_id,
+                )
+            else:
+                raise AirflowException("Table creation failed.")
         except Conflict:
             error_msg = f"Table {self.dataset_id}.{self.table_id} already exists."
             if self.if_exists == IfExistAction.LOG:
@@ -1410,7 +1598,8 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
 
         BigQueryTableLink.persist(**persist_kwargs)
 
-    def get_openlineage_facets_on_complete(self, task_instance):
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we will use table resource returned by create method."""
         from airflow.providers.common.compat.openlineage.facet import Dataset
         from airflow.providers.google.cloud.openlineage.utils import (
             BIGQUERY_NAMESPACE,
@@ -1418,17 +1607,24 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
         )
         from airflow.providers.openlineage.extractors import OperatorLineage
 
-        table_info = self._table.to_api_repr()["tableReference"]
-        table_id = ".".join((table_info["projectId"], table_info["datasetId"], table_info["tableId"]))
+        if not self._table:
+            self.log.debug("OpenLineage did not find `self._table` attribute.")
+            return OperatorLineage()
+
         output_dataset = Dataset(
             namespace=BIGQUERY_NAMESPACE,
-            name=table_id,
+            name=f"{self._table.project}.{self._table.dataset_id}.{self._table.table_id}",
             facets=get_facets_from_bq_table(self._table),
         )
 
         return OperatorLineage(outputs=[output_dataset])
 
 
+@deprecated(
+    planned_removal_date="July 30, 2025",
+    use_instead="airflow.providers.google.cloud.operators.bigquery.BigQueryCreateTableOperator",
+    category=AirflowProviderDeprecationWarning,
+)
 class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
     """
     Create a new external table with data from Google Cloud Storage.
@@ -1645,6 +1841,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
         self.encryption_configuration = encryption_configuration
         self.location = location
         self.impersonation_chain = impersonation_chain
+        self._table: Table | None = None
 
     def execute(self, context: Context) -> None:
         bq_hook = BigQueryHook(
@@ -1653,16 +1850,18 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
             impersonation_chain=self.impersonation_chain,
         )
         if self.table_resource:
+            # Save table as attribute for further use by OpenLineage
             self._table = bq_hook.create_empty_table(
                 table_resource=self.table_resource,
             )
-            BigQueryTableLink.persist(
-                context=context,
-                task_instance=self,
-                dataset_id=self._table.dataset_id,
-                project_id=self._table.project,
-                table_id=self._table.table_id,
-            )
+            if self._table:
+                BigQueryTableLink.persist(
+                    context=context,
+                    task_instance=self,
+                    dataset_id=self._table.dataset_id,
+                    project_id=self._table.project,
+                    table_id=self._table.table_id,
+                )
             return
 
         if not self.schema_fields and self.schema_object and self.source_format != "DATASTORE_BACKUP":
@@ -1712,19 +1911,19 @@
             "encryptionConfiguration": self.encryption_configuration,
         }
 
-        self._table = bq_hook.create_empty_table(
-            table_resource=table_resource,
-        )
-
-        BigQueryTableLink.persist(
-            context=context,
-            task_instance=self,
-            dataset_id=self._table.dataset_id,
-            project_id=self._table.project,
-            table_id=self._table.table_id,
-        )
+        # Save table as attribute for further use by OpenLineage
+        self._table = bq_hook.create_empty_table(table_resource=table_resource)
+        if self._table:
+            BigQueryTableLink.persist(
+                context=context,
+                task_instance=self,
+                dataset_id=self._table.dataset_id,
+                project_id=self._table.project,
+                table_id=self._table.table_id,
+            )
 
-    def get_openlineage_facets_on_complete(self, task_instance):
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we will use table resource returned by create method."""
         from airflow.providers.common.compat.openlineage.facet import Dataset
         from airflow.providers.google.cloud.openlineage.utils import (
             BIGQUERY_NAMESPACE,
@@ -1732,11 +1931,9 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
         )
         from airflow.providers.openlineage.extractors import OperatorLineage
 
-        table_info = self._table.to_api_repr()["tableReference"]
-        table_id = ".".join((table_info["projectId"], table_info["datasetId"], table_info["tableId"]))
         output_dataset = Dataset(
             namespace=BIGQUERY_NAMESPACE,
-            name=table_id,
+            name=f"{self._table.project}.{self._table.dataset_id}.{self._table.table_id}",
             facets=get_facets_from_bq_table(self._table),
         )
 
@@ -2129,6 +2326,7 @@ class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
         self.gcp_conn_id = gcp_conn_id
         self.table_resource = table_resource
         self.impersonation_chain = impersonation_chain
+        self._table: dict | None = None
         super().__init__(**kwargs)
 
     def execute(self, context: Context):
@@ -2137,7 +2335,8 @@ class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
             impersonation_chain=self.impersonation_chain,
         )
 
-        table = bq_hook.update_table(
+        # Save table as attribute for further use by OpenLineage
+        self._table = bq_hook.update_table(
             table_resource=self.table_resource,
             fields=self.fields,
             dataset_id=self.dataset_id,
@@ -2145,15 +2344,34 @@
             project_id=self.project_id,
         )
 
-        BigQueryTableLink.persist(
-            context=context,
-            task_instance=self,
-            dataset_id=table["tableReference"]["datasetId"],
-            project_id=table["tableReference"]["projectId"],
-            table_id=table["tableReference"]["tableId"],
+        if self._table:
+            BigQueryTableLink.persist(
+                context=context,
+                task_instance=self,
+                dataset_id=self._table["tableReference"]["datasetId"],
+                project_id=self._table["tableReference"]["projectId"],
+                table_id=self._table["tableReference"]["tableId"],
+            )
+
+        return self._table
+
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we will use table resource returned by update method."""
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            get_facets_from_bq_table,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        table = Table.from_api_repr(self._table)
+        output_dataset = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=f"{table.project}.{table.dataset_id}.{table.table_id}",
+            facets=get_facets_from_bq_table(table),
         )
 
-        return table
+        return OperatorLineage(outputs=[output_dataset])
 
 
 class BigQueryUpdateDatasetOperator(GoogleCloudBaseOperator):
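The lineage methods added here and in the neighboring operators all derive the OpenLineage dataset name the same way: rebuild a `google.cloud.bigquery.Table` from the API representation and join project, dataset, and table ids. A small sketch of that round-trip (the table resource dict is hypothetical; `Table.from_api_repr` is the real client-library call used above):

```python
from google.cloud.bigquery import Table

# Hypothetical API representation, shaped like the dicts returned by
# update_table / run_table_upsert above.
table_resource = {
    "tableReference": {
        "projectId": "my-project",
        "datasetId": "my_dataset",
        "tableId": "my_table",
    }
}
table = Table.from_api_repr(table_resource)

# The OpenLineage output dataset is named "<project>.<dataset>.<table>"
# under the BigQuery namespace (BIGQUERY_NAMESPACE).
assert f"{table.project}.{table.dataset_id}.{table.table_id}" == "my-project.my_dataset.my_table"
```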
@@ -2287,15 +2505,47 @@ class BigQueryDeleteTableOperator(GoogleCloudBaseOperator):
         self.ignore_if_missing = ignore_if_missing
         self.location = location
         self.impersonation_chain = impersonation_chain
+        self.hook: BigQueryHook | None = None
 
     def execute(self, context: Context) -> None:
         self.log.info("Deleting: %s", self.deletion_dataset_table)
-        hook = BigQueryHook(
+        # Save hook as attribute for further use by OpenLineage
+        self.hook = BigQueryHook(
             gcp_conn_id=self.gcp_conn_id,
             location=self.location,
             impersonation_chain=self.impersonation_chain,
         )
-        hook.delete_table(table_id=self.deletion_dataset_table, not_found_ok=self.ignore_if_missing)
+        self.hook.delete_table(table_id=self.deletion_dataset_table, not_found_ok=self.ignore_if_missing)
+
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we need default project_id from hook."""
+        from airflow.providers.common.compat.openlineage.facet import (
+            Dataset,
+            LifecycleStateChange,
+            LifecycleStateChangeDatasetFacet,
+            PreviousIdentifier,
+        )
+        from airflow.providers.google.cloud.openlineage.utils import BIGQUERY_NAMESPACE
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        bq_table_id = str(
+            TableReference.from_string(self.deletion_dataset_table, default_project=self.hook.project_id)
+        )
+        ds = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=bq_table_id,
+            facets={
+                "lifecycleStateChange": LifecycleStateChangeDatasetFacet(
+                    lifecycleStateChange=LifecycleStateChange.DROP.value,
+                    previousIdentifier=PreviousIdentifier(
+                        namespace=BIGQUERY_NAMESPACE,
+                        name=bq_table_id,
+                    ),
+                )
+            },
+        )
+
+        return OperatorLineage(inputs=[ds])
 
 
 class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
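The delete operator's new lineage method reports the dropped table as an input dataset carrying a `LifecycleStateChangeDatasetFacet`, resolving a possibly project-less `deletion_dataset_table` against the hook's default project. A sketch of that resolution (ids are hypothetical; `TableReference.from_string` is the real client-library call used above):

```python
from google.cloud.bigquery.table import TableReference

# "dataset.table" carries no project part, so the hook's default
# project fills it in.
ref = TableReference.from_string("my_dataset.my_table", default_project="my-project")
assert str(ref) == "my-project.my_dataset.my_table"

# A fully qualified id passes through unchanged.
ref = TableReference.from_string("other-project.my_dataset.my_table", default_project="my-project")
assert str(ref) == "other-project.my_dataset.my_table"
```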
@@ -2354,6 +2604,7 @@ class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
         self.gcp_conn_id = gcp_conn_id
         self.location = location
         self.impersonation_chain = impersonation_chain
+        self._table: dict | None = None
 
     def execute(self, context: Context) -> None:
         self.log.info("Upserting Dataset: %s with table_resource: %s", self.dataset_id, self.table_resource)
@@ -2362,18 +2613,38 @@
             location=self.location,
             impersonation_chain=self.impersonation_chain,
         )
-        table = hook.run_table_upsert(
+        # Save table as attribute for further use by OpenLineage
+        self._table = hook.run_table_upsert(
             dataset_id=self.dataset_id,
             table_resource=self.table_resource,
             project_id=self.project_id,
         )
-        BigQueryTableLink.persist(
-            context=context,
-            task_instance=self,
-            dataset_id=table["tableReference"]["datasetId"],
-            project_id=table["tableReference"]["projectId"],
-            table_id=table["tableReference"]["tableId"],
+        if self._table:
+            BigQueryTableLink.persist(
+                context=context,
+                task_instance=self,
+                dataset_id=self._table["tableReference"]["datasetId"],
+                project_id=self._table["tableReference"]["projectId"],
+                table_id=self._table["tableReference"]["tableId"],
+            )
+
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we will use table resource returned by upsert method."""
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            get_facets_from_bq_table,
         )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        table = Table.from_api_repr(self._table)
+        output_dataset = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=f"{table.project}.{table.dataset_id}.{table.table_id}",
+            facets=get_facets_from_bq_table(table),
+        )
+
+        return OperatorLineage(outputs=[output_dataset])
 
 
 class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
@@ -2462,6 +2733,7 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
         self.location = location
+        self._table: dict | None = None
         super().__init__(**kwargs)
 
     def execute(self, context: Context):
@@ -2469,25 +2741,44 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
             gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain, location=self.location
         )
 
-        table = bq_hook.update_table_schema(
+        # Save table as attribute for further use by OpenLineage
+        self._table = bq_hook.update_table_schema(
             schema_fields_updates=self.schema_fields_updates,
             include_policy_tags=self.include_policy_tags,
             dataset_id=self.dataset_id,
             table_id=self.table_id,
             project_id=self.project_id,
         )
+        if self._table:
+            BigQueryTableLink.persist(
+                context=context,
+                task_instance=self,
+                dataset_id=self._table["tableReference"]["datasetId"],
+                project_id=self._table["tableReference"]["projectId"],
+                table_id=self._table["tableReference"]["tableId"],
+            )
+        return self._table
 
-        BigQueryTableLink.persist(
-            context=context,
-            task_instance=self,
-            dataset_id=table["tableReference"]["datasetId"],
-            project_id=table["tableReference"]["projectId"],
-            table_id=table["tableReference"]["tableId"],
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we will use table resource returned by update method."""
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            get_facets_from_bq_table,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        table = Table.from_api_repr(self._table)
+        output_dataset = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=f"{table.project}.{table.dataset_id}.{table.table_id}",
+            facets=get_facets_from_bq_table(table),
         )
-
+
+        return OperatorLineage(outputs=[output_dataset])
 
-class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMixin):
+class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOperatorOpenLineageMixin):
     """
     Execute a BigQuery job.
 
@@ -2554,7 +2845,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMixin):
     )
     template_fields_renderers = {"configuration": "json", "configuration.query.query": "sql"}
     ui_color = BigQueryUIColors.QUERY.value
-    operator_extra_links = (BigQueryTableLink(),)
+    operator_extra_links = (BigQueryTableLink(), BigQueryJobDetailLink())
 
     def __init__(
         self,
@@ -2664,6 +2955,8 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMixin):
         try:
             self.log.info("Executing: %s'", self.configuration)
             # Create a job
+            if self.job_id is None:
+                raise ValueError("job_id cannot be None")
             job: BigQueryJob | UnknownJob = self._submit_job(hook, self.job_id)
         except Conflict:
             # If the job already exists retrieve it
@@ -2726,6 +3019,15 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMixin):
         )
         context["ti"].xcom_push(key="job_id_path", value=job_id_path)
 
+        persist_kwargs = {
+            "context": context,
+            "task_instance": self,
+            "project_id": self.project_id,
+            "location": self.location,
+            "job_id": self.job_id,
+        }
+        BigQueryJobDetailLink.persist(**persist_kwargs)
+
         # Wait for the job to complete
         if not self.deferrable:
             job.result(timeout=self.result_timeout, retry=self.result_retry)
@@ -2749,6 +3051,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMixin):
         )
         self.log.info("Current state of job %s is %s", job.job_id, job.state)
         self._handle_job_error(job)
+        return self.job_id
 
     def execute_complete(self, context: Context, event: dict[str, Any]) -> str | None:
         """
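With the changes above, `BigQueryInsertJobOperator` also persists a `BigQueryJobDetailLink` and now returns the job id from `execute`, so downstream tasks can read it from XCom as the task's return value. A usage sketch (the task wiring and query are hypothetical):

```python
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator

run_query = BigQueryInsertJobOperator(
    task_id="run_query",
    configuration={
        "query": {
            "query": "SELECT 1",  # hypothetical query
            "useLegacySql": False,
        }
    },
    location="US",  # hypothetical location
)
# In a downstream task, the returned job id is available as the
# default XCom return value: ti.xcom_pull(task_ids="run_query")
```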