apache-airflow-providers-google 10.2.0rc1__py3-none-any.whl → 10.3.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +38 -39
- airflow/providers/google/ads/transfers/ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +6 -9
- airflow/providers/google/cloud/hooks/bigquery.py +328 -318
- airflow/providers/google/cloud/hooks/cloud_sql.py +66 -22
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +46 -70
- airflow/providers/google/cloud/hooks/dataflow.py +11 -15
- airflow/providers/google/cloud/hooks/dataform.py +3 -3
- airflow/providers/google/cloud/hooks/dataproc.py +577 -573
- airflow/providers/google/cloud/hooks/functions.py +60 -76
- airflow/providers/google/cloud/hooks/gcs.py +108 -18
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +69 -90
- airflow/providers/google/cloud/links/datafusion.py +4 -3
- airflow/providers/google/cloud/operators/bigquery.py +201 -191
- airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/operators/cloud_build.py +2 -1
- airflow/providers/google/cloud/operators/cloud_composer.py +4 -3
- airflow/providers/google/cloud/operators/cloud_sql.py +62 -28
- airflow/providers/google/cloud/operators/dataflow.py +6 -4
- airflow/providers/google/cloud/operators/dataform.py +3 -2
- airflow/providers/google/cloud/operators/dataproc.py +127 -123
- airflow/providers/google/cloud/operators/dataproc_metastore.py +18 -26
- airflow/providers/google/cloud/operators/gcs.py +35 -13
- airflow/providers/google/cloud/operators/kubernetes_engine.py +92 -42
- airflow/providers/google/cloud/operators/mlengine.py +2 -6
- airflow/providers/google/cloud/operators/vision.py +47 -56
- airflow/providers/google/cloud/sensors/bigquery.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +5 -7
- airflow/providers/google/cloud/sensors/pubsub.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +3 -2
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +6 -5
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +46 -7
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +5 -2
- airflow/providers/google/cloud/triggers/cloud_sql.py +102 -0
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +28 -6
- airflow/providers/google/cloud/utils/bigquery.py +17 -0
- airflow/providers/google/get_provider_info.py +7 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +4 -0
- airflow/providers/google/suite/transfers/local_to_drive.py +28 -26
- apache_airflow_providers_google-10.3.0rc1.dist-info/METADATA +289 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/RECORD +49 -48
- apache_airflow_providers_google-10.2.0rc1.dist-info/METADATA +0 -1824
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/LICENSE +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/NOTICE +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/top_level.txt +0 -0
airflow/providers/google/cloud/operators/bigquery.py

@@ -29,6 +29,7 @@ from google.api_core.retry import Retry
 from google.cloud.bigquery import DEFAULT_RETRY, CopyJob, ExtractJob, LoadJob, QueryJob
 from google.cloud.bigquery.table import RowIterator
 
+from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
 from airflow.models import BaseOperator, BaseOperatorLink
 from airflow.models.xcom import XCom
@@ -51,6 +52,7 @@ from airflow.providers.google.cloud.triggers.bigquery import (
     BigQueryIntervalCheckTrigger,
     BigQueryValueCheckTrigger,
 )
+from airflow.providers.google.cloud.utils.bigquery import convert_job_id
 
 if TYPE_CHECKING:
     from google.cloud.bigquery import UnknownJob
@@ -90,8 +92,8 @@ class BigQueryConsoleLink(BaseOperatorLink):
         *,
         ti_key: TaskInstanceKey,
     ):
-        job_id = XCom.get_value(key="job_id", ti_key=ti_key)
-        return BIGQUERY_JOB_DETAILS_LINK_FMT.format(job_id=job_id) if job_id else ""
+        job_id_path = XCom.get_value(key="job_id_path", ti_key=ti_key)
+        return BIGQUERY_JOB_DETAILS_LINK_FMT.format(job_id=job_id_path) if job_id_path else ""
 
 
 @attr.s(auto_attribs=True)
@@ -110,7 +112,7 @@ class BigQueryConsoleIndexableLink(BaseOperatorLink):
         *,
         ti_key: TaskInstanceKey,
     ):
-        job_ids = XCom.get_value(key="job_id", ti_key=ti_key)
+        job_ids = XCom.get_value(key="job_id_path", ti_key=ti_key)
         if not job_ids:
             return None
         if len(job_ids) < self.index:
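Both console links above now resolve the job identifier from XCom under the new ``job_id_path`` key instead of ``job_id``. Any downstream task can read the same value; a minimal sketch, with the producing task ID and DAG wiring hypothetical:

    # Hypothetical downstream task: pull the value the BigQuery operators
    # push under the new "job_id_path" XCom key (the fully qualified form
    # built by convert_job_id, shown later in this diff).
    from airflow.decorators import task

    @task
    def report_job(ti=None):
        job_id_path = ti.xcom_pull(task_ids="run_query", key="job_id_path")
        print(f"BigQuery job: {job_id_path}")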
@@ -132,17 +134,17 @@ class _BigQueryDbHookMixin:
 
 
 class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
-    """
-    Performs checks against BigQuery. The ``BigQueryCheckOperator`` expects
-    a sql query that will return a single row. Each value on that
-    first row is evaluated using python ``bool`` casting. If any of the
-    values return ``False`` the check is failed and errors out.
+    """Performs checks against BigQuery.
+
+    This operator expects a SQL query that returns a single row. Each value on
+    that row is evaluated using a Python ``bool`` cast. If any of the values
+    is falsy, the check errors out.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
         :ref:`howto/operator:BigQueryCheckOperator`
 
-    Note that Python bool casting evals the following as ``False``:
+    Note that Python bool casting evals the following as *False*:
 
     * ``False``
     * ``0``
@@ -151,36 +153,34 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
     * Empty dictionary or set (``{}``)
 
     Given a query like ``SELECT COUNT(*) FROM foo``, it will fail only if
-    the count ``== 0``. You can craft much more complex query that could,
-    for instance, check that the table has the same number of rows as
-    the source table upstream, or that the count of today's partition is
-    greater than yesterday's partition, or that a set of metrics are less
-    than 3 standard deviation for the 7 day average.
-
-    This operator can be used as a data quality check in your pipeline, and
-    depending on where you put it in your DAG, you have the choice to
-    stop the critical path, preventing from
-    publishing dubious data, or on the side and receive email alerts
-    without stopping the progress of the DAG.
-
-    :param sql: the sql to be executed
-    :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
-    :param use_legacy_sql: Whether to use legacy SQL (true)
-        or standard SQL (false).
+    the count equals to zero. You can craft much more complex query that could,
+    for instance, check that the table has the same number of rows as the source
+    table upstream, or that the count of today's partition is greater than
+    yesterday's partition, or that a set of metrics are less than three standard
+    deviation for the 7-day average.
+
+    This operator can be used as a data quality check in your pipeline.
+    Depending on where you put it in your DAG, you have the choice to stop the
+    critical path, preventing from publishing dubious data, or on the side and
+    receive email alerts without stopping the progress of the DAG.
+
+    :param sql: SQL to execute.
+    :param gcp_conn_id: Connection ID for Google Cloud.
+    :param use_legacy_sql: Whether to use legacy SQL (true) or standard SQL (false).
     :param location: The geographic location of the job. See details at:
         https://cloud.google.com/bigquery/docs/locations#specifying_your_location
-    :param impersonation_chain: Optional service account to impersonate using short-term
-        credentials, or chained list of accounts required to get the access_token
-        of the last account in the list, which will be impersonated in the request.
-        If set as a string, the account must grant the originating account
-        the Service Account Token Creator IAM role.
-        If set as a sequence, the identities from the list must grant
-        Service Account Token Creator IAM role to the directly preceding identity, with first
-        account from the list granting this role to the originating account (templated)
-    :param labels: a dictionary containing labels for the table, passed to BigQuery
-    :param deferrable: Run operator in the deferrable mode
-    :param poll_interval: (Deferrable mode only) polling period in seconds to
-        check for the status of job
+    :param impersonation_chain: Optional service account to impersonate using
+        short-term credentials, or chained list of accounts required to get the
+        access token of the last account in the list, which will be impersonated
+        in the request. If set as a string, the account must grant the
+        originating account the Service Account Token Creator IAM role. If set
+        as a sequence, the identities from the list must grant Service Account
+        Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account. (templated)
+    :param labels: a dictionary containing labels for the table, passed to BigQuery.
+    :param deferrable: Run operator in the deferrable mode.
+    :param poll_interval: (Deferrable mode only) polling period in seconds to
+        check for the status of job.
     """
 
     template_fields: Sequence[str] = (
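The reworked docstring above describes the check contract: one result row, every value cast with ``bool``. A minimal usage sketch consistent with that contract (project, dataset, and task names are illustrative only):

    # Illustrative only: fails the task if any selected value is falsy,
    # e.g. when the count is 0.
    from airflow.providers.google.cloud.operators.bigquery import BigQueryCheckOperator

    check_rows = BigQueryCheckOperator(
        task_id="check_rows",
        sql="SELECT COUNT(*) FROM `my_project.my_dataset.foo`",
        use_legacy_sql=False,
    )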
@@ -201,7 +201,7 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         labels: dict | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: float = 4.0,
         **kwargs,
     ) -> None:
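This changed default recurs for every deferrable operator in this file: instead of a hard-coded ``False``, the default now comes from the ``default_deferrable`` option in the ``[operators]`` config section. A sketch of the lookup semantics; the environment-variable spelling follows Airflow's standard AIRFLOW__SECTION__KEY mapping:

    from airflow.configuration import conf

    # Unset -> falls back to False, preserving the old behaviour. Setting
    # AIRFLOW__OPERATORS__DEFAULT_DEFERRABLE=true (or "default_deferrable = true"
    # under [operators] in airflow.cfg) makes these operators defer by default
    # unless the DAG author overrides the argument explicitly.
    conf.getboolean("operators", "default_deferrable", fallback=False)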
@@ -254,10 +254,10 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
         self.log.info("Current state of job %s is %s", job.job_id, job.state)
 
     def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
-        """
-        Callback for when the trigger fires - returns immediately.
-        Relies on trigger to throw an exception, otherwise it assumes execution was
-        successful.
+        """Callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
         """
         if event["status"] == "error":
             raise AirflowException(event["message"])
@@ -274,31 +274,30 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
 
 
 class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
-    """
-    Performs a simple value check using sql code.
+    """Perform a simple value check using sql code.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
         :ref:`howto/operator:BigQueryValueCheckOperator`
 
-    :param sql: the sql to be executed
+    :param sql: SQL to execute.
     :param use_legacy_sql: Whether to use legacy SQL (true)
         or standard SQL (false).
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
     :param location: The geographic location of the job. See details at:
         https://cloud.google.com/bigquery/docs/locations#specifying_your_location
-    :param impersonation_chain: Optional service account to impersonate using short-term
-        credentials, or chained list of accounts required to get the access_token
-        of the last account in the list, which will be impersonated in the request.
-        If set as a string, the account must grant the originating account
-        the Service Account Token Creator IAM role.
-        If set as a sequence, the identities from the list must grant
-        Service Account Token Creator IAM role to the directly preceding identity, with first
-        account from the list granting this role to the originating account (templated)
-    :param labels: a dictionary containing labels for the table, passed to BigQuery
-    :param deferrable: Run operator in the deferrable mode
-    :param poll_interval: (Deferrable mode only) polling period in seconds to
-        check for the status of job
+    :param impersonation_chain: Optional service account to impersonate using
+        short-term credentials, or chained list of accounts required to get the
+        access token of the last account in the list, which will be impersonated
+        in the request. If set as a string, the account must grant the
+        originating account the Service Account Token Creator IAM role. If set
+        as a sequence, the identities from the list must grant Service Account
+        Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account. (templated)
+    :param labels: a dictionary containing labels for the table, passed to BigQuery.
+    :param deferrable: Run operator in the deferrable mode.
+    :param poll_interval: (Deferrable mode only) polling period in seconds to
+        check for the status of job.
     """
 
     template_fields: Sequence[str] = (
@@ -322,7 +321,7 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         labels: dict | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: float = 4.0,
         **kwargs,
     ) -> None:
@@ -364,25 +363,33 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
 
             job = self._submit_job(hook, job_id="")
             context["ti"].xcom_push(key="job_id", value=job.job_id)
-            self.defer(
-                timeout=self.execution_timeout,
-                trigger=BigQueryValueCheckTrigger(
-                    conn_id=self.gcp_conn_id,
-                    job_id=job.job_id,
-                    project_id=hook.project_id,
-                    sql=self.sql,
-                    pass_value=self.pass_value,
-                    tolerance=self.tol,
-                    poll_interval=self.poll_interval,
-                ),
-                method_name="execute_complete",
-            )
+            if job.running():
+                self.defer(
+                    timeout=self.execution_timeout,
+                    trigger=BigQueryValueCheckTrigger(
+                        conn_id=self.gcp_conn_id,
+                        job_id=job.job_id,
+                        project_id=hook.project_id,
+                        sql=self.sql,
+                        pass_value=self.pass_value,
+                        tolerance=self.tol,
+                        poll_interval=self.poll_interval,
+                    ),
+                    method_name="execute_complete",
+                )
+            self._handle_job_error(job)
+            self.log.info("Current state of job %s is %s", job.job_id, job.state)
+
+    @staticmethod
+    def _handle_job_error(job: BigQueryJob | UnknownJob) -> None:
+        if job.error_result:
+            raise AirflowException(f"BigQuery job {job.job_id} failed: {job.error_result}")
 
     def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
-        """
-        Callback for when the trigger fires - returns immediately.
-        Relies on trigger to throw an exception, otherwise it assumes execution was
-        successful.
+        """Callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
         """
         if event["status"] == "error":
             raise AirflowException(event["message"])
@@ -454,7 +461,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         labels: dict | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: float = 4.0,
         **kwargs,
     ) -> None:
@@ -522,10 +529,10 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
         )
 
     def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
-        """
-        Callback for when the trigger fires - returns immediately.
-        Relies on trigger to throw an exception, otherwise it assumes execution was
-        successful.
+        """Callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
         """
         if event["status"] == "error":
             raise AirflowException(event["message"])
@@ -789,7 +796,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
     ``[A,B,C]`` and you pass 'B,A' in the ``selected_fields``
     the data would still be of the form ``'A,B'``.
 
-    **Example**: ::
+    **Example**::
 
         get_data = BigQueryGetDataOperator(
             task_id='get_data_from_bq',
@@ -848,7 +855,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         gcp_conn_id: str = "google_cloud_default",
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: float = 4.0,
         as_dict: bool = False,
         use_legacy_sql: bool = True,
@@ -886,12 +893,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         )
 
     def generate_query(self, hook: BigQueryHook) -> str:
-        """
-        Generate a select query if selected fields are given or with *
-        for the given dataset and table id.
-
-        :param hook BigQuery Hook
-        """
+        """Generate a SELECT query if for the given dataset and table ID."""
         query = "select "
         if self.selected_fields:
             query += self.selected_fields
@@ -967,10 +969,10 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         )
 
     def execute_complete(self, context: Context, event: dict[str, Any]) -> Any:
-        """
-        Callback for when the trigger fires - returns immediately.
-        Relies on trigger to throw an exception, otherwise it assumes execution was
-        successful.
+        """Callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
         """
         if event["status"] == "error":
             raise AirflowException(event["message"])
@@ -980,12 +982,13 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator):
-    """
-    Executes BigQuery SQL queries in a specific BigQuery database.
-    This operator does not assert idempotency.
+    """Executes BigQuery SQL queries in a specific BigQuery database.
 
-    This operator is deprecated.
-    Please use :class:`airflow.providers.google.cloud.operators.bigquery.BigQueryInsertJobOperator`
+    This operator is deprecated. Please use
+    :class:`airflow.providers.google.cloud.operators.bigquery.BigQueryInsertJobOperator`
+    instead.
+
+    This operator does not assert idempotency.
 
     :param sql: the SQL code to be executed as a single string, or
         a list of str (sql statements), or a reference to a template file.
@@ -1041,10 +1044,11 @@ class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator):
         US and EU. See details at
         https://cloud.google.com/bigquery/docs/locations#specifying_your_location
     :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-        **Example**: ::
+
+        .. code-block:: python
 
             encryption_configuration = {
-                "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
+                "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
             }
     :param impersonation_chain: Optional service account to impersonate using short-term
         credentials, or chained list of accounts required to get the access_token
@@ -1184,7 +1188,11 @@ class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator):
             ]
         else:
             raise AirflowException(f"argument 'sql' of type {type(str)} is neither a string nor an iterable")
-        return job_id
+        project_id = self.hook.project_id
+        if project_id:
+            job_id_path = convert_job_id(job_id=job_id, project_id=project_id, location=self.location)
+            context["task_instance"].xcom_push(key="job_id_path", value=job_id_path)
+        return job_id
 
     def on_kill(self) -> None:
         super().on_kill()
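The ``job_id_path`` pushed here is built by the new ``convert_job_id`` helper (see ``airflow/providers/google/cloud/utils/bigquery.py``, added in this release as +17 -0). Judging from how the value is substituted into ``BIGQUERY_JOB_DETAILS_LINK_FMT``, it is presumably a fully qualified ``project:location:job_id`` string; a hedged sketch of such a helper, not copied from the wheel:

    # Assumed shape of convert_job_id(); the real helper lives in
    # airflow/providers/google/cloud/utils/bigquery.py in the 10.3.0rc1 wheel.
    from __future__ import annotations

    def convert_job_id(job_id: str | list[str], project_id: str, location: str | None) -> str | list[str]:
        location = location or "US"  # assumption: default BigQuery location
        if isinstance(job_id, list):
            return [f"{project_id}:{location}:{single_id}" for single_id in job_id]
        return f"{project_id}:{location}:{job_id}"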
@@ -1194,9 +1202,7 @@ class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
-    """
-    Creates a new, empty table in the specified BigQuery dataset,
-    optionally with schema.
+    """Creates a new table in the specified BigQuery dataset, optionally with schema.
 
     The schema to be used for the BigQuery table may be specified in one of
     two ways. You may either directly pass the schema fields in, or you may
@@ -1217,7 +1223,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
     :param schema_fields: If set, the schema field list as defined here:
         https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
 
-    **Example**: ::
+    **Example**::
 
         schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
                        {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}]
@@ -1236,45 +1242,46 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
         and interact with the Google Cloud Storage service.
     :param labels: a dictionary containing labels for the table, passed to BigQuery
 
-    **Example (with schema JSON in GCS)**: ::
+    **Example (with schema JSON in GCS)**::
 
-        CreateTable = BigQueryCreateEmptyTableOperator(
-            task_id='BigQueryCreateEmptyTableOperator_task',
-            dataset_id='ODS',
-            table_id='Employees',
-            project_id='internal-gcp-project',
-            gcs_schema_object='gs://schema-bucket/employee_schema.json',
-            gcp_conn_id='airflow-conn-id',
-            google_cloud_storage_conn_id='airflow-conn-id'
-        )
+        CreateTable = BigQueryCreateEmptyTableOperator(
+            task_id='BigQueryCreateEmptyTableOperator_task',
+            dataset_id='ODS',
+            table_id='Employees',
+            project_id='internal-gcp-project',
+            gcs_schema_object='gs://schema-bucket/employee_schema.json',
+            gcp_conn_id='airflow-conn-id',
+            google_cloud_storage_conn_id='airflow-conn-id'
+        )
 
-    **Corresponding Schema file** (``employee_schema.json``): ::
+    **Corresponding Schema file** (``employee_schema.json``)::
 
-        [
-            {
-                "mode": "NULLABLE",
-                "name": "emp_name",
-                "type": "STRING"
-            },
-            {
-                "mode": "REQUIRED",
-                "name": "salary",
-                "type": "INTEGER"
-            }
-        ]
+        [
+            {
+                "mode": "NULLABLE",
+                "name": "emp_name",
+                "type": "STRING"
+            },
+            {
+                "mode": "REQUIRED",
+                "name": "salary",
+                "type": "INTEGER"
+            }
+        ]
+
+    **Example (with schema in the DAG)**::
+
+        CreateTable = BigQueryCreateEmptyTableOperator(
+            task_id='BigQueryCreateEmptyTableOperator_task',
+            dataset_id='ODS',
+            table_id='Employees',
+            project_id='internal-gcp-project',
+            schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
+                           {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}],
+            gcp_conn_id='airflow-conn-id-account',
+            google_cloud_storage_conn_id='airflow-conn-id'
+        )
 
-    **Example (with schema in the DAG)**: ::
-
-        CreateTable = BigQueryCreateEmptyTableOperator(
-            task_id='BigQueryCreateEmptyTableOperator_task',
-            dataset_id='ODS',
-            table_id='Employees',
-            project_id='internal-gcp-project',
-            schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
-                           {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}],
-            gcp_conn_id='airflow-conn-id-account',
-            google_cloud_storage_conn_id='airflow-conn-id'
-        )
 
     :param view: [Optional] A dictionary containing definition for the view.
         If set, it will create a view instead of a table:
@@ -1282,10 +1289,11 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
         https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
     :param materialized_view: [Optional] The materialized view definition.
     :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-        **Example**: ::
+
+        .. code-block:: python
 
             encryption_configuration = {
-                "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
+                "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
             }
     :param location: The location used for the operation.
     :param cluster_fields: [Optional] The fields used for clustering.
@@ -1446,9 +1454,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
-    """
-    Creates a new external table in the dataset with the data from Google Cloud
-    Storage.
+    """Create a new external table with data from Google Cloud Storage.
 
     The schema to be used for the BigQuery table may be specified in one of
     two ways. You may either directly pass the schema fields in, or you may
@@ -1468,7 +1474,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
     :param schema_fields: If set, the schema field list as defined here:
         https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
 
-    **Example**: ::
+    **Example**::
 
         schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
                        {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}]
@@ -1508,10 +1514,11 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
     :param src_fmt_configs: configure optional fields specific to the source format
     :param labels: a dictionary containing labels for the table, passed to BigQuery
     :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-        **Example**: ::
+
+        .. code-block:: python
 
             encryption_configuration = {
-                "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
+                "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
             }
     :param location: The location used for the operation.
     :param impersonation_chain: Optional service account to impersonate using short-term
@@ -1735,8 +1742,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):
-    """
-    This operator deletes an existing dataset from your Project in Big query.
+    """Delete an existing dataset from your Project in BigQuery.
 
     https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete
 
@@ -1760,7 +1766,7 @@ class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
 
-    **Example**: ::
+    **Example**::
 
         delete_temp_data = BigQueryDeleteDatasetOperator(
             dataset_id='temp-dataset',
@@ -1810,8 +1816,7 @@ class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
-    """
-    This operator is used to create new dataset for your Project in BigQuery.
+    """Create a new dataset for your Project in BigQuery.
 
     https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource
 
@@ -1837,7 +1842,7 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
     :param if_exists: What should Airflow do if the dataset exists. If set to `log`, the TI will be passed to
         success and an error message will be logged. Set to `ignore` to ignore the error, set to `fail` to
         fail the TI, and set to `skip` to skip it.
-    **Example**: ::
+    **Example**::
 
         create_new_dataset = BigQueryCreateEmptyDatasetOperator(
             dataset_id='new-dataset',
@@ -1872,7 +1877,6 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
         exists_ok: bool | None = None,
         **kwargs,
     ) -> None:
-
         self.dataset_id = dataset_id
         self.project_id = project_id
         self.location = location
@@ -1934,8 +1938,7 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryGetDatasetOperator(GoogleCloudBaseOperator):
-    """
-    This operator is used to return the dataset specified by dataset_id.
+    """Get the dataset specified by ID.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -1999,8 +2002,7 @@ class BigQueryGetDatasetOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryGetDatasetTablesOperator(GoogleCloudBaseOperator):
-    """
-    This operator retrieves the list of tables in the specified dataset.
+    """Retrieve the list of tables in the specified dataset.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -2059,12 +2061,13 @@ class BigQueryGetDatasetTablesOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryPatchDatasetOperator(GoogleCloudBaseOperator):
-    """
-    This operator is used to patch dataset for your Project in BigQuery.
-    It only replaces fields that are provided in the submitted dataset resource.
+    """Patch a dataset for your Project in BigQuery.
 
-    This operator is deprecated.
-    Please use :class:`airflow.providers.google.cloud.operators.bigquery.BigQueryUpdateDatasetOperator`
+    This operator is deprecated. Please use
+    :class:`airflow.providers.google.cloud.operators.bigquery.BigQueryUpdateTableOperator`
+    instead.
+
+    Only replaces fields that are provided in the submitted dataset resource.
 
     :param dataset_id: The id of dataset. Don't need to provide,
         if datasetId in dataset_reference.
@@ -2127,8 +2130,8 @@ class BigQueryPatchDatasetOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
-    """
-    This operator is used to update table for your Project in BigQuery.
+    """Update a table for your Project in BigQuery.
+
     Use ``fields`` to specify which fields of table to update. If a field
     is listed in ``fields`` and is ``None`` in table, it will be deleted.
 
@@ -2214,8 +2217,8 @@ class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryUpdateDatasetOperator(GoogleCloudBaseOperator):
-    """
-    This operator is used to update dataset for your Project in BigQuery.
+    """Update a dataset for your Project in BigQuery.
+
     Use ``fields`` to specify which fields of dataset to update. If a field
     is listed in ``fields`` and is ``None`` in dataset, it will be deleted.
     If no ``fields`` are provided then all fields of provided ``dataset_resource``
@@ -2296,8 +2299,7 @@ class BigQueryUpdateDatasetOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryDeleteTableOperator(GoogleCloudBaseOperator):
-    """
-    Deletes BigQuery tables.
+    """Delete a BigQuery table.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -2355,8 +2357,7 @@ class BigQueryDeleteTableOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
-    """
-    Upsert BigQuery table.
+    """Upsert to a BigQuery table.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -2433,8 +2434,8 @@ class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
-    """
-    Update BigQuery Table Schema.
+    """Update BigQuery Table Schema.
+
     Updates fields on a table schema based on contents of the supplied schema_fields_updates
     parameter. The supplied schema does not need to be complete, if the field
     already exists in the schema you only need to supply keys & values for the
@@ -2447,16 +2448,22 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
     :param schema_fields_updates: a partial schema resource. see
         https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableSchema
 
-        **Example**: ::
+        .. code-block:: python
 
-            schema_fields_updates=[
-                {"name": "emp_name", "description": "Some New Description"},
-                {"name": "salary", "policyTags": {'names': ['some_new_policy_tag']}},
-                {"name": "departments", "fields": [
-                    {"name": "name", "description": "Some New Description"},
-                    {"name": "type", "description": "Some New Description"}
-                ]},
-            ]
+            schema_fields_updates = [
+                {"name": "emp_name", "description": "Some New Description"},
+                {
+                    "name": "salary",
+                    "policyTags": {"names": ["some_new_policy_tag"]},
+                },
+                {
+                    "name": "departments",
+                    "fields": [
+                        {"name": "name", "description": "Some New Description"},
+                        {"name": "type", "description": "Some New Description"},
+                    ],
+                },
+            ]
 
     :param include_policy_tags: (Optional) If set to True policy tags will be included in
         the update request which requires special permissions even if unchanged (default False)
@@ -2536,9 +2543,9 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
 
 
 class BigQueryInsertJobOperator(GoogleCloudBaseOperator):
-    """
-    Executes a BigQuery job. Waits for the job to complete and returns job id.
+    """Execute a BigQuery job.
 
+    Waits for the job to complete and returns job id.
     This operator work in the following way:
 
     - it calculates a unique hash of the job using job's configuration or uuid if ``force_rerun`` is True
@@ -2616,7 +2623,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator):
         cancel_on_kill: bool = True,
         result_retry: Retry = DEFAULT_RETRY,
         result_timeout: float | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: float = 4.0,
         **kwargs,
     ) -> None:
@@ -2727,9 +2734,11 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator):
                 persist_kwargs["dataset_id"] = table["datasetId"]
                 persist_kwargs["project_id"] = table["projectId"]
                 BigQueryTableLink.persist(**persist_kwargs)
-
         self.job_id = job.job_id
-
+        project_id = self.project_id or self.hook.project_id
+        if project_id:
+            job_id_path = convert_job_id(job_id=job_id, project_id=project_id, location=self.location)
+            context["ti"].xcom_push(key="job_id_path", value=job_id_path)
         # Wait for the job to complete
         if not self.deferrable:
             job.result(timeout=self.result_timeout, retry=self.result_retry)
@@ -2749,12 +2758,13 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator):
                 method_name="execute_complete",
             )
         self.log.info("Current state of job %s is %s", job.job_id, job.state)
+        self._handle_job_error(job)
 
     def execute_complete(self, context: Context, event: dict[str, Any]):
-        """
-        Callback for when the trigger fires - returns immediately.
-        Relies on trigger to throw an exception, otherwise it assumes execution was
-        successful.
+        """Callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
         """
         if event["status"] == "error":
            raise AirflowException(event["message"])
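Taken together, BigQueryInsertJobOperator in 10.3.0rc1 pushes ``job_id_path`` to XCom, honours the ``[operators] default_deferrable`` config, and fails fast via ``_handle_job_error`` when the submitted job reports an error. A minimal deferrable usage sketch (query and task names are illustrative):

    from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator

    insert_job = BigQueryInsertJobOperator(
        task_id="run_query",
        configuration={"query": {"query": "SELECT 1", "useLegacySql": False}},
        deferrable=True,  # or rely on [operators] default_deferrable
    )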
|