apache-airflow-providers-google 10.2.0rc1__py3-none-any.whl → 10.3.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +38 -39
  3. airflow/providers/google/ads/transfers/ads_to_gcs.py +4 -4
  4. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +6 -9
  5. airflow/providers/google/cloud/hooks/bigquery.py +328 -318
  6. airflow/providers/google/cloud/hooks/cloud_sql.py +66 -22
  7. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +46 -70
  8. airflow/providers/google/cloud/hooks/dataflow.py +11 -15
  9. airflow/providers/google/cloud/hooks/dataform.py +3 -3
  10. airflow/providers/google/cloud/hooks/dataproc.py +577 -573
  11. airflow/providers/google/cloud/hooks/functions.py +60 -76
  12. airflow/providers/google/cloud/hooks/gcs.py +108 -18
  13. airflow/providers/google/cloud/hooks/kubernetes_engine.py +69 -90
  14. airflow/providers/google/cloud/links/datafusion.py +4 -3
  15. airflow/providers/google/cloud/operators/bigquery.py +201 -191
  16. airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
  17. airflow/providers/google/cloud/operators/cloud_build.py +2 -1
  18. airflow/providers/google/cloud/operators/cloud_composer.py +4 -3
  19. airflow/providers/google/cloud/operators/cloud_sql.py +62 -28
  20. airflow/providers/google/cloud/operators/dataflow.py +6 -4
  21. airflow/providers/google/cloud/operators/dataform.py +3 -2
  22. airflow/providers/google/cloud/operators/dataproc.py +127 -123
  23. airflow/providers/google/cloud/operators/dataproc_metastore.py +18 -26
  24. airflow/providers/google/cloud/operators/gcs.py +35 -13
  25. airflow/providers/google/cloud/operators/kubernetes_engine.py +92 -42
  26. airflow/providers/google/cloud/operators/mlengine.py +2 -6
  27. airflow/providers/google/cloud/operators/vision.py +47 -56
  28. airflow/providers/google/cloud/sensors/bigquery.py +3 -2
  29. airflow/providers/google/cloud/sensors/gcs.py +5 -7
  30. airflow/providers/google/cloud/sensors/pubsub.py +2 -2
  31. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +3 -2
  32. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  33. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
  34. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +6 -5
  35. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +46 -7
  36. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +5 -2
  37. airflow/providers/google/cloud/triggers/cloud_sql.py +102 -0
  38. airflow/providers/google/cloud/triggers/kubernetes_engine.py +28 -6
  39. airflow/providers/google/cloud/utils/bigquery.py +17 -0
  40. airflow/providers/google/get_provider_info.py +7 -2
  41. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +4 -0
  42. airflow/providers/google/suite/transfers/local_to_drive.py +28 -26
  43. apache_airflow_providers_google-10.3.0rc1.dist-info/METADATA +289 -0
  44. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/RECORD +49 -48
  45. apache_airflow_providers_google-10.2.0rc1.dist-info/METADATA +0 -1824
  46. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/LICENSE +0 -0
  47. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/NOTICE +0 -0
  48. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/WHEEL +0 -0
  49. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/entry_points.txt +0 -0
  50. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/top_level.txt +0 -0
airflow/providers/google/cloud/operators/bigquery.py

@@ -29,6 +29,7 @@ from google.api_core.retry import Retry
  from google.cloud.bigquery import DEFAULT_RETRY, CopyJob, ExtractJob, LoadJob, QueryJob
  from google.cloud.bigquery.table import RowIterator

+ from airflow.configuration import conf
  from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
  from airflow.models import BaseOperator, BaseOperatorLink
  from airflow.models.xcom import XCom
@@ -51,6 +52,7 @@ from airflow.providers.google.cloud.triggers.bigquery import (
  BigQueryIntervalCheckTrigger,
  BigQueryValueCheckTrigger,
  )
+ from airflow.providers.google.cloud.utils.bigquery import convert_job_id

  if TYPE_CHECKING:
  from google.cloud.bigquery import UnknownJob
@@ -90,8 +92,8 @@ class BigQueryConsoleLink(BaseOperatorLink):
  *,
  ti_key: TaskInstanceKey,
  ):
- job_id = XCom.get_value(key="job_id", ti_key=ti_key)
- return BIGQUERY_JOB_DETAILS_LINK_FMT.format(job_id=job_id) if job_id else ""
+ job_id_path = XCom.get_value(key="job_id_path", ti_key=ti_key)
+ return BIGQUERY_JOB_DETAILS_LINK_FMT.format(job_id=job_id_path) if job_id_path else ""


  @attr.s(auto_attribs=True)
@@ -110,7 +112,7 @@ class BigQueryConsoleIndexableLink(BaseOperatorLink):
  *,
  ti_key: TaskInstanceKey,
  ):
- job_ids = XCom.get_value(key="job_id", ti_key=ti_key)
+ job_ids = XCom.get_value(key="job_id_path", ti_key=ti_key)
  if not job_ids:
  return None
  if len(job_ids) < self.index:
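The two hunks above switch the console links from the ``job_id`` XCom key to the new ``job_id_path`` key, whose value is produced by the ``convert_job_id`` helper added in ``airflow/providers/google/cloud/utils/bigquery.py`` (see the file list; its body is not shown in this diff). A minimal sketch of how a link resolves that value; the console URL template and the example path string are assumptions for illustration, only the key name and the ``format`` call come from the diff::

    # Sketch only: mirrors how BigQueryConsoleLink now builds its URL.
    # The exact value pushed under "job_id_path" depends on convert_job_id,
    # which is not part of this hunk; the string below is a made-up example.
    BIGQUERY_JOB_DETAILS_LINK_FMT = "https://console.cloud.google.com/bigquery?j={job_id}"

    def console_link_for(job_id_path: str | None) -> str:
        """Return the BigQuery console URL, or an empty string when no job was pushed."""
        return BIGQUERY_JOB_DETAILS_LINK_FMT.format(job_id=job_id_path) if job_id_path else ""

    print(console_link_for("my-project:US:example_job_id"))  # hypothetical job_id_path value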
@@ -132,17 +134,17 @@ class _BigQueryDbHookMixin:


  class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
- """
- Performs checks against BigQuery. The ``BigQueryCheckOperator`` expects
- a sql query that will return a single row. Each value on that
- first row is evaluated using python ``bool`` casting. If any of the
- values return ``False`` the check is failed and errors out.
+ """Performs checks against BigQuery.
+
+ This operator expects a SQL query that returns a single row. Each value on
+ that row is evaluated using a Python ``bool`` cast. If any of the values
+ is falsy, the check errors out.

  .. seealso::
  For more information on how to use this operator, take a look at the guide:
  :ref:`howto/operator:BigQueryCheckOperator`

- Note that Python bool casting evals the following as ``False``:
+ Note that Python bool casting evals the following as *False*:

  * ``False``
  * ``0``
@@ -151,36 +153,34 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
  * Empty dictionary or set (``{}``)

  Given a query like ``SELECT COUNT(*) FROM foo``, it will fail only if
- the count ``== 0``. You can craft much more complex query that could,
- for instance, check that the table has the same number of rows as
- the source table upstream, or that the count of today's partition is
- greater than yesterday's partition, or that a set of metrics are less
- than 3 standard deviation for the 7 day average.
-
- This operator can be used as a data quality check in your pipeline, and
- depending on where you put it in your DAG, you have the choice to
- stop the critical path, preventing from
- publishing dubious data, or on the side and receive email alerts
- without stopping the progress of the DAG.
-
- :param sql: the sql to be executed
- :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
- :param use_legacy_sql: Whether to use legacy SQL (true)
- or standard SQL (false).
+ the count equals zero. You can craft a much more complex query that could,
+ for instance, check that the table has the same number of rows as the source
+ table upstream, or that the count of today's partition is greater than
+ yesterday's partition, or that a set of metrics is less than three standard
+ deviations from the 7-day average.
+
+ This operator can be used as a data quality check in your pipeline.
+ Depending on where you put it in your DAG, you can stop the critical path
+ and prevent publishing dubious data, or run it on the side and receive
+ email alerts without stopping the progress of the DAG.
+
+ :param sql: SQL to execute.
+ :param gcp_conn_id: Connection ID for Google Cloud.
+ :param use_legacy_sql: Whether to use legacy SQL (true) or standard SQL (false).
  :param location: The geographic location of the job. See details at:
  https://cloud.google.com/bigquery/docs/locations#specifying_your_location
- :param impersonation_chain: Optional service account to impersonate using short-term
- credentials, or chained list of accounts required to get the access_token
- of the last account in the list, which will be impersonated in the request.
- If set as a string, the account must grant the originating account
- the Service Account Token Creator IAM role.
- If set as a sequence, the identities from the list must grant
- Service Account Token Creator IAM role to the directly preceding identity, with first
- account from the list granting this role to the originating account (templated).
- :param labels: a dictionary containing labels for the table, passed to BigQuery
- :param deferrable: Run operator in the deferrable mode
- :param poll_interval: (Deferrable mode only) polling period in seconds to check for the status of job.
- Defaults to 4 seconds.
+ :param impersonation_chain: Optional service account to impersonate using
+ short-term credentials, or chained list of accounts required to get the
+ access token of the last account in the list, which will be impersonated
+ in the request. If set as a string, the account must grant the
+ originating account the Service Account Token Creator IAM role. If set
+ as a sequence, the identities from the list must grant Service Account
+ Token Creator IAM role to the directly preceding identity, with the first
+ account from the list granting this role to the originating account. (templated)
+ :param labels: a dictionary containing labels for the table, passed to BigQuery.
+ :param deferrable: Run operator in the deferrable mode.
+ :param poll_interval: (Deferrable mode only) polling period in seconds to
+ check for the status of the job.
  """

  template_fields: Sequence[str] = (
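The rewritten docstring above spells out the check contract: one row, every value cast with ``bool``. A short, hedged usage sketch of the row-count style check it describes (project, dataset, and connection names are placeholders)::

    from airflow.providers.google.cloud.operators.bigquery import BigQueryCheckOperator

    # Fails the task when the query returns any falsy value, e.g. COUNT(*) == 0
    # for an empty partition of a placeholder table.
    check_todays_partition = BigQueryCheckOperator(
        task_id="check_todays_partition",
        sql="SELECT COUNT(*) FROM `my-project.my_dataset.events` WHERE ds = '{{ ds }}'",
        use_legacy_sql=False,
        location="US",
        gcp_conn_id="google_cloud_default",
    )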
@@ -201,7 +201,7 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
  location: str | None = None,
  impersonation_chain: str | Sequence[str] | None = None,
  labels: dict | None = None,
- deferrable: bool = False,
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
  poll_interval: float = 4.0,
  **kwargs,
  ) -> None:
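This hunk, repeated for every deferrable-capable operator in the file, replaces the hard-coded ``False`` default with the ``[operators] default_deferrable`` setting read through ``airflow.configuration.conf`` (hence the new ``conf`` import in the first hunk). A minimal sketch of the pattern::

    from airflow.configuration import conf

    # Same expression as in the hunk above: fall back to False when
    # [operators] default_deferrable is not configured, preserving old behaviour.
    deferrable_default = conf.getboolean("operators", "default_deferrable", fallback=False)

Setting ``default_deferrable = true`` under ``[operators]`` in ``airflow.cfg`` (or exporting ``AIRFLOW__OPERATORS__DEFAULT_DEFERRABLE=true``) therefore switches these operators to deferrable mode without editing DAG code.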
@@ -254,10 +254,10 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
  self.log.info("Current state of job %s is %s", job.job_id, job.state)

  def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
- """
- Callback for when the trigger fires - returns immediately.
- Relies on trigger to throw an exception, otherwise it assumes execution was
- successful.
+ """Callback for when the trigger fires.
+
+ This returns immediately. It relies on the trigger to throw an exception,
+ otherwise it assumes execution was successful.
  """
  if event["status"] == "error":
  raise AirflowException(event["message"])
@@ -274,31 +274,30 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):


  class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
- """
- Performs a simple value check using sql code.
+ """Perform a simple value check using SQL code.

  .. seealso::
  For more information on how to use this operator, take a look at the guide:
  :ref:`howto/operator:BigQueryValueCheckOperator`

- :param sql: the sql to be executed
+ :param sql: SQL to execute.
  :param use_legacy_sql: Whether to use legacy SQL (true)
  or standard SQL (false).
  :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
  :param location: The geographic location of the job. See details at:
  https://cloud.google.com/bigquery/docs/locations#specifying_your_location
- :param impersonation_chain: Optional service account to impersonate using short-term
- credentials, or chained list of accounts required to get the access_token
- of the last account in the list, which will be impersonated in the request.
- If set as a string, the account must grant the originating account
- the Service Account Token Creator IAM role.
- If set as a sequence, the identities from the list must grant
- Service Account Token Creator IAM role to the directly preceding identity, with first
- account from the list granting this role to the originating account (templated).
- :param labels: a dictionary containing labels for the table, passed to BigQuery
- :param deferrable: Run operator in the deferrable mode
- :param poll_interval: (Deferrable mode only) polling period in seconds to check for the status of job.
- Defaults to 4 seconds.
+ :param impersonation_chain: Optional service account to impersonate using
+ short-term credentials, or chained list of accounts required to get the
+ access token of the last account in the list, which will be impersonated
+ in the request. If set as a string, the account must grant the
+ originating account the Service Account Token Creator IAM role. If set
+ as a sequence, the identities from the list must grant Service Account
+ Token Creator IAM role to the directly preceding identity, with the first
+ account from the list granting this role to the originating account. (templated)
+ :param labels: a dictionary containing labels for the table, passed to BigQuery.
+ :param deferrable: Run operator in the deferrable mode.
+ :param poll_interval: (Deferrable mode only) polling period in seconds to
+ check for the status of the job.
  """

  template_fields: Sequence[str] = (
@@ -322,7 +321,7 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
  location: str | None = None,
  impersonation_chain: str | Sequence[str] | None = None,
  labels: dict | None = None,
- deferrable: bool = False,
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
  poll_interval: float = 4.0,
  **kwargs,
  ) -> None:
@@ -364,25 +363,33 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):

  job = self._submit_job(hook, job_id="")
  context["ti"].xcom_push(key="job_id", value=job.job_id)
- self.defer(
- timeout=self.execution_timeout,
- trigger=BigQueryValueCheckTrigger(
- conn_id=self.gcp_conn_id,
- job_id=job.job_id,
- project_id=hook.project_id,
- sql=self.sql,
- pass_value=self.pass_value,
- tolerance=self.tol,
- poll_interval=self.poll_interval,
- ),
- method_name="execute_complete",
- )
+ if job.running():
+ self.defer(
+ timeout=self.execution_timeout,
+ trigger=BigQueryValueCheckTrigger(
+ conn_id=self.gcp_conn_id,
+ job_id=job.job_id,
+ project_id=hook.project_id,
+ sql=self.sql,
+ pass_value=self.pass_value,
+ tolerance=self.tol,
+ poll_interval=self.poll_interval,
+ ),
+ method_name="execute_complete",
+ )
+ self._handle_job_error(job)
+ self.log.info("Current state of job %s is %s", job.job_id, job.state)
+
+ @staticmethod
+ def _handle_job_error(job: BigQueryJob | UnknownJob) -> None:
+ if job.error_result:
+ raise AirflowException(f"BigQuery job {job.job_id} failed: {job.error_result}")

  def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
- """
- Callback for when the trigger fires - returns immediately.
- Relies on trigger to throw an exception, otherwise it assumes execution was
- successful.
+ """Callback for when the trigger fires.
+
+ This returns immediately. It relies on the trigger to throw an exception,
+ otherwise it assumes execution was successful.
  """
  if event["status"] == "error":
  raise AirflowException(event["message"])
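With the guard added above, the operator only defers while the submitted job is still running; a job that already finished is handled inline via ``_handle_job_error``. A hedged usage sketch of the value check itself (table name and thresholds are placeholders)::

    from airflow.providers.google.cloud.operators.bigquery import BigQueryValueCheckOperator

    # Fails unless the single query result is within 10% of pass_value.
    # In deferrable mode the worker slot is freed while the trigger polls.
    check_row_count = BigQueryValueCheckOperator(
        task_id="check_row_count",
        sql="SELECT COUNT(*) FROM `my-project.my_dataset.events`",
        pass_value=10_000,
        tolerance=0.1,
        use_legacy_sql=False,
        deferrable=True,
        poll_interval=4.0,
    )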
@@ -454,7 +461,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperat
  location: str | None = None,
  impersonation_chain: str | Sequence[str] | None = None,
  labels: dict | None = None,
- deferrable: bool = False,
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
  poll_interval: float = 4.0,
  **kwargs,
  ) -> None:
@@ -522,10 +529,10 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperat
  )

  def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
- """
- Callback for when the trigger fires - returns immediately.
- Relies on trigger to throw an exception, otherwise it assumes execution was
- successful.
+ """Callback for when the trigger fires.
+
+ This returns immediately. It relies on the trigger to throw an exception,
+ otherwise it assumes execution was successful.
  """
  if event["status"] == "error":
  raise AirflowException(event["message"])
@@ -789,7 +796,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
  ``[A,B,C]`` and you pass 'B,A' in the ``selected_fields``
  the data would still be of the form ``'A,B'``.

- **Example**: ::
+ **Example**::

  get_data = BigQueryGetDataOperator(
  task_id='get_data_from_bq',
@@ -848,7 +855,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
  gcp_conn_id: str = "google_cloud_default",
  location: str | None = None,
  impersonation_chain: str | Sequence[str] | None = None,
- deferrable: bool = False,
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
  poll_interval: float = 4.0,
  as_dict: bool = False,
  use_legacy_sql: bool = True,
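Besides the configurable ``deferrable`` default, the signature above also shows the ``as_dict`` and ``use_legacy_sql`` arguments. A hedged example that extends the docstring's own ``get_data`` snippet (dataset, table, and connection ids are placeholders)::

    from airflow.providers.google.cloud.operators.bigquery import BigQueryGetDataOperator

    get_data = BigQueryGetDataOperator(
        task_id="get_data_from_bq",
        dataset_id="test_dataset",
        table_id="Transaction_partitions",
        max_results=100,
        selected_fields="DATE",
        as_dict=True,            # rows come back as dictionaries instead of lists
        use_legacy_sql=False,
        gcp_conn_id="airflow-conn-id",
    )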
@@ -886,12 +893,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
  )

  def generate_query(self, hook: BigQueryHook) -> str:
- """
- Generate a select query if selected fields are given or with *
- for the given dataset and table id.
-
- :param hook BigQuery Hook
- """
+ """Generate a SELECT query for the given dataset and table ID."""
  query = "select "
  if self.selected_fields:
  query += self.selected_fields
@@ -967,10 +969,10 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
  )

  def execute_complete(self, context: Context, event: dict[str, Any]) -> Any:
- """
- Callback for when the trigger fires - returns immediately.
- Relies on trigger to throw an exception, otherwise it assumes execution was
- successful.
+ """Callback for when the trigger fires.
+
+ This returns immediately. It relies on the trigger to throw an exception,
+ otherwise it assumes execution was successful.
  """
  if event["status"] == "error":
  raise AirflowException(event["message"])
@@ -980,12 +982,13 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):


  class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator):
- """
- Executes BigQuery SQL queries in a specific BigQuery database.
- This operator does not assert idempotency.
+ """Executes BigQuery SQL queries in a specific BigQuery database.

- This operator is deprecated.
- Please use :class:`airflow.providers.google.cloud.operators.bigquery.BigQueryInsertJobOperator`
+ This operator is deprecated. Please use
+ :class:`airflow.providers.google.cloud.operators.bigquery.BigQueryInsertJobOperator`
+ instead.
+
+ This operator does not assert idempotency.

  :param sql: the SQL code to be executed as a single string, or
  a list of str (sql statements), or a reference to a template file.
@@ -1041,10 +1044,11 @@ class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator):
  US and EU. See details at
  https://cloud.google.com/bigquery/docs/locations#specifying_your_location
  :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
- **Example**: ::
+
+ .. code-block:: python

  encryption_configuration = {
- "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
+ "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
  }
  :param impersonation_chain: Optional service account to impersonate using short-term
  credentials, or chained list of accounts required to get the access_token
@@ -1184,7 +1188,11 @@ class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator):
  ]
  else:
  raise AirflowException(f"argument 'sql' of type {type(str)} is neither a string nor an iterable")
- context["task_instance"].xcom_push(key="job_id", value=job_id)
+ project_id = self.hook.project_id
+ if project_id:
+ job_id_path = convert_job_id(job_id=job_id, project_id=project_id, location=self.location)
+ context["task_instance"].xcom_push(key="job_id_path", value=job_id_path)
+ return job_id

  def on_kill(self) -> None:
  super().on_kill()
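The hunk above makes the deprecated ``BigQueryExecuteQueryOperator`` push its job reference under the new ``job_id_path`` XCom key (via ``convert_job_id``) and return the plain ``job_id``. Since the docstring points migrations at ``BigQueryInsertJobOperator``, here is a hedged sketch of the suggested replacement using the standard BigQuery job ``configuration`` dictionary (query text, project, and location are placeholders)::

    from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator

    insert_query_job = BigQueryInsertJobOperator(
        task_id="insert_query_job",
        configuration={
            "query": {
                "query": "SELECT COUNT(*) FROM `my-project.my_dataset.events`",
                "useLegacySql": False,
            }
        },
        location="US",
        project_id="my-project",
    )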
@@ -1194,9 +1202,7 @@ class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator):


  class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
- """
- Creates a new, empty table in the specified BigQuery dataset,
- optionally with schema.
+ """Creates a new table in the specified BigQuery dataset, optionally with schema.

  The schema to be used for the BigQuery table may be specified in one of
  two ways. You may either directly pass the schema fields in, or you may
@@ -1217,7 +1223,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
  :param schema_fields: If set, the schema field list as defined here:
  https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema

- **Example**: ::
+ **Example**::

  schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
  {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}]
@@ -1236,45 +1242,46 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
  and interact with the Google Cloud Storage service.
  :param labels: a dictionary containing labels for the table, passed to BigQuery

- **Example (with schema JSON in GCS)**: ::
+ **Example (with schema JSON in GCS)**::

- CreateTable = BigQueryCreateEmptyTableOperator(
- task_id='BigQueryCreateEmptyTableOperator_task',
- dataset_id='ODS',
- table_id='Employees',
- project_id='internal-gcp-project',
- gcs_schema_object='gs://schema-bucket/employee_schema.json',
- gcp_conn_id='airflow-conn-id',
- google_cloud_storage_conn_id='airflow-conn-id'
- )
+ CreateTable = BigQueryCreateEmptyTableOperator(
+ task_id='BigQueryCreateEmptyTableOperator_task',
+ dataset_id='ODS',
+ table_id='Employees',
+ project_id='internal-gcp-project',
+ gcs_schema_object='gs://schema-bucket/employee_schema.json',
+ gcp_conn_id='airflow-conn-id',
+ google_cloud_storage_conn_id='airflow-conn-id'
+ )

- **Corresponding Schema file** (``employee_schema.json``): ::
+ **Corresponding Schema file** (``employee_schema.json``)::

- [
- {
- "mode": "NULLABLE",
- "name": "emp_name",
- "type": "STRING"
- },
- {
- "mode": "REQUIRED",
- "name": "salary",
- "type": "INTEGER"
- }
- ]
+ [
+ {
+ "mode": "NULLABLE",
+ "name": "emp_name",
+ "type": "STRING"
+ },
+ {
+ "mode": "REQUIRED",
+ "name": "salary",
+ "type": "INTEGER"
+ }
+ ]
+
+ **Example (with schema in the DAG)**::
+
+ CreateTable = BigQueryCreateEmptyTableOperator(
+ task_id='BigQueryCreateEmptyTableOperator_task',
+ dataset_id='ODS',
+ table_id='Employees',
+ project_id='internal-gcp-project',
+ schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
+ {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}],
+ gcp_conn_id='airflow-conn-id-account',
+ google_cloud_storage_conn_id='airflow-conn-id'
+ )

- **Example (with schema in the DAG)**: ::
-
- CreateTable = BigQueryCreateEmptyTableOperator(
- task_id='BigQueryCreateEmptyTableOperator_task',
- dataset_id='ODS',
- table_id='Employees',
- project_id='internal-gcp-project',
- schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
- {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}],
- gcp_conn_id='airflow-conn-id-account',
- google_cloud_storage_conn_id='airflow-conn-id'
- )

  :param view: [Optional] A dictionary containing definition for the view.
  If set, it will create a view instead of a table:
@@ -1282,10 +1289,11 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
  https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
  :param materialized_view: [Optional] The materialized view definition.
  :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
- **Example**: ::
+
+ .. code-block:: python

  encryption_configuration = {
- "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
+ "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
  }
  :param location: The location used for the operation.
  :param cluster_fields: [Optional] The fields used for clustering.
@@ -1446,9 +1454,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):


  class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
- """
- Creates a new external table in the dataset with the data from Google Cloud
- Storage.
+ """Create a new external table with data from Google Cloud Storage.

  The schema to be used for the BigQuery table may be specified in one of
  two ways. You may either directly pass the schema fields in, or you may
@@ -1468,7 +1474,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
  :param schema_fields: If set, the schema field list as defined here:
  https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema

- **Example**: ::
+ **Example**::

  schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
  {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}]
@@ -1508,10 +1514,11 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
  :param src_fmt_configs: configure optional fields specific to the source format
  :param labels: a dictionary containing labels for the table, passed to BigQuery
  :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
- **Example**: ::
+
+ .. code-block:: python

  encryption_configuration = {
- "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
+ "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
  }
  :param location: The location used for the operation.
  :param impersonation_chain: Optional service account to impersonate using short-term
@@ -1735,8 +1742,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):


  class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):
- """
- This operator deletes an existing dataset from your Project in Big query.
+ """Delete an existing dataset from your Project in BigQuery.

  https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete

@@ -1760,7 +1766,7 @@ class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):
  Service Account Token Creator IAM role to the directly preceding identity, with first
  account from the list granting this role to the originating account (templated).

- **Example**: ::
+ **Example**::

  delete_temp_data = BigQueryDeleteDatasetOperator(
  dataset_id='temp-dataset',
@@ -1810,8 +1816,7 @@ class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):


  class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
- """
- This operator is used to create new dataset for your Project in BigQuery.
+ """Create a new dataset for your Project in BigQuery.

  https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource

@@ -1837,7 +1842,7 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
  :param if_exists: What should Airflow do if the dataset exists. If set to `log`, the TI will be passed to
  success and an error message will be logged. Set to `ignore` to ignore the error, set to `fail` to
  fail the TI, and set to `skip` to skip it.
- **Example**: ::
+ **Example**::

  create_new_dataset = BigQueryCreateEmptyDatasetOperator(
  dataset_id='new-dataset',
@@ -1872,7 +1877,6 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
  exists_ok: bool | None = None,
  **kwargs,
  ) -> None:
-
  self.dataset_id = dataset_id
  self.project_id = project_id
  self.location = location
@@ -1934,8 +1938,7 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):


  class BigQueryGetDatasetOperator(GoogleCloudBaseOperator):
- """
- This operator is used to return the dataset specified by dataset_id.
+ """Get the dataset specified by ID.

  .. seealso::
  For more information on how to use this operator, take a look at the guide:
@@ -1999,8 +2002,7 @@ class BigQueryGetDatasetOperator(GoogleCloudBaseOperator):


  class BigQueryGetDatasetTablesOperator(GoogleCloudBaseOperator):
- """
- This operator retrieves the list of tables in the specified dataset.
+ """Retrieve the list of tables in the specified dataset.

  .. seealso::
  For more information on how to use this operator, take a look at the guide:
@@ -2059,12 +2061,13 @@ class BigQueryGetDatasetTablesOperator(GoogleCloudBaseOperator):


  class BigQueryPatchDatasetOperator(GoogleCloudBaseOperator):
- """
- This operator is used to patch dataset for your Project in BigQuery.
- It only replaces fields that are provided in the submitted dataset resource.
+ """Patch a dataset for your Project in BigQuery.

- This operator is deprecated.
- Please use :class:`airflow.providers.google.cloud.operators.bigquery.BigQueryUpdateTableOperator`
+ This operator is deprecated. Please use
+ :class:`airflow.providers.google.cloud.operators.bigquery.BigQueryUpdateTableOperator`
+ instead.
+
+ Only replaces fields that are provided in the submitted dataset resource.

  :param dataset_id: The id of dataset. Don't need to provide,
  if datasetId in dataset_reference.
@@ -2127,8 +2130,8 @@ class BigQueryPatchDatasetOperator(GoogleCloudBaseOperator):


  class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
- """
- This operator is used to update table for your Project in BigQuery.
+ """Update a table for your Project in BigQuery.
+
  Use ``fields`` to specify which fields of table to update. If a field
  is listed in ``fields`` and is ``None`` in table, it will be deleted.

@@ -2214,8 +2217,8 @@ class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):


  class BigQueryUpdateDatasetOperator(GoogleCloudBaseOperator):
- """
- This operator is used to update dataset for your Project in BigQuery.
+ """Update a dataset for your Project in BigQuery.
+
  Use ``fields`` to specify which fields of dataset to update. If a field
  is listed in ``fields`` and is ``None`` in dataset, it will be deleted.
  If no ``fields`` are provided then all fields of provided ``dataset_resource``
@@ -2296,8 +2299,7 @@ class BigQueryUpdateDatasetOperator(GoogleCloudBaseOperator):


  class BigQueryDeleteTableOperator(GoogleCloudBaseOperator):
- """
- Deletes BigQuery tables.
+ """Delete a BigQuery table.

  .. seealso::
  For more information on how to use this operator, take a look at the guide:
@@ -2355,8 +2357,7 @@ class BigQueryDeleteTableOperator(GoogleCloudBaseOperator):


  class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
- """
- Upsert BigQuery table.
+ """Upsert to a BigQuery table.

  .. seealso::
  For more information on how to use this operator, take a look at the guide:
@@ -2433,8 +2434,8 @@ class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):


  class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
- """
- Update BigQuery Table Schema
+ """Update BigQuery Table Schema.
+
  Updates fields on a table schema based on contents of the supplied schema_fields_updates
  parameter. The supplied schema does not need to be complete, if the field
  already exists in the schema you only need to supply keys & values for the
@@ -2447,16 +2448,22 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
  :param schema_fields_updates: a partial schema resource. see
  https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableSchema

- **Example**: ::
+ .. code-block:: python

- schema_fields_updates=[
- {"name": "emp_name", "description": "Some New Description"},
- {"name": "salary", "policyTags": {'names': ['some_new_policy_tag']},},
- {"name": "departments", "fields": [
- {"name": "name", "description": "Some New Description"},
- {"name": "type", "description": "Some New Description"}
- ]},
- ]
+ schema_fields_updates = [
+ {"name": "emp_name", "description": "Some New Description"},
+ {
+ "name": "salary",
+ "policyTags": {"names": ["some_new_policy_tag"]},
+ },
+ {
+ "name": "departments",
+ "fields": [
+ {"name": "name", "description": "Some New Description"},
+ {"name": "type", "description": "Some New Description"},
+ ],
+ },
+ ]

  :param include_policy_tags: (Optional) If set to True policy tags will be included in
  the update request which requires special permissions even if unchanged (default False)
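The ``schema_fields_updates`` block above is a partial schema patch; only the listed keys are touched. A hedged example of handing it to the operator (dataset and table ids are placeholders)::

    from airflow.providers.google.cloud.operators.bigquery import (
        BigQueryUpdateTableSchemaOperator,
    )

    update_descriptions = BigQueryUpdateTableSchemaOperator(
        task_id="update_table_schema",
        dataset_id="my_dataset",
        table_id="employees",
        schema_fields_updates=[
            {"name": "emp_name", "description": "Some New Description"},
        ],
        include_policy_tags=False,
    )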
@@ -2536,9 +2543,9 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):


  class BigQueryInsertJobOperator(GoogleCloudBaseOperator):
- """
- Executes a BigQuery job. Waits for the job to complete and returns job id.
+ """Execute a BigQuery job.

+ Waits for the job to complete and returns job id.
  This operator work in the following way:

  - it calculates a unique hash of the job using job's configuration or uuid if ``force_rerun`` is True
@@ -2616,7 +2623,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator):
  cancel_on_kill: bool = True,
  result_retry: Retry = DEFAULT_RETRY,
  result_timeout: float | None = None,
- deferrable: bool = False,
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
  poll_interval: float = 4.0,
  **kwargs,
  ) -> None:
@@ -2727,9 +2734,11 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator):
  persist_kwargs["dataset_id"] = table["datasetId"]
  persist_kwargs["project_id"] = table["projectId"]
  BigQueryTableLink.persist(**persist_kwargs)
-
  self.job_id = job.job_id
- context["ti"].xcom_push(key="job_id", value=self.job_id)
+ project_id = self.project_id or self.hook.project_id
+ if project_id:
+ job_id_path = convert_job_id(job_id=job_id, project_id=project_id, location=self.location)
+ context["ti"].xcom_push(key="job_id_path", value=job_id_path)
  # Wait for the job to complete
  if not self.deferrable:
  job.result(timeout=self.result_timeout, retry=self.result_retry)
@@ -2749,12 +2758,13 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator):
  method_name="execute_complete",
  )
  self.log.info("Current state of job %s is %s", job.job_id, job.state)
+ self._handle_job_error(job)

  def execute_complete(self, context: Context, event: dict[str, Any]):
- """
- Callback for when the trigger fires - returns immediately.
- Relies on trigger to throw an exception, otherwise it assumes execution was
- successful.
+ """Callback for when the trigger fires.
+
+ This returns immediately. It relies on the trigger to throw an exception,
+ otherwise it assumes execution was successful.
  """
  if event["status"] == "error":
  raise AirflowException(event["message"])
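Both ``BigQueryExecuteQueryOperator`` and ``BigQueryInsertJobOperator`` now push the job reference under ``job_id_path`` rather than ``job_id``, so anything that pulled the old key has to follow. A hedged sketch of a downstream task reading the new key (the upstream task id is a placeholder)::

    # Sketch only: a downstream callable pulling the value pushed above.
    def report_job(ti):
        job_id_path = ti.xcom_pull(task_ids="insert_query_job", key="job_id_path")
        print(f"BigQuery job reference: {job_id_path}")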