apache-airflow-providers-google 10.2.0rc1__py3-none-any.whl → 10.3.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +38 -39
- airflow/providers/google/ads/transfers/ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +6 -9
- airflow/providers/google/cloud/hooks/bigquery.py +328 -318
- airflow/providers/google/cloud/hooks/cloud_sql.py +66 -22
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +46 -70
- airflow/providers/google/cloud/hooks/dataflow.py +11 -15
- airflow/providers/google/cloud/hooks/dataform.py +3 -3
- airflow/providers/google/cloud/hooks/dataproc.py +577 -573
- airflow/providers/google/cloud/hooks/functions.py +60 -76
- airflow/providers/google/cloud/hooks/gcs.py +108 -18
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +69 -90
- airflow/providers/google/cloud/links/datafusion.py +4 -3
- airflow/providers/google/cloud/operators/bigquery.py +201 -191
- airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/operators/cloud_build.py +2 -1
- airflow/providers/google/cloud/operators/cloud_composer.py +4 -3
- airflow/providers/google/cloud/operators/cloud_sql.py +62 -28
- airflow/providers/google/cloud/operators/dataflow.py +6 -4
- airflow/providers/google/cloud/operators/dataform.py +3 -2
- airflow/providers/google/cloud/operators/dataproc.py +127 -123
- airflow/providers/google/cloud/operators/dataproc_metastore.py +18 -26
- airflow/providers/google/cloud/operators/gcs.py +35 -13
- airflow/providers/google/cloud/operators/kubernetes_engine.py +92 -42
- airflow/providers/google/cloud/operators/mlengine.py +2 -6
- airflow/providers/google/cloud/operators/vision.py +47 -56
- airflow/providers/google/cloud/sensors/bigquery.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +5 -7
- airflow/providers/google/cloud/sensors/pubsub.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +3 -2
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +6 -5
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +46 -7
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +5 -2
- airflow/providers/google/cloud/triggers/cloud_sql.py +102 -0
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +28 -6
- airflow/providers/google/cloud/utils/bigquery.py +17 -0
- airflow/providers/google/get_provider_info.py +7 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +4 -0
- airflow/providers/google/suite/transfers/local_to_drive.py +28 -26
- apache_airflow_providers_google-10.3.0rc1.dist-info/METADATA +289 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/RECORD +49 -48
- apache_airflow_providers_google-10.2.0rc1.dist-info/METADATA +0 -1824
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/LICENSE +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/NOTICE +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -15,10 +15,9 @@
|
|
15
15
|
# KIND, either express or implied. See the License for the
|
16
16
|
# specific language governing permissions and limitations
|
17
17
|
# under the License.
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
"""
|
18
|
+
|
19
|
+
"""BigQuery Hook and a very basic PEP 249 implementation for BigQuery."""
|
20
|
+
|
22
21
|
from __future__ import annotations
|
23
22
|
|
24
23
|
import json
|
@@ -76,8 +75,9 @@ BigQueryJob = Union[CopyJob, QueryJob, LoadJob, ExtractJob]
|
|
76
75
|
|
77
76
|
|
78
77
|
class BigQueryHook(GoogleBaseHook, DbApiHook):
|
79
|
-
"""
|
80
|
-
|
78
|
+
"""Interact with BigQuery.
|
79
|
+
|
80
|
+
This hook uses the Google Cloud connection.
|
81
81
|
|
82
82
|
:param gcp_conn_id: The Airflow connection used for GCP credentials.
|
83
83
|
:param use_legacy_sql: This specifies whether to use legacy SQL dialect.
|
@@ -85,9 +85,10 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
85
85
|
:param priority: Specifies a priority for the query.
|
86
86
|
Possible values include INTERACTIVE and BATCH.
|
87
87
|
The default value is INTERACTIVE.
|
88
|
-
:param api_resource_configs: This contains params configuration applied for
|
89
|
-
|
90
|
-
|
88
|
+
:param api_resource_configs: This contains params configuration applied for
|
89
|
+
Google BigQuery jobs.
|
90
|
+
:param impersonation_chain: This is the optional service account to
|
91
|
+
impersonate using short term credentials.
|
91
92
|
:param labels: The BigQuery resource label.
|
92
93
|
"""
|
93
94
|
|
@@ -125,7 +126,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
125
126
|
self.credentials_path = "bigquery_hook_credentials.json"
|
126
127
|
|
127
128
|
def get_conn(self) -> BigQueryConnection:
|
128
|
-
"""
|
129
|
+
"""Get a BigQuery PEP 249 connection object."""
|
129
130
|
service = self.get_service()
|
130
131
|
return BigQueryConnection(
|
131
132
|
service=service,
|
@@ -137,7 +138,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
137
138
|
)
|
138
139
|
|
139
140
|
def get_service(self) -> Resource:
|
140
|
-
"""
|
141
|
+
"""Get a BigQuery service object. Deprecated."""
|
141
142
|
warnings.warn(
|
142
143
|
"This method will be deprecated. Please use `BigQueryHook.get_client` method",
|
143
144
|
AirflowProviderDeprecationWarning,
|
@@ -146,12 +147,10 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
146
147
|
return build("bigquery", "v2", http=http_authorized, cache_discovery=False)
|
147
148
|
|
148
149
|
def get_client(self, project_id: str | None = None, location: str | None = None) -> Client:
|
149
|
-
"""
|
150
|
-
Returns authenticated BigQuery Client.
|
150
|
+
"""Get an authenticated BigQuery Client.
|
151
151
|
|
152
152
|
:param project_id: Project ID for the project which the client acts on behalf of.
|
153
153
|
:param location: Default location for jobs / datasets / tables.
|
154
|
-
:return:
|
155
154
|
"""
|
156
155
|
return Client(
|
157
156
|
client_info=CLIENT_INFO,
|
@@ -161,15 +160,13 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
161
160
|
)
|
162
161
|
|
163
162
|
def get_uri(self) -> str:
|
164
|
-
"""Override DbApiHook
|
163
|
+
"""Override from ``DbApiHook`` for ``get_sqlalchemy_engine()``."""
|
165
164
|
return f"bigquery://{self.project_id}"
|
166
165
|
|
167
166
|
def get_sqlalchemy_engine(self, engine_kwargs=None):
|
168
|
-
"""
|
169
|
-
Get an sqlalchemy_engine object.
|
167
|
+
"""Create an SQLAlchemy engine object.
|
170
168
|
|
171
169
|
:param engine_kwargs: Kwargs used in :func:`~sqlalchemy.create_engine`.
|
172
|
-
:return: the created engine.
|
173
170
|
"""
|
174
171
|
if engine_kwargs is None:
|
175
172
|
engine_kwargs = {}
|
@@ -233,7 +230,8 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
233
230
|
replace: Any = False,
|
234
231
|
**kwargs,
|
235
232
|
) -> None:
|
236
|
-
"""
|
233
|
+
"""Insert rows.
|
234
|
+
|
237
235
|
Insertion is currently unsupported. Theoretically, you could use
|
238
236
|
BigQuery's streaming API to insert rows into a table, but this hasn't
|
239
237
|
been implemented.
|
@@ -247,14 +245,14 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
247
245
|
dialect: str | None = None,
|
248
246
|
**kwargs,
|
249
247
|
) -> DataFrame:
|
250
|
-
"""
|
251
|
-
|
252
|
-
|
253
|
-
|
248
|
+
"""Get a Pandas DataFrame for the BigQuery results.
|
249
|
+
|
250
|
+
The DbApiHook method must be overridden because Pandas doesn't support
|
251
|
+
PEP 249 connections, except for SQLite.
|
254
252
|
|
255
|
-
|
256
|
-
|
257
|
-
|
253
|
+
.. seealso::
|
254
|
+
https://github.com/pandas-dev/pandas/blob/055d008615272a1ceca9720dc365a2abd316f353/pandas/io/sql.py#L415
|
255
|
+
https://github.com/pandas-dev/pandas/issues/6900
|
258
256
|
|
259
257
|
:param sql: The BigQuery SQL to execute.
|
260
258
|
:param parameters: The parameters to render the SQL query with (not
|
@@ -274,8 +272,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
274
272
|
|
275
273
|
@GoogleBaseHook.fallback_to_default_project_id
|
276
274
|
def table_exists(self, dataset_id: str, table_id: str, project_id: str) -> bool:
|
277
|
-
"""
|
278
|
-
Checks for the existence of a table in Google BigQuery.
|
275
|
+
"""Check if a table exists in Google BigQuery.
|
279
276
|
|
280
277
|
:param project_id: The Google cloud project in which to look for the
|
281
278
|
table. The connection supplied to the hook must provide access to
|
@@ -295,8 +292,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
295
292
|
def table_partition_exists(
|
296
293
|
self, dataset_id: str, table_id: str, partition_id: str, project_id: str
|
297
294
|
) -> bool:
|
298
|
-
"""
|
299
|
-
Checks for the existence of a partition in a table in Google BigQuery.
|
295
|
+
"""Check if a partition exists in Google BigQuery.
|
300
296
|
|
301
297
|
:param project_id: The Google cloud project in which to look for the
|
302
298
|
table. The connection supplied to the hook must provide access to
|
@@ -330,9 +326,10 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
330
326
|
location: str | None = None,
|
331
327
|
exists_ok: bool = True,
|
332
328
|
) -> Table:
|
333
|
-
"""
|
334
|
-
|
335
|
-
To create a view, which is defined by a SQL query, parse a dictionary to
|
329
|
+
"""Create a new, empty table in the dataset.
|
330
|
+
|
331
|
+
To create a view, which is defined by a SQL query, parse a dictionary to
|
332
|
+
the *view* argument.
|
336
333
|
|
337
334
|
:param project_id: The project to create the table into.
|
338
335
|
:param dataset_id: The dataset to create the table into.
|
@@ -342,14 +339,16 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
342
339
|
If provided all other parameters are ignored.
|
343
340
|
:param schema_fields: If set, the schema field list as defined here:
|
344
341
|
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
|
345
|
-
:param labels: a dictionary containing labels for the table, passed to BigQuery
|
346
|
-
:param retry: Optional. How to retry the RPC.
|
347
342
|
|
348
|
-
|
343
|
+
.. code-block:: python
|
349
344
|
|
350
|
-
|
351
|
-
|
345
|
+
schema_fields = [
|
346
|
+
{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
|
347
|
+
{"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
|
348
|
+
]
|
352
349
|
|
350
|
+
:param labels: a dictionary containing labels for the table, passed to BigQuery
|
351
|
+
:param retry: Optional. How to retry the RPC.
|
353
352
|
:param time_partitioning: configure optional time partitioning fields i.e.
|
354
353
|
partition by field, type and expiration as per API specifications.
|
355
354
|
|
@@ -363,20 +362,22 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
363
362
|
If set, it will create a view instead of a table:
|
364
363
|
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
|
365
364
|
|
366
|
-
|
365
|
+
.. code-block:: python
|
367
366
|
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
367
|
+
view = {
|
368
|
+
"query": "SELECT * FROM `test-project-id.test_dataset_id.test_table_prefix*` LIMIT 1000",
|
369
|
+
"useLegacySql": False,
|
370
|
+
}
|
372
371
|
|
373
372
|
:param materialized_view: [Optional] The materialized view definition.
|
374
373
|
:param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
|
375
|
-
|
374
|
+
|
375
|
+
.. code-block:: python
|
376
376
|
|
377
377
|
encryption_configuration = {
|
378
|
-
"kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
|
378
|
+
"kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
|
379
379
|
}
|
380
|
+
|
380
381
|
:param num_retries: Maximum number of retries in case of connection problems.
|
381
382
|
:param location: (Optional) The geographic location where the table should reside.
|
382
383
|
:param exists_ok: If ``True``, ignore "already exists" errors when creating the table.
|
@@ -429,10 +430,9 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
429
430
|
dataset_reference: dict[str, Any] | None = None,
|
430
431
|
exists_ok: bool = True,
|
431
432
|
) -> dict[str, Any]:
|
432
|
-
"""
|
433
|
-
Create a new empty dataset.
|
433
|
+
"""Create a new empty dataset.
|
434
434
|
|
435
|
-
|
435
|
+
.. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/insert
|
436
436
|
|
437
437
|
:param project_id: The name of the project where we want to create
|
438
438
|
an empty a dataset. Don't need to provide, if projectId in dataset_reference.
|
@@ -491,8 +491,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
491
491
|
max_results: int | None = None,
|
492
492
|
retry: Retry = DEFAULT_RETRY,
|
493
493
|
) -> list[dict[str, Any]]:
|
494
|
-
"""
|
495
|
-
Get the list of tables for a given dataset.
|
494
|
+
"""Get the list of tables for a given dataset.
|
496
495
|
|
497
496
|
For more information, see:
|
498
497
|
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list
|
@@ -521,8 +520,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
521
520
|
delete_contents: bool = False,
|
522
521
|
retry: Retry = DEFAULT_RETRY,
|
523
522
|
) -> None:
|
524
|
-
"""
|
525
|
-
Delete a dataset of Big query in your project.
|
523
|
+
"""Delete a dataset of Big query in your project.
|
526
524
|
|
527
525
|
:param project_id: The name of the project where we have the dataset.
|
528
526
|
:param dataset_id: The dataset to be delete.
|
@@ -562,17 +560,13 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
562
560
|
location: str | None = None,
|
563
561
|
project_id: str | None = None,
|
564
562
|
) -> Table:
|
565
|
-
"""
|
566
|
-
Creates a new external table in the dataset with the data from Google
|
567
|
-
Cloud Storage.
|
568
|
-
|
569
|
-
See here:
|
570
|
-
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource
|
563
|
+
"""Create an external table in the dataset with data from Google Cloud Storage.
|
571
564
|
|
572
|
-
|
573
|
-
Please use `BigQueryHook.create_empty_table` method with passing the `table_resource` object
|
565
|
+
.. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource
|
574
566
|
|
575
|
-
|
567
|
+
This method is deprecated. Please use :func:`.create_empty_table` with
|
568
|
+
the ``table_resource`` object. See function documentation for more
|
569
|
+
details about these parameters.
|
576
570
|
|
577
571
|
:param external_project_dataset_table:
|
578
572
|
The dotted ``(<project>.|<project>:)<dataset>.<table>($<partition>)`` BigQuery
|
@@ -618,10 +612,11 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
618
612
|
:param labels: A dictionary containing labels for the BiqQuery table.
|
619
613
|
:param description: A string containing the description for the BigQuery table.
|
620
614
|
:param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
|
621
|
-
|
615
|
+
|
616
|
+
.. code-block:: python
|
622
617
|
|
623
618
|
encryption_configuration = {
|
624
|
-
"kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
|
619
|
+
"kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
|
625
620
|
}
|
626
621
|
"""
|
627
622
|
warnings.warn(
|
@@ -706,8 +701,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
706
701
|
table_id: str | None = None,
|
707
702
|
project_id: str | None = None,
|
708
703
|
) -> dict[str, Any]:
|
709
|
-
"""
|
710
|
-
Change some fields of a table.
|
704
|
+
"""Change some fields of a table.
|
711
705
|
|
712
706
|
Use ``fields`` to specify which fields to update. At least one field
|
713
707
|
must be provided. If a field is listed in ``fields`` and is ``None``
|
@@ -757,11 +751,10 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
757
751
|
require_partition_filter: bool | None = None,
|
758
752
|
encryption_configuration: dict | None = None,
|
759
753
|
) -> None:
|
760
|
-
"""
|
761
|
-
Patch information in an existing table.
|
762
|
-
It only updates fields that are provided in the request object.
|
754
|
+
"""Patch information in an existing table.
|
763
755
|
|
764
|
-
|
756
|
+
It only updates fields that are provided in the request object. This
|
757
|
+
method is deprecated. Please use :func:`.update_table` instead.
|
765
758
|
|
766
759
|
Reference: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/patch
|
767
760
|
|
@@ -779,30 +772,35 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
779
772
|
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
|
780
773
|
The supported schema modifications and unsupported schema modification are listed here:
|
781
774
|
https://cloud.google.com/bigquery/docs/managing-table-schemas
|
782
|
-
**Example**: ::
|
783
775
|
|
784
|
-
|
785
|
-
|
776
|
+
.. code-block:: python
|
777
|
+
|
778
|
+
schema = [
|
779
|
+
{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
|
780
|
+
{"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
|
781
|
+
]
|
786
782
|
|
787
783
|
:param time_partitioning: [Optional] A dictionary containing time-based partitioning
|
788
784
|
definition for the table.
|
789
785
|
:param view: [Optional] A dictionary containing definition for the view.
|
790
786
|
If set, it will patch a view instead of a table:
|
791
787
|
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
|
792
|
-
|
788
|
+
|
789
|
+
.. code-block:: python
|
793
790
|
|
794
791
|
view = {
|
795
792
|
"query": "SELECT * FROM `test-project-id.test_dataset_id.test_table_prefix*` LIMIT 500",
|
796
|
-
"useLegacySql": False
|
793
|
+
"useLegacySql": False,
|
797
794
|
}
|
798
795
|
|
799
796
|
:param require_partition_filter: [Optional] If true, queries over the this table require a
|
800
797
|
partition filter. If false, queries over the table
|
801
798
|
:param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
|
802
|
-
|
799
|
+
|
800
|
+
.. code-block:: python
|
803
801
|
|
804
802
|
encryption_configuration = {
|
805
|
-
"kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
|
803
|
+
"kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
|
806
804
|
}
|
807
805
|
|
808
806
|
"""
|
@@ -852,9 +850,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
852
850
|
skip_invalid_rows: bool = False,
|
853
851
|
fail_on_error: bool = False,
|
854
852
|
) -> None:
|
855
|
-
"""
|
856
|
-
Method to stream data into BigQuery one record at a time without needing
|
857
|
-
to run a load job.
|
853
|
+
"""Stream data into BigQuery one record at a time without a load job.
|
858
854
|
|
859
855
|
.. seealso::
|
860
856
|
For more information, see:
|
@@ -865,8 +861,9 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
865
861
|
:param table_id: The name of the table
|
866
862
|
:param rows: the rows to insert
|
867
863
|
|
868
|
-
|
869
|
-
|
864
|
+
.. code-block:: python
|
865
|
+
|
866
|
+
rows = [{"json": {"a_key": "a_value_0"}}, {"json": {"a_key": "a_value_1"}}]
|
870
867
|
|
871
868
|
:param ignore_unknown_values: [Optional] Accept rows that contain values
|
872
869
|
that do not match the schema. The unknown values are ignored.
|
@@ -906,8 +903,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
906
903
|
project_id: str | None = None,
|
907
904
|
retry: Retry = DEFAULT_RETRY,
|
908
905
|
) -> Dataset:
|
909
|
-
"""
|
910
|
-
Change some fields of a dataset.
|
906
|
+
"""Change some fields of a dataset.
|
911
907
|
|
912
908
|
Use ``fields`` to specify which fields to update. At least one field
|
913
909
|
must be provided. If a field is listed in ``fields`` and is ``None`` in
|
@@ -945,11 +941,11 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
945
941
|
return dataset
|
946
942
|
|
947
943
|
def patch_dataset(self, dataset_id: str, dataset_resource: dict, project_id: str | None = None) -> dict:
|
948
|
-
"""
|
949
|
-
|
944
|
+
"""Patches information in an existing dataset.
|
945
|
+
|
950
946
|
It only replaces fields that are provided in the submitted dataset resource.
|
951
947
|
|
952
|
-
This method is deprecated. Please use
|
948
|
+
This method is deprecated. Please use :func:`.update_dataset` instead.
|
953
949
|
|
954
950
|
More info:
|
955
951
|
https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/patch
|
@@ -993,11 +989,11 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
993
989
|
table_prefix: str | None = None,
|
994
990
|
max_results: int | None = None,
|
995
991
|
) -> list[dict[str, Any]]:
|
996
|
-
"""
|
997
|
-
Method returns tables list of a BigQuery tables. If table prefix is specified,
|
998
|
-
only tables beginning by it are returned.
|
992
|
+
"""List tables of a BigQuery dataset.
|
999
993
|
|
1000
|
-
|
994
|
+
If a table prefix is specified, only tables beginning by it are
|
995
|
+
returned. This method is deprecated. Please use
|
996
|
+
:func:`.get_dataset_tables` instead.
|
1001
997
|
|
1002
998
|
For more information, see:
|
1003
999
|
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list
|
@@ -1037,8 +1033,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1037
1033
|
retry: Retry = DEFAULT_RETRY,
|
1038
1034
|
return_iterator: bool = False,
|
1039
1035
|
) -> list[DatasetListItem] | HTTPIterator:
|
1040
|
-
"""
|
1041
|
-
Method returns full list of BigQuery datasets in the current project.
|
1036
|
+
"""Get all BigQuery datasets in the current project.
|
1042
1037
|
|
1043
1038
|
For more information, see:
|
1044
1039
|
https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list
|
@@ -1081,16 +1076,15 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1081
1076
|
|
1082
1077
|
@GoogleBaseHook.fallback_to_default_project_id
|
1083
1078
|
def get_dataset(self, dataset_id: str, project_id: str | None = None) -> Dataset:
|
1084
|
-
"""
|
1085
|
-
Fetch the dataset referenced by dataset_id.
|
1079
|
+
"""Fetch the dataset referenced by *dataset_id*.
|
1086
1080
|
|
1087
1081
|
:param dataset_id: The BigQuery Dataset ID
|
1088
1082
|
:param project_id: The Google Cloud Project ID
|
1089
1083
|
:return: dataset_resource
|
1090
1084
|
|
1091
|
-
|
1092
|
-
|
1093
|
-
|
1085
|
+
.. seealso::
|
1086
|
+
For more information, see Dataset Resource content:
|
1087
|
+
https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource
|
1094
1088
|
"""
|
1095
1089
|
dataset = self.get_client(project_id=project_id).get_dataset(
|
1096
1090
|
dataset_ref=DatasetReference(project_id, dataset_id)
|
@@ -1107,10 +1101,10 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1107
1101
|
view_project: str | None = None,
|
1108
1102
|
project_id: str | None = None,
|
1109
1103
|
) -> dict[str, Any]:
|
1110
|
-
"""
|
1111
|
-
|
1104
|
+
"""Grant authorized view access of a dataset to a view table.
|
1105
|
+
|
1112
1106
|
If this view has already been granted access to the dataset, do nothing.
|
1113
|
-
This method is not atomic.
|
1107
|
+
This method is not atomic. Running it may clobber a simultaneous update.
|
1114
1108
|
|
1115
1109
|
:param source_dataset: the source dataset
|
1116
1110
|
:param view_dataset: the dataset that the view is in
|
@@ -1159,8 +1153,8 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1159
1153
|
def run_table_upsert(
|
1160
1154
|
self, dataset_id: str, table_resource: dict[str, Any], project_id: str | None = None
|
1161
1155
|
) -> dict[str, Any]:
|
1162
|
-
"""
|
1163
|
-
|
1156
|
+
"""Update a table if it exists, otherwise create a new one.
|
1157
|
+
|
1164
1158
|
Since BigQuery does not natively allow table upserts, this is not an
|
1165
1159
|
atomic operation.
|
1166
1160
|
|
@@ -1169,7 +1163,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1169
1163
|
https://cloud.google.com/bigquery/docs/reference/v2/tables#resource
|
1170
1164
|
:param project_id: the project to upsert the table into. If None,
|
1171
1165
|
project will be self.project_id.
|
1172
|
-
:return:
|
1173
1166
|
"""
|
1174
1167
|
table_id = table_resource["tableReference"]["tableId"]
|
1175
1168
|
table_resource = self._resolve_table_reference(
|
@@ -1188,12 +1181,12 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1188
1181
|
return table
|
1189
1182
|
|
1190
1183
|
def run_table_delete(self, deletion_dataset_table: str, ignore_if_missing: bool = False) -> None:
|
1191
|
-
"""
|
1192
|
-
|
1193
|
-
If the table does not exist, return an error unless ignore_if_missing
|
1184
|
+
"""Delete an existing table from the dataset.
|
1185
|
+
|
1186
|
+
If the table does not exist, return an error unless *ignore_if_missing*
|
1194
1187
|
is set to True.
|
1195
1188
|
|
1196
|
-
This method is deprecated. Please use
|
1189
|
+
This method is deprecated. Please use :func:`.delete_table` instead.
|
1197
1190
|
|
1198
1191
|
:param deletion_dataset_table: A dotted
|
1199
1192
|
``(<project>.|<project>:)<dataset>.<table>`` that indicates which table
|
@@ -1214,9 +1207,10 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1214
1207
|
not_found_ok: bool = True,
|
1215
1208
|
project_id: str | None = None,
|
1216
1209
|
) -> None:
|
1217
|
-
"""
|
1218
|
-
|
1219
|
-
unless not_found_ok is
|
1210
|
+
"""Delete an existing table from the dataset.
|
1211
|
+
|
1212
|
+
If the table does not exist, return an error unless *not_found_ok* is
|
1213
|
+
set to True.
|
1220
1214
|
|
1221
1215
|
:param table_id: A dotted ``(<project>.|<project>:)<dataset>.<table>``
|
1222
1216
|
that indicates which table will be deleted.
|
@@ -1239,12 +1233,11 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1239
1233
|
page_token: str | None = None,
|
1240
1234
|
start_index: int | None = None,
|
1241
1235
|
) -> list[dict]:
|
1242
|
-
"""
|
1243
|
-
Get the data of a given dataset.table and optionally with selected columns.
|
1236
|
+
"""Get data from given table.
|
1244
1237
|
|
1245
|
-
This method is deprecated. Please use
|
1238
|
+
This method is deprecated. Please use :func:`.list_rows` instead.
|
1246
1239
|
|
1247
|
-
|
1240
|
+
.. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/tabledata/list
|
1248
1241
|
|
1249
1242
|
:param dataset_id: the dataset ID of the requested table.
|
1250
1243
|
:param table_id: the table ID of the requested table.
|
@@ -1281,8 +1274,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1281
1274
|
retry: Retry = DEFAULT_RETRY,
|
1282
1275
|
return_iterator: bool = False,
|
1283
1276
|
) -> list[Row] | RowIterator:
|
1284
|
-
"""
|
1285
|
-
List the rows of the table.
|
1277
|
+
"""List rows in a table.
|
1286
1278
|
|
1287
1279
|
See https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list
|
1288
1280
|
|
@@ -1331,10 +1323,9 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1331
1323
|
|
1332
1324
|
@GoogleBaseHook.fallback_to_default_project_id
|
1333
1325
|
def get_schema(self, dataset_id: str, table_id: str, project_id: str | None = None) -> dict:
|
1334
|
-
"""
|
1335
|
-
Get the schema for a given dataset and table.
|
1326
|
+
"""Get the schema for a given dataset and table.
|
1336
1327
|
|
1337
|
-
|
1328
|
+
.. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/tables#resource
|
1338
1329
|
|
1339
1330
|
:param dataset_id: the dataset ID of the requested table
|
1340
1331
|
:param table_id: the table ID of the requested table
|
@@ -1355,32 +1346,37 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1355
1346
|
table_id: str,
|
1356
1347
|
project_id: str | None = None,
|
1357
1348
|
) -> dict[str, Any]:
|
1358
|
-
"""
|
1359
|
-
|
1360
|
-
some fields in schemas are immutable
|
1361
|
-
an exception.
|
1362
|
-
If a new field is included it will be inserted which requires all required fields to be set.
|
1349
|
+
"""Update fields within a schema for a given dataset and table.
|
1350
|
+
|
1351
|
+
Note that some fields in schemas are immutable; trying to change them
|
1352
|
+
will cause an exception.
|
1363
1353
|
|
1364
|
-
|
1354
|
+
If a new field is included, it will be inserted, which requires all
|
1355
|
+
required fields to be set.
|
1356
|
+
|
1357
|
+
.. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableSchema
|
1365
1358
|
|
1366
1359
|
:param include_policy_tags: If set to True policy tags will be included in
|
1367
1360
|
the update request which requires special permissions even if unchanged
|
1368
1361
|
see https://cloud.google.com/bigquery/docs/column-level-security#roles
|
1369
1362
|
:param dataset_id: the dataset ID of the requested table to be updated
|
1370
1363
|
:param table_id: the table ID of the table to be updated
|
1371
|
-
:param schema_fields_updates: a partial schema resource.
|
1364
|
+
:param schema_fields_updates: a partial schema resource. See
|
1372
1365
|
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableSchema
|
1373
1366
|
|
1374
|
-
|
1367
|
+
.. code-block:: python
|
1375
1368
|
|
1376
|
-
|
1377
|
-
|
1378
|
-
|
1379
|
-
|
1380
|
-
|
1381
|
-
|
1382
|
-
|
1383
|
-
|
1369
|
+
schema_fields_updates = [
|
1370
|
+
{"name": "emp_name", "description": "Some New Description"},
|
1371
|
+
{"name": "salary", "description": "Some New Description"},
|
1372
|
+
{
|
1373
|
+
"name": "departments",
|
1374
|
+
"fields": [
|
1375
|
+
{"name": "name", "description": "Some New Description"},
|
1376
|
+
{"name": "type", "description": "Some New Description"},
|
1377
|
+
],
|
1378
|
+
},
|
1379
|
+
]
|
1384
1380
|
|
1385
1381
|
:param project_id: The name of the project where we want to update the table.
|
1386
1382
|
"""
|
@@ -1446,8 +1442,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1446
1442
|
location: str | None = None,
|
1447
1443
|
retry: Retry = DEFAULT_RETRY,
|
1448
1444
|
) -> bool:
|
1449
|
-
"""
|
1450
|
-
Check if jobs completed.
|
1445
|
+
"""Check if jobs have completed.
|
1451
1446
|
|
1452
1447
|
:param job_id: id of the job.
|
1453
1448
|
:param project_id: Google Cloud Project where the job is running
|
@@ -1476,8 +1471,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1476
1471
|
project_id: str | None = None,
|
1477
1472
|
location: str | None = None,
|
1478
1473
|
) -> None:
|
1479
|
-
"""
|
1480
|
-
Cancel a job and wait for cancellation to complete.
|
1474
|
+
"""Cancel a job and wait for cancellation to complete.
|
1481
1475
|
|
1482
1476
|
:param job_id: id of the job.
|
1483
1477
|
:param project_id: Google Cloud Project where the job is running
|
@@ -1521,10 +1515,9 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1521
1515
|
project_id: str | None = None,
|
1522
1516
|
location: str | None = None,
|
1523
1517
|
) -> CopyJob | QueryJob | LoadJob | ExtractJob | UnknownJob:
|
1524
|
-
"""
|
1525
|
-
Retrieves a BigQuery job.
|
1518
|
+
"""Retrieve a BigQuery job.
|
1526
1519
|
|
1527
|
-
|
1520
|
+
.. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
|
1528
1521
|
|
1529
1522
|
:param job_id: The ID of the job. The ID must contain only letters (a-z, A-Z),
|
1530
1523
|
numbers (0-9), underscores (_), or dashes (-). The maximum length is 1,024
|
@@ -1556,11 +1549,9 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1556
1549
|
retry: Retry = DEFAULT_RETRY,
|
1557
1550
|
timeout: float | None = None,
|
1558
1551
|
) -> BigQueryJob:
|
1559
|
-
"""
|
1560
|
-
Executes a BigQuery job. Waits for the job to complete and returns job id.
|
1552
|
+
"""Execute a BigQuery job and wait for it to complete.
|
1561
1553
|
|
1562
|
-
|
1563
|
-
https://cloud.google.com/bigquery/docs/reference/v2/jobs
|
1554
|
+
.. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
|
1564
1555
|
|
1565
1556
|
:param configuration: The configuration parameter maps directly to
|
1566
1557
|
BigQuery's configuration field in the job object. See
|
@@ -1569,12 +1560,13 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1569
1560
|
:param job_id: The ID of the job. The ID must contain only letters (a-z, A-Z),
|
1570
1561
|
numbers (0-9), underscores (_), or dashes (-). The maximum length is 1,024
|
1571
1562
|
characters. If not provided then uuid will be generated.
|
1572
|
-
:param project_id: Google Cloud Project where the job is running
|
1573
|
-
:param location:
|
1574
|
-
:param nowait:
|
1563
|
+
:param project_id: Google Cloud Project where the job is running.
|
1564
|
+
:param location: Location where the job is running.
|
1565
|
+
:param nowait: Whether to insert job without waiting for the result.
|
1575
1566
|
:param retry: How to retry the RPC.
|
1576
1567
|
:param timeout: The number of seconds to wait for the underlying HTTP transport
|
1577
1568
|
before using ``retry``.
|
1569
|
+
:return: The BigQuery job object.
|
1578
1570
|
"""
|
1579
1571
|
location = location or self.location
|
1580
1572
|
job_id = job_id or self._custom_job_id(configuration)
|
@@ -1611,14 +1603,11 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1611
1603
|
return job_api_repr
|
1612
1604
|
|
1613
1605
|
def run_with_configuration(self, configuration: dict) -> str:
|
1614
|
-
"""
|
1615
|
-
Executes a BigQuery SQL query.
|
1616
|
-
|
1617
|
-
See here: https://cloud.google.com/bigquery/docs/reference/v2/jobs
|
1606
|
+
"""Execute a BigQuery SQL query.
|
1618
1607
|
|
1619
|
-
|
1608
|
+
.. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
|
1620
1609
|
|
1621
|
-
|
1610
|
+
This method is deprecated. Please use :func:`.insert_job` instead.
|
1622
1611
|
|
1623
1612
|
:param configuration: The configuration parameter maps directly to
|
1624
1613
|
BigQuery's configuration field in the job object. See
|
@@ -1658,15 +1647,11 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1658
1647
|
labels: dict | None = None,
|
1659
1648
|
description: str | None = None,
|
1660
1649
|
) -> str:
|
1661
|
-
"""
|
1662
|
-
Executes a BigQuery load command to load data from Google Cloud Storage
|
1663
|
-
to BigQuery.
|
1650
|
+
"""Load data from Google Cloud Storage to BigQuery.
|
1664
1651
|
|
1665
|
-
|
1652
|
+
.. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
|
1666
1653
|
|
1667
|
-
This method is deprecated. Please use
|
1668
|
-
|
1669
|
-
For more details about these parameters.
|
1654
|
+
This method is deprecated. Please use :func:`.insert_job` instead.
|
1670
1655
|
|
1671
1656
|
:param destination_project_dataset_table:
|
1672
1657
|
The dotted ``(<project>.|<project>:)<dataset>.<table>($<partition>)`` BigQuery
|
@@ -1716,11 +1701,13 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1716
1701
|
by one or more columns. BigQuery supports clustering for both partitioned and
|
1717
1702
|
non-partitioned tables. The order of columns given determines the sort order.
|
1718
1703
|
:param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
|
1719
|
-
|
1704
|
+
|
1705
|
+
.. code-block:: python
|
1720
1706
|
|
1721
1707
|
encryption_configuration = {
|
1722
|
-
"kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key"
|
1708
|
+
"kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
|
1723
1709
|
}
|
1710
|
+
|
1724
1711
|
:param labels: A dictionary containing labels for the BigQuery table.
|
1725
1712
|
:param description: A string containing the description for the BigQuery table.
|
1726
1713
|
"""
|
@@ -1885,16 +1872,11 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1885
1872
|
labels: dict | None = None,
|
1886
1873
|
encryption_configuration: dict | None = None,
|
1887
1874
|
) -> str:
|
1888
|
-
"""
|
1889
|
-
Executes a BigQuery copy command to copy data from one BigQuery table
|
1890
|
-
to another.
|
1875
|
+
"""Copy data from one BigQuery table to another.
|
1891
1876
|
|
1877
|
+
.. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy
|
1892
1878
|
|
1893
|
-
|
1894
|
-
|
1895
|
-
This method is deprecated. Please use `BigQueryHook.insert_job` method.
|
1896
|
-
|
1897
|
-
For more details about these parameters.
|
1879
|
+
This method is deprecated. Please use :func:`.insert_job` instead.
|
1898
1880
|
|
1899
1881
|
:param source_project_dataset_tables: One or more dotted
|
1900
1882
|
``(project:|project.)<dataset>.<table>``
|
@@ -1909,11 +1891,12 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1909
1891
|
:param labels: a dictionary containing labels for the job/query,
|
1910
1892
|
passed to BigQuery
|
1911
1893
|
:param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
|
1912
|
-
**Example**: ::
|
1913
1894
|
|
1914
|
-
|
1915
|
-
|
1916
|
-
|
1895
|
+
.. code-block:: python
|
1896
|
+
|
1897
|
+
encryption_configuration = {
|
1898
|
+
"kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
|
1899
|
+
}
|
1917
1900
|
"""
|
1918
1901
|
warnings.warn(
|
1919
1902
|
"This method is deprecated. Please use `BigQueryHook.insert_job` method.",
|
@@ -1976,15 +1959,11 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
1976
1959
|
labels: dict | None = None,
|
1977
1960
|
return_full_job: bool = False,
|
1978
1961
|
) -> str | BigQueryJob:
|
1979
|
-
"""
|
1980
|
-
Executes a BigQuery extract command to copy data from BigQuery to
|
1981
|
-
Google Cloud Storage.
|
1982
|
-
|
1983
|
-
See here: https://cloud.google.com/bigquery/docs/reference/v2/jobs
|
1962
|
+
"""Copy data from BigQuery to Google Cloud Storage.
|
1984
1963
|
|
1985
|
-
|
1964
|
+
.. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
|
1986
1965
|
|
1987
|
-
|
1966
|
+
This method is deprecated. Please use :func:`.insert_job` instead.
|
1988
1967
|
|
1989
1968
|
:param source_project_dataset_table: The dotted ``<dataset>.<table>``
|
1990
1969
|
BigQuery table to use as the source data.
|
@@ -2064,13 +2043,13 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
2064
2043
|
location: str | None = None,
|
2065
2044
|
encryption_configuration: dict | None = None,
|
2066
2045
|
) -> str:
|
2067
|
-
"""
|
2068
|
-
|
2069
|
-
table.
|
2046
|
+
"""Execute a BigQuery SQL query.
|
2047
|
+
|
2048
|
+
Optionally persists results in a BigQuery table.
|
2070
2049
|
|
2071
|
-
|
2050
|
+
.. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs
|
2072
2051
|
|
2073
|
-
This method is deprecated. Please use
|
2052
|
+
This method is deprecated. Please use :func:`.insert_job` instead.
|
2074
2053
|
|
2075
2054
|
For more details about these parameters.
|
2076
2055
|
|
@@ -2120,11 +2099,12 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
2120
2099
|
US and EU. See details at
|
2121
2100
|
https://cloud.google.com/bigquery/docs/locations#specifying_your_location
|
2122
2101
|
:param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
|
2123
|
-
**Example**: ::
|
2124
2102
|
|
2125
|
-
|
2126
|
-
|
2127
|
-
|
2103
|
+
.. code-block:: python
|
2104
|
+
|
2105
|
+
encryption_configuration = {
|
2106
|
+
"kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
|
2107
|
+
}
|
2128
2108
|
"""
|
2129
2109
|
warnings.warn(
|
2130
2110
|
"This method is deprecated. Please use `BigQueryHook.insert_job` method.",
|
@@ -2283,7 +2263,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
2283
2263
|
def split_tablename(
|
2284
2264
|
self, table_input: str, default_project_id: str, var_name: str | None = None
|
2285
2265
|
) -> tuple[str, str, str]:
|
2286
|
-
|
2287
2266
|
if "." not in table_input:
|
2288
2267
|
raise ValueError(f"Expected table name in the format of <dataset>.<table>. Got: {table_input}")
|
2289
2268
|
|
@@ -2344,7 +2323,8 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
|
|
2344
2323
|
|
2345
2324
|
|
2346
2325
|
class BigQueryConnection:
|
2347
|
-
"""
|
2326
|
+
"""BigQuery connection.
|
2327
|
+
|
2348
2328
|
BigQuery does not have a notion of a persistent connection. Thus, these
|
2349
2329
|
objects are small stateless factories for cursors, which do all the real
|
2350
2330
|
work.
|
@@ -2370,7 +2350,8 @@ class BigQueryConnection:
|
|
2370
2350
|
|
2371
2351
|
|
2372
2352
|
class BigQueryBaseCursor(LoggingMixin):
|
2373
|
-
"""
|
2353
|
+
"""BigQuery cursor.
|
2354
|
+
|
2374
2355
|
The BigQuery base cursor contains helper methods to execute queries against
|
2375
2356
|
BigQuery. The methods can be used directly by operators, in cases where a
|
2376
2357
|
PEP 249 cursor isn't needed.
|
@@ -2401,9 +2382,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2401
2382
|
self.hook = hook
|
2402
2383
|
|
2403
2384
|
def create_empty_table(self, *args, **kwargs):
|
2404
|
-
"""
|
2405
|
-
|
2406
|
-
Please use
|
2385
|
+
"""This method is deprecated.
|
2386
|
+
|
2387
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_table`
|
2388
|
+
instead.
|
2407
2389
|
"""
|
2408
2390
|
warnings.warn(
|
2409
2391
|
"This method is deprecated. "
|
@@ -2414,9 +2396,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2414
2396
|
return self.hook.create_empty_table(*args, **kwargs)
|
2415
2397
|
|
2416
2398
|
def create_empty_dataset(self, *args, **kwargs) -> dict[str, Any]:
|
2417
|
-
"""
|
2418
|
-
|
2419
|
-
Please use
|
2399
|
+
"""This method is deprecated.
|
2400
|
+
|
2401
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_dataset`
|
2402
|
+
instead.
|
2420
2403
|
"""
|
2421
2404
|
warnings.warn(
|
2422
2405
|
"This method is deprecated. "
|
@@ -2427,9 +2410,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2427
2410
|
return self.hook.create_empty_dataset(*args, **kwargs)
|
2428
2411
|
|
2429
2412
|
def get_dataset_tables(self, *args, **kwargs) -> list[dict[str, Any]]:
|
2430
|
-
"""
|
2431
|
-
|
2432
|
-
Please use
|
2413
|
+
"""This method is deprecated.
|
2414
|
+
|
2415
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables`
|
2416
|
+
instead.
|
2433
2417
|
"""
|
2434
2418
|
warnings.warn(
|
2435
2419
|
"This method is deprecated. "
|
@@ -2440,9 +2424,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2440
2424
|
return self.hook.get_dataset_tables(*args, **kwargs)
|
2441
2425
|
|
2442
2426
|
def delete_dataset(self, *args, **kwargs) -> None:
|
2443
|
-
"""
|
2444
|
-
|
2445
|
-
Please use
|
2427
|
+
"""This method is deprecated.
|
2428
|
+
|
2429
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.delete_dataset`
|
2430
|
+
instead.
|
2446
2431
|
"""
|
2447
2432
|
warnings.warn(
|
2448
2433
|
"This method is deprecated. "
|
@@ -2453,9 +2438,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2453
2438
|
return self.hook.delete_dataset(*args, **kwargs)
|
2454
2439
|
|
2455
2440
|
def create_external_table(self, *args, **kwargs):
|
2456
|
-
"""
|
2457
|
-
|
2458
|
-
Please use
|
2441
|
+
"""This method is deprecated.
|
2442
|
+
|
2443
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_external_table`
|
2444
|
+
instead.
|
2459
2445
|
"""
|
2460
2446
|
warnings.warn(
|
2461
2447
|
"This method is deprecated. "
|
@@ -2466,9 +2452,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2466
2452
|
return self.hook.create_external_table(*args, **kwargs)
|
2467
2453
|
|
2468
2454
|
def patch_table(self, *args, **kwargs) -> None:
|
2469
|
-
"""
|
2470
|
-
|
2471
|
-
Please use
|
2455
|
+
"""This method is deprecated.
|
2456
|
+
|
2457
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_table`
|
2458
|
+
instead.
|
2472
2459
|
"""
|
2473
2460
|
warnings.warn(
|
2474
2461
|
"This method is deprecated. "
|
@@ -2479,9 +2466,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2479
2466
|
return self.hook.patch_table(*args, **kwargs)
|
2480
2467
|
|
2481
2468
|
def insert_all(self, *args, **kwargs) -> None:
|
2482
|
-
"""
|
2483
|
-
|
2484
|
-
Please use
|
2469
|
+
"""This method is deprecated.
|
2470
|
+
|
2471
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_all`
|
2472
|
+
instead.
|
2485
2473
|
"""
|
2486
2474
|
warnings.warn(
|
2487
2475
|
"This method is deprecated. "
|
@@ -2492,9 +2480,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2492
2480
|
return self.hook.insert_all(*args, **kwargs)
|
2493
2481
|
|
2494
2482
|
def update_dataset(self, *args, **kwargs) -> dict:
|
2495
|
-
"""
|
2496
|
-
|
2497
|
-
Please use
|
2483
|
+
"""This method is deprecated.
|
2484
|
+
|
2485
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.update_dataset`
|
2486
|
+
instead.
|
2498
2487
|
"""
|
2499
2488
|
warnings.warn(
|
2500
2489
|
"This method is deprecated. "
|
@@ -2505,9 +2494,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2505
2494
|
return Dataset.to_api_repr(self.hook.update_dataset(*args, **kwargs))
|
2506
2495
|
|
2507
2496
|
def patch_dataset(self, *args, **kwargs) -> dict:
|
2508
|
-
"""
|
2509
|
-
|
2510
|
-
Please use
|
2497
|
+
"""This method is deprecated.
|
2498
|
+
|
2499
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_dataset`
|
2500
|
+
instead.
|
2511
2501
|
"""
|
2512
2502
|
warnings.warn(
|
2513
2503
|
"This method is deprecated. "
|
@@ -2518,9 +2508,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2518
2508
|
return self.hook.patch_dataset(*args, **kwargs)
|
2519
2509
|
|
2520
2510
|
def get_dataset_tables_list(self, *args, **kwargs) -> list[dict[str, Any]]:
|
2521
|
-
"""
|
2522
|
-
|
2523
|
-
Please use
|
2511
|
+
"""This method is deprecated.
|
2512
|
+
|
2513
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables_list`
|
2514
|
+
instead.
|
2524
2515
|
"""
|
2525
2516
|
warnings.warn(
|
2526
2517
|
"This method is deprecated. "
|
@@ -2531,9 +2522,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2531
2522
|
return self.hook.get_dataset_tables_list(*args, **kwargs)
|
2532
2523
|
|
2533
2524
|
def get_datasets_list(self, *args, **kwargs) -> list | HTTPIterator:
|
2534
|
-
"""
|
2535
|
-
|
2536
|
-
Please use
|
2525
|
+
"""This method is deprecated.
|
2526
|
+
|
2527
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_datasets_list`
|
2528
|
+
instead.
|
2537
2529
|
"""
|
2538
2530
|
warnings.warn(
|
2539
2531
|
"This method is deprecated. "
|
@@ -2544,9 +2536,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2544
2536
|
return self.hook.get_datasets_list(*args, **kwargs)
|
2545
2537
|
|
2546
2538
|
def get_dataset(self, *args, **kwargs) -> Dataset:
|
2547
|
-
"""
|
2548
|
-
|
2549
|
-
Please use
|
2539
|
+
"""This method is deprecated.
|
2540
|
+
|
2541
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset`
|
2542
|
+
instead.
|
2550
2543
|
"""
|
2551
2544
|
warnings.warn(
|
2552
2545
|
"This method is deprecated. "
|
@@ -2557,9 +2550,11 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2557
2550
|
return self.hook.get_dataset(*args, **kwargs)
|
2558
2551
|
|
2559
2552
|
def run_grant_dataset_view_access(self, *args, **kwargs) -> dict:
|
2560
|
-
"""
|
2561
|
-
|
2562
|
-
Please use
|
2553
|
+
"""This method is deprecated.
|
2554
|
+
|
2555
|
+
Please use
|
2556
|
+
:func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_grant_dataset_view_access`
|
2557
|
+
instead.
|
2563
2558
|
"""
|
2564
2559
|
warnings.warn(
|
2565
2560
|
"This method is deprecated. "
|
@@ -2571,9 +2566,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2571
2566
|
return self.hook.run_grant_dataset_view_access(*args, **kwargs)
|
2572
2567
|
|
2573
2568
|
def run_table_upsert(self, *args, **kwargs) -> dict:
|
2574
|
-
"""
|
2575
|
-
|
2576
|
-
Please use
|
2569
|
+
"""This method is deprecated.
|
2570
|
+
|
2571
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_upsert`
|
2572
|
+
instead.
|
2577
2573
|
"""
|
2578
2574
|
warnings.warn(
|
2579
2575
|
"This method is deprecated. "
|
@@ -2584,9 +2580,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2584
2580
|
return self.hook.run_table_upsert(*args, **kwargs)
|
2585
2581
|
|
2586
2582
|
def run_table_delete(self, *args, **kwargs) -> None:
|
2587
|
-
"""
|
2588
|
-
|
2589
|
-
Please use
|
2583
|
+
"""This method is deprecated.
|
2584
|
+
|
2585
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_delete`
|
2586
|
+
instead.
|
2590
2587
|
"""
|
2591
2588
|
warnings.warn(
|
2592
2589
|
"This method is deprecated. "
|
@@ -2597,9 +2594,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2597
2594
|
return self.hook.run_table_delete(*args, **kwargs)
|
2598
2595
|
|
2599
2596
|
def get_tabledata(self, *args, **kwargs) -> list[dict]:
|
2600
|
-
"""
|
2601
|
-
|
2602
|
-
Please use
|
2597
|
+
"""This method is deprecated.
|
2598
|
+
|
2599
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_tabledata`
|
2600
|
+
instead.
|
2603
2601
|
"""
|
2604
2602
|
warnings.warn(
|
2605
2603
|
"This method is deprecated. "
|
@@ -2610,9 +2608,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2610
2608
|
return self.hook.get_tabledata(*args, **kwargs)
|
2611
2609
|
|
2612
2610
|
def get_schema(self, *args, **kwargs) -> dict:
|
2613
|
-
"""
|
2614
|
-
|
2615
|
-
Please use
|
2611
|
+
"""This method is deprecated.
|
2612
|
+
|
2613
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_schema`
|
2614
|
+
instead.
|
2616
2615
|
"""
|
2617
2616
|
warnings.warn(
|
2618
2617
|
"This method is deprecated. "
|
@@ -2623,9 +2622,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2623
2622
|
return self.hook.get_schema(*args, **kwargs)
|
2624
2623
|
|
2625
2624
|
def poll_job_complete(self, *args, **kwargs) -> bool:
|
2626
|
-
"""
|
2627
|
-
|
2628
|
-
Please use
|
2625
|
+
"""This method is deprecated.
|
2626
|
+
|
2627
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.poll_job_complete`
|
2628
|
+
instead.
|
2629
2629
|
"""
|
2630
2630
|
warnings.warn(
|
2631
2631
|
"This method is deprecated. "
|
@@ -2636,9 +2636,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2636
2636
|
return self.hook.poll_job_complete(*args, **kwargs)
|
2637
2637
|
|
2638
2638
|
def cancel_query(self, *args, **kwargs) -> None:
|
2639
|
-
"""
|
2640
|
-
|
2641
|
-
Please use
|
2639
|
+
"""This method is deprecated.
|
2640
|
+
|
2641
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.cancel_query`
|
2642
|
+
instead.
|
2642
2643
|
"""
|
2643
2644
|
warnings.warn(
|
2644
2645
|
"This method is deprecated. "
|
@@ -2649,9 +2650,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2649
2650
|
return self.hook.cancel_query(*args, **kwargs) # type: ignore
|
2650
2651
|
|
2651
2652
|
def run_with_configuration(self, *args, **kwargs) -> str:
|
2652
|
-
"""
|
2653
|
-
|
2654
|
-
Please use
|
2653
|
+
"""This method is deprecated.
|
2654
|
+
|
2655
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_with_configuration`
|
2656
|
+
instead.
|
2655
2657
|
"""
|
2656
2658
|
warnings.warn(
|
2657
2659
|
"This method is deprecated. "
|
@@ -2662,9 +2664,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2662
2664
|
return self.hook.run_with_configuration(*args, **kwargs)
|
2663
2665
|
|
2664
2666
|
def run_load(self, *args, **kwargs) -> str:
|
2665
|
-
"""
|
2666
|
-
|
2667
|
-
Please use
|
2667
|
+
"""This method is deprecated.
|
2668
|
+
|
2669
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_load`
|
2670
|
+
instead.
|
2668
2671
|
"""
|
2669
2672
|
warnings.warn(
|
2670
2673
|
"This method is deprecated. "
|
@@ -2675,9 +2678,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2675
2678
|
return self.hook.run_load(*args, **kwargs)
|
2676
2679
|
|
2677
2680
|
def run_copy(self, *args, **kwargs) -> str:
|
2678
|
-
"""
|
2679
|
-
|
2680
|
-
Please use
|
2681
|
+
"""This method is deprecated.
|
2682
|
+
|
2683
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_copy`
|
2684
|
+
instead.
|
2681
2685
|
"""
|
2682
2686
|
warnings.warn(
|
2683
2687
|
"This method is deprecated. "
|
@@ -2688,9 +2692,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2688
2692
|
return self.hook.run_copy(*args, **kwargs)
|
2689
2693
|
|
2690
2694
|
def run_extract(self, *args, **kwargs) -> str | BigQueryJob:
|
2691
|
-
"""
|
2692
|
-
|
2693
|
-
Please use
|
2695
|
+
"""This method is deprecated.
|
2696
|
+
|
2697
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_extract`
|
2698
|
+
instead.
|
2694
2699
|
"""
|
2695
2700
|
warnings.warn(
|
2696
2701
|
"This method is deprecated. "
|
@@ -2701,9 +2706,10 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2701
2706
|
return self.hook.run_extract(*args, **kwargs)
|
2702
2707
|
|
2703
2708
|
def run_query(self, *args, **kwargs) -> str:
|
2704
|
-
"""
|
2705
|
-
|
2706
|
-
Please use
|
2709
|
+
"""This method is deprecated.
|
2710
|
+
|
2711
|
+
Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_query`
|
2712
|
+
instead.
|
2707
2713
|
"""
|
2708
2714
|
warnings.warn(
|
2709
2715
|
"This method is deprecated. "
|
@@ -2715,9 +2721,9 @@ class BigQueryBaseCursor(LoggingMixin):
|
|
2715
2721
|
|
2716
2722
|
|
2717
2723
|
class BigQueryCursor(BigQueryBaseCursor):
|
2718
|
-
"""
|
2719
|
-
|
2720
|
-
implementation was used as a reference
|
2724
|
+
"""A very basic BigQuery PEP 249 cursor implementation.
|
2725
|
+
|
2726
|
+
The PyHive PEP 249 implementation was used as a reference:
|
2721
2727
|
|
2722
2728
|
https://github.com/dropbox/PyHive/blob/master/pyhive/presto.py
|
2723
2729
|
https://github.com/dropbox/PyHive/blob/master/pyhive/common.py
|
@@ -2765,8 +2771,7 @@ class BigQueryCursor(BigQueryBaseCursor):
|
|
2765
2771
|
return -1
|
2766
2772
|
|
2767
2773
|
def execute(self, operation: str, parameters: dict | None = None) -> None:
|
2768
|
-
"""
|
2769
|
-
Executes a BigQuery query, and returns the job ID.
|
2774
|
+
"""Execute a BigQuery query, and return the job ID.
|
2770
2775
|
|
2771
2776
|
:param operation: The query to execute.
|
2772
2777
|
:param parameters: Parameters to substitute into the query.
|
@@ -2782,8 +2787,7 @@ class BigQueryCursor(BigQueryBaseCursor):
|
|
2782
2787
|
self.description = []
|
2783
2788
|
|
2784
2789
|
def executemany(self, operation: str, seq_of_parameters: list) -> None:
|
2785
|
-
"""
|
2786
|
-
Execute a BigQuery query multiple times with different parameters.
|
2790
|
+
"""Execute a BigQuery query multiple times with different parameters.
|
2787
2791
|
|
2788
2792
|
:param operation: The query to execute.
|
2789
2793
|
:param seq_of_parameters: List of dictionary parameters to substitute into the
|
@@ -2804,8 +2808,10 @@ class BigQueryCursor(BigQueryBaseCursor):
|
|
2804
2808
|
return self.next()
|
2805
2809
|
|
2806
2810
|
def next(self) -> list | None:
|
2807
|
-
"""
|
2808
|
-
|
2811
|
+
"""Return the next row from a buffer.
|
2812
|
+
|
2813
|
+
Helper method for ``fetchone``.
|
2814
|
+
|
2809
2815
|
If the buffer is empty, attempts to paginate through the result set for
|
2810
2816
|
the next page, and load it into the buffer.
|
2811
2817
|
"""
|
@@ -2838,16 +2844,22 @@ class BigQueryCursor(BigQueryBaseCursor):
|
|
2838
2844
|
return self.buffer.pop(0)
|
2839
2845
|
|
2840
2846
|
def fetchmany(self, size: int | None = None) -> list:
|
2841
|
-
"""
|
2842
|
-
|
2843
|
-
(e.g. a list of tuples). An empty
|
2844
|
-
|
2845
|
-
|
2846
|
-
|
2847
|
-
|
2848
|
-
|
2849
|
-
|
2850
|
-
|
2847
|
+
"""Fetch the next set of rows of a query result.
|
2848
|
+
|
2849
|
+
This returns a sequence of sequences (e.g. a list of tuples). An empty
|
2850
|
+
sequence is returned when no more rows are available.
|
2851
|
+
|
2852
|
+
The number of rows to fetch per call is specified by the parameter. If
|
2853
|
+
it is not given, the cursor's arraysize determines the number of rows to
|
2854
|
+
be fetched.
|
2855
|
+
|
2856
|
+
This method tries to fetch as many rows as indicated by the size
|
2857
|
+
parameter. If this is not possible due to the specified number of rows
|
2858
|
+
not being available, fewer rows may be returned.
|
2859
|
+
|
2860
|
+
An :py:class:`~pyhive.exc.Error` (or subclass) exception is raised if
|
2861
|
+
the previous call to :py:meth:`execute` did not produce any result set,
|
2862
|
+
or no call was issued yet.
|
2851
2863
|
"""
|
2852
2864
|
if size is None:
|
2853
2865
|
size = self.arraysize
|
@@ -2860,9 +2872,9 @@ class BigQueryCursor(BigQueryBaseCursor):
|
|
2860
2872
|
return result
|
2861
2873
|
|
2862
2874
|
def fetchall(self) -> list[list]:
|
2863
|
-
"""
|
2864
|
-
|
2865
|
-
sequences (e.g. a list of tuples).
|
2875
|
+
"""Fetch all (remaining) rows of a query result.
|
2876
|
+
|
2877
|
+
A sequence of sequences (e.g. a list of tuples) is returned.
|
2866
2878
|
"""
|
2867
2879
|
result = []
|
2868
2880
|
while True:
|
@@ -2873,11 +2885,17 @@ class BigQueryCursor(BigQueryBaseCursor):
|
|
2873
2885
|
return result
|
2874
2886
|
|
2875
2887
|
def get_arraysize(self) -> int:
|
2876
|
-
"""
|
2888
|
+
"""Number of rows to fetch at a time.
|
2889
|
+
|
2890
|
+
.. seealso:: :func:`.fetchmany()`
|
2891
|
+
"""
|
2877
2892
|
return self.buffersize or 1
|
2878
2893
|
|
2879
2894
|
def set_arraysize(self, arraysize: int) -> None:
|
2880
|
-
"""
|
2895
|
+
"""Set the number of rows to fetch at a time.
|
2896
|
+
|
2897
|
+
.. seealso:: :func:`.fetchmany()`
|
2898
|
+
"""
|
2881
2899
|
self.buffersize = arraysize
|
2882
2900
|
|
2883
2901
|
arraysize = property(get_arraysize, set_arraysize)
|
@@ -2889,7 +2907,7 @@ class BigQueryCursor(BigQueryBaseCursor):
|
|
2889
2907
|
"""Does nothing by default."""
|
2890
2908
|
|
2891
2909
|
def _get_query_result(self) -> dict:
|
2892
|
-
"""Get job query results
|
2910
|
+
"""Get job query results; data, schema, job type, etc."""
|
2893
2911
|
query_results = (
|
2894
2912
|
self.service.jobs()
|
2895
2913
|
.getQueryResults(
|
@@ -2993,10 +3011,8 @@ def split_tablename(
|
|
2993
3011
|
def _cleanse_time_partitioning(
|
2994
3012
|
destination_dataset_table: str | None, time_partitioning_in: dict | None
|
2995
3013
|
) -> dict: # if it is a partitioned table ($ is in the table name) add partition load option
|
2996
|
-
|
2997
3014
|
if time_partitioning_in is None:
|
2998
3015
|
time_partitioning_in = {}
|
2999
|
-
|
3000
3016
|
time_partitioning_out = {}
|
3001
3017
|
if destination_dataset_table and "$" in destination_dataset_table:
|
3002
3018
|
time_partitioning_out["type"] = "DAY"
|
@@ -3005,7 +3021,7 @@ def _cleanse_time_partitioning(
|
|
3005
3021
|
|
3006
3022
|
|
3007
3023
|
def _validate_value(key: Any, value: Any, expected_type: type | tuple[type]) -> None:
|
3008
|
-
"""
|
3024
|
+
"""Check expected type and raise error if type is not correct."""
|
3009
3025
|
if not isinstance(value, expected_type):
|
3010
3026
|
raise TypeError(f"{key} argument must have a type {expected_type} not {type(value)}")
|
3011
3027
|
|
@@ -3030,9 +3046,9 @@ def _validate_src_fmt_configs(
|
|
3030
3046
|
valid_configs: list[str],
|
3031
3047
|
backward_compatibility_configs: dict | None = None,
|
3032
3048
|
) -> dict:
|
3033
|
-
"""
|
3034
|
-
|
3035
|
-
Adds the backward compatibility config to
|
3049
|
+
"""Validate ``src_fmt_configs`` against a valid config for the source format.
|
3050
|
+
|
3051
|
+
Adds the backward compatibility config to ``src_fmt_configs``.
|
3036
3052
|
|
3037
3053
|
:param source_format: File format to export.
|
3038
3054
|
:param src_fmt_configs: Configure optional fields specific to the source format.
|
@@ -3054,9 +3070,10 @@ def _validate_src_fmt_configs(
|
|
3054
3070
|
|
3055
3071
|
|
3056
3072
|
def _format_schema_for_description(schema: dict) -> list:
|
3057
|
-
"""
|
3058
|
-
|
3059
|
-
of 7 elemenbts
|
3073
|
+
"""Reformat the schema to match cursor description standard.
|
3074
|
+
|
3075
|
+
The description should be a tuple of 7 elemenbts: name, type, display_size,
|
3076
|
+
internal_size, precision, scale, null_ok.
|
3060
3077
|
"""
|
3061
3078
|
description = []
|
3062
3079
|
for field in schema["fields"]:
|
@@ -3091,8 +3108,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
|
|
3091
3108
|
job_id: str | None,
|
3092
3109
|
project_id: str | None = None,
|
3093
3110
|
) -> str | None:
|
3094
|
-
"""
|
3095
|
-
Polls for job status asynchronously using gcloud-aio.
|
3111
|
+
"""Poll for job status asynchronously using gcloud-aio.
|
3096
3112
|
|
3097
3113
|
Note that an OSError is raised when Job results are still pending.
|
3098
3114
|
Exception means that Job finished with errors
|
@@ -3116,7 +3132,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
|
|
3116
3132
|
job_id: str | None,
|
3117
3133
|
project_id: str | None = None,
|
3118
3134
|
) -> dict[str, Any]:
|
3119
|
-
"""Get the
|
3135
|
+
"""Get the BigQuery job output for a given job ID asynchronously."""
|
3120
3136
|
async with ClientSession() as session:
|
3121
3137
|
self.log.info("Executing get_job_output..")
|
3122
3138
|
job_client = await self.get_job_instance(project_id, job_id, session)
|
@@ -3142,8 +3158,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
|
|
3142
3158
|
return job_query_resp["jobReference"]["jobId"]
|
3143
3159
|
|
3144
3160
|
def get_records(self, query_results: dict[str, Any], as_dict: bool = False) -> list[Any]:
|
3145
|
-
"""
|
3146
|
-
Given the output query response from gcloud-aio bigquery, convert the response to records.
|
3161
|
+
"""Convert a response from BigQuery to records.
|
3147
3162
|
|
3148
3163
|
:param query_results: the results from a SQL query
|
3149
3164
|
:param as_dict: if True returns the result as a list of dictionaries, otherwise as list of lists.
|
@@ -3170,10 +3185,9 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
|
|
3170
3185
|
records: list[Any],
|
3171
3186
|
tolerance: float | None = None,
|
3172
3187
|
) -> None:
|
3173
|
-
"""
|
3174
|
-
Match a single query resulting row and tolerance with pass_value.
|
3188
|
+
"""Match a single query resulting row and tolerance with pass_value.
|
3175
3189
|
|
3176
|
-
:
|
3190
|
+
:raise AirflowException: if matching fails
|
3177
3191
|
"""
|
3178
3192
|
if not records:
|
3179
3193
|
raise AirflowException("The query returned None")
|
@@ -3208,8 +3222,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
|
|
3208
3222
|
def _get_numeric_matches(
|
3209
3223
|
records: list[float], pass_value: Any, tolerance: float | None = None
|
3210
3224
|
) -> list[bool]:
|
3211
|
-
"""
|
3212
|
-
A helper function to match numeric pass_value, tolerance with records value.
|
3225
|
+
"""Match numeric pass_value, tolerance with records value.
|
3213
3226
|
|
3214
3227
|
:param records: List of value to match against
|
3215
3228
|
:param pass_value: Expected value
|
@@ -3224,8 +3237,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
|
|
3224
3237
|
|
3225
3238
|
@staticmethod
|
3226
3239
|
def _convert_to_float_if_possible(s: Any) -> Any:
|
3227
|
-
"""
|
3228
|
-
A small helper function to convert a string to a numeric value if appropriate.
|
3240
|
+
"""Convert a string to a numeric value if appropriate.
|
3229
3241
|
|
3230
3242
|
:param s: the string to be converted
|
3231
3243
|
"""
|
@@ -3242,8 +3254,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
|
|
3242
3254
|
ignore_zero: bool,
|
3243
3255
|
ratio_formula: str,
|
3244
3256
|
) -> None:
|
3245
|
-
"""
|
3246
|
-
Checks that the values of metrics given as SQL expressions are within a certain tolerance.
|
3257
|
+
"""Check values of metrics (SQL expressions) are within a certain tolerance.
|
3247
3258
|
|
3248
3259
|
:param row1: first resulting row of a query execution job for first SQL query
|
3249
3260
|
:param row2: first resulting row of a query execution job for second SQL query
|
@@ -3323,15 +3334,14 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
|
|
3323
3334
|
|
3324
3335
|
|
3325
3336
|
class BigQueryTableAsyncHook(GoogleBaseAsyncHook):
|
3326
|
-
"""
|
3337
|
+
"""Async hook for BigQuery Table."""
|
3327
3338
|
|
3328
3339
|
sync_hook_class = BigQueryHook
|
3329
3340
|
|
3330
3341
|
async def get_table_client(
|
3331
3342
|
self, dataset: str, table_id: str, project_id: str, session: ClientSession
|
3332
3343
|
) -> Table_async:
|
3333
|
-
"""
|
3334
|
-
Returns a Google Big Query Table object.
|
3344
|
+
"""Get a Google BigQuery Table object.
|
3335
3345
|
|
3336
3346
|
:param dataset: The name of the dataset in which to look for the table storage bucket.
|
3337
3347
|
:param table_id: The name of the table to check the existence of.
|