apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (91)
  1. airflow/providers/google/__init__.py +5 -8
  2. airflow/providers/google/cloud/hooks/automl.py +35 -1
  3. airflow/providers/google/cloud/hooks/bigquery.py +126 -41
  4. airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
  5. airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
  6. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
  7. airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
  8. airflow/providers/google/cloud/hooks/dataflow.py +246 -32
  9. airflow/providers/google/cloud/hooks/dataplex.py +6 -2
  10. airflow/providers/google/cloud/hooks/dlp.py +14 -14
  11. airflow/providers/google/cloud/hooks/gcs.py +6 -2
  12. airflow/providers/google/cloud/hooks/gdm.py +2 -2
  13. airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
  14. airflow/providers/google/cloud/hooks/mlengine.py +8 -4
  15. airflow/providers/google/cloud/hooks/pubsub.py +1 -1
  16. airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
  17. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
  18. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +91 -0
  19. airflow/providers/google/cloud/links/vertex_ai.py +2 -1
  20. airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
  21. airflow/providers/google/cloud/operators/automl.py +243 -37
  22. airflow/providers/google/cloud/operators/bigquery.py +164 -62
  23. airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
  24. airflow/providers/google/cloud/operators/bigtable.py +7 -6
  25. airflow/providers/google/cloud/operators/cloud_build.py +12 -11
  26. airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
  27. airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
  28. airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
  29. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
  30. airflow/providers/google/cloud/operators/compute.py +12 -11
  31. airflow/providers/google/cloud/operators/datacatalog.py +21 -20
  32. airflow/providers/google/cloud/operators/dataflow.py +59 -42
  33. airflow/providers/google/cloud/operators/datafusion.py +11 -10
  34. airflow/providers/google/cloud/operators/datapipeline.py +3 -2
  35. airflow/providers/google/cloud/operators/dataprep.py +5 -4
  36. airflow/providers/google/cloud/operators/dataproc.py +20 -17
  37. airflow/providers/google/cloud/operators/datastore.py +8 -7
  38. airflow/providers/google/cloud/operators/dlp.py +31 -30
  39. airflow/providers/google/cloud/operators/functions.py +4 -3
  40. airflow/providers/google/cloud/operators/gcs.py +66 -41
  41. airflow/providers/google/cloud/operators/kubernetes_engine.py +256 -49
  42. airflow/providers/google/cloud/operators/life_sciences.py +2 -1
  43. airflow/providers/google/cloud/operators/mlengine.py +11 -10
  44. airflow/providers/google/cloud/operators/pubsub.py +6 -5
  45. airflow/providers/google/cloud/operators/spanner.py +7 -6
  46. airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
  47. airflow/providers/google/cloud/operators/stackdriver.py +11 -10
  48. airflow/providers/google/cloud/operators/tasks.py +14 -13
  49. airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
  50. airflow/providers/google/cloud/operators/translate_speech.py +2 -1
  51. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
  52. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
  53. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
  54. airflow/providers/google/cloud/operators/vision.py +13 -12
  55. airflow/providers/google/cloud/operators/workflows.py +12 -14
  56. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  57. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
  58. airflow/providers/google/cloud/sensors/bigtable.py +2 -1
  59. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
  60. airflow/providers/google/cloud/sensors/dataflow.py +239 -52
  61. airflow/providers/google/cloud/sensors/datafusion.py +2 -1
  62. airflow/providers/google/cloud/sensors/dataproc.py +3 -2
  63. airflow/providers/google/cloud/sensors/gcs.py +14 -12
  64. airflow/providers/google/cloud/sensors/tasks.py +2 -1
  65. airflow/providers/google/cloud/sensors/workflows.py +2 -1
  66. airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
  67. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
  68. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
  69. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  70. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
  71. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
  72. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
  73. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
  74. airflow/providers/google/cloud/triggers/bigquery.py +75 -6
  75. airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
  76. airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
  77. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
  78. airflow/providers/google/cloud/triggers/dataflow.py +504 -4
  79. airflow/providers/google/cloud/triggers/dataproc.py +190 -27
  80. airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -3
  81. airflow/providers/google/cloud/triggers/mlengine.py +2 -1
  82. airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
  83. airflow/providers/google/common/hooks/base_google.py +45 -7
  84. airflow/providers/google/firebase/hooks/firestore.py +2 -2
  85. airflow/providers/google/firebase/operators/firestore.py +2 -1
  86. airflow/providers/google/get_provider_info.py +5 -3
  87. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/METADATA +18 -18
  88. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/RECORD +90 -90
  89. airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
  90. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/WHEEL +0 -0
  91. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/__init__.py
@@ -25,18 +25,15 @@ from __future__ import annotations
 
 import packaging.version
 
-__all__ = ["__version__"]
+from airflow import __version__ as airflow_version
 
-__version__ = "10.17.0"
+__all__ = ["__version__"]
 
-try:
-    from airflow import __version__ as airflow_version
-except ImportError:
-    from airflow.version import version as airflow_version
+__version__ = "10.18.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
-    "2.6.0"
+    "2.7.0"
 ):
     raise RuntimeError(
-        f"The package `apache-airflow-providers-google:{__version__}` needs Apache Airflow 2.6.0+"
+        f"The package `apache-airflow-providers-google:{__version__}` needs Apache Airflow 2.7.0+"
     )
airflow/providers/google/cloud/hooks/automl.py
@@ -529,7 +529,7 @@ class CloudAutoMLHook(GoogleBaseHook):
         self,
         dataset_id: str,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         filter_: str | None = None,
         page_size: int | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -640,3 +640,37 @@ class CloudAutoMLHook(GoogleBaseHook):
             metadata=metadata,
         )
         return result
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def get_dataset(
+        self,
+        dataset_id: str,
+        location: str,
+        project_id: str,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Dataset:
+        """
+        Retrieve the dataset for the given dataset_id.
+
+        :param dataset_id: ID of dataset to be retrieved.
+        :param location: The location of the project.
+        :param project_id: ID of the Google Cloud project where dataset is located if None then
+            default project_id is used.
+        :param retry: A retry object used to retry requests. If `None` is specified, requests will not be
+            retried.
+        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
+            `retry` is specified, the timeout applies to each individual attempt.
+        :param metadata: Additional metadata that is provided to the method.
+
+        :return: `google.cloud.automl_v1beta1.types.dataset.Dataset` instance.
+        """
+        client = self.get_conn()
+        name = f"projects/{project_id}/locations/{location}/datasets/{dataset_id}"
+        return client.get_dataset(
+            request={"name": name},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
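
A minimal usage sketch (not part of the diff itself) for the newly added CloudAutoMLHook.get_dataset, assuming a configured Google Cloud connection; the connection ID, project, location and dataset ID below are placeholders:

    from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook

    # Placeholder connection and resource identifiers.
    hook = CloudAutoMLHook(gcp_conn_id="google_cloud_default")
    dataset = hook.get_dataset(
        dataset_id="TBL1234567890",
        location="us-central1",
        project_id="example-project",  # may be omitted; fallback_to_default_project_id fills in the connection's project
    )
    print(dataset.display_name)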
airflow/providers/google/cloud/hooks/bigquery.py
@@ -46,7 +46,14 @@ from google.cloud.bigquery import (
     UnknownJob,
 )
 from google.cloud.bigquery.dataset import AccessEntry, Dataset, DatasetListItem, DatasetReference
-from google.cloud.bigquery.table import EncryptionConfiguration, Row, RowIterator, Table, TableReference
+from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY
+from google.cloud.bigquery.table import (
+    EncryptionConfiguration,
+    Row,
+    RowIterator,
+    Table,
+    TableReference,
+)
 from google.cloud.exceptions import NotFound
 from googleapiclient.discovery import Resource, build
 from pandas_gbq import read_gbq
@@ -59,13 +66,13 @@ from airflow.providers.common.sql.hooks.sql import DbApiHook
 from airflow.providers.google.cloud.utils.bigquery import bq_cast
 from airflow.providers.google.cloud.utils.credentials_provider import _get_scopes
 from airflow.providers.google.common.consts import CLIENT_INFO
-from airflow.providers.google.common.hooks.base_google import GoogleBaseAsyncHook, GoogleBaseHook, get_field
-
-try:
-    from airflow.utils.hashlib_wrapper import md5
-except ModuleNotFoundError:
-    # Remove when Airflow providers min Airflow version is "2.7.0"
-    from hashlib import md5
+from airflow.providers.google.common.hooks.base_google import (
+    PROVIDE_PROJECT_ID,
+    GoogleBaseAsyncHook,
+    GoogleBaseHook,
+    get_field,
+)
+from airflow.utils.hashlib_wrapper import md5
 from airflow.utils.helpers import convert_camel_to_snake
 from airflow.utils.log.logging_mixin import LoggingMixin
 
@@ -198,7 +205,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         http_authorized = self._authorize()
         return build("bigquery", "v2", http=http_authorized, cache_discovery=False)
 
-    def get_client(self, project_id: str | None = None, location: str | None = None) -> Client:
+    def get_client(self, project_id: str = PROVIDE_PROJECT_ID, location: str | None = None) -> Client:
         """Get an authenticated BigQuery Client.
 
         :param project_id: Project ID for the project which the client acts on behalf of.
@@ -250,7 +257,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
     @staticmethod
     def _resolve_table_reference(
         table_resource: dict[str, Any],
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         dataset_id: str | None = None,
         table_id: str | None = None,
     ) -> dict[str, Any]:
@@ -360,7 +367,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
     @GoogleBaseHook.fallback_to_default_project_id
     def create_empty_table(
         self,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         dataset_id: str | None = None,
         table_id: str | None = None,
         table_resource: dict[str, Any] | None = None,
@@ -474,7 +481,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
     def create_empty_dataset(
         self,
         dataset_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str | None = None,
         dataset_reference: dict[str, Any] | None = None,
         exists_ok: bool = True,
@@ -536,7 +543,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
     def get_dataset_tables(
         self,
         dataset_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         max_results: int | None = None,
         retry: Retry = DEFAULT_RETRY,
     ) -> list[dict[str, Any]]:
@@ -565,7 +572,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
     def delete_dataset(
         self,
         dataset_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         delete_contents: bool = False,
         retry: Retry = DEFAULT_RETRY,
     ) -> None:
@@ -614,7 +621,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         description: str | None = None,
         encryption_configuration: dict | None = None,
         location: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
     ) -> Table:
         """Create an external table in the dataset with data from Google Cloud Storage.
 
@@ -750,7 +757,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         fields: list[str] | None = None,
         dataset_id: str | None = None,
         table_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
     ) -> dict[str, Any]:
         """Change some fields of a table.
 
@@ -796,7 +803,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         self,
         dataset_id: str,
         table_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         description: str | None = None,
         expiration_time: int | None = None,
         external_data_configuration: dict | None = None,
@@ -953,7 +960,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         fields: Sequence[str],
         dataset_resource: dict[str, Any],
         dataset_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         retry: Retry = DEFAULT_RETRY,
     ) -> Dataset:
         """Change some fields of a dataset.
@@ -999,7 +1006,9 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         ),
         category=AirflowProviderDeprecationWarning,
     )
-    def patch_dataset(self, dataset_id: str, dataset_resource: dict, project_id: str | None = None) -> dict:
+    def patch_dataset(
+        self, dataset_id: str, dataset_resource: dict, project_id: str = PROVIDE_PROJECT_ID
+    ) -> dict:
         """Patches information in an existing dataset.
 
         It only replaces fields that are provided in the submitted dataset resource.
@@ -1047,7 +1056,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
     def get_dataset_tables_list(
         self,
         dataset_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         table_prefix: str | None = None,
         max_results: int | None = None,
     ) -> list[dict[str, Any]]:
@@ -1084,7 +1093,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
     @GoogleBaseHook.fallback_to_default_project_id
     def get_datasets_list(
         self,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         include_all: bool = False,
         filter_: str | None = None,
         max_results: int | None = None,
@@ -1134,7 +1143,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         return datasets_list
 
     @GoogleBaseHook.fallback_to_default_project_id
-    def get_dataset(self, dataset_id: str, project_id: str | None = None) -> Dataset:
+    def get_dataset(self, dataset_id: str, project_id: str = PROVIDE_PROJECT_ID) -> Dataset:
         """Fetch the dataset referenced by *dataset_id*.
 
         :param dataset_id: The BigQuery Dataset ID
@@ -1158,7 +1167,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         view_dataset: str,
         view_table: str,
         view_project: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
     ) -> dict[str, Any]:
         """Grant authorized view access of a dataset to a view table.
 
@@ -1210,7 +1219,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
 
     @GoogleBaseHook.fallback_to_default_project_id
     def run_table_upsert(
-        self, dataset_id: str, table_resource: dict[str, Any], project_id: str | None = None
+        self, dataset_id: str, table_resource: dict[str, Any], project_id: str = PROVIDE_PROJECT_ID
     ) -> dict[str, Any]:
         """Update a table if it exists, otherwise create a new one.
 
@@ -1267,7 +1276,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         self,
         table_id: str,
         not_found_ok: bool = True,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
     ) -> None:
         """Delete an existing table from the dataset.
 
@@ -1334,7 +1343,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         selected_fields: list[str] | str | None = None,
         page_token: str | None = None,
         start_index: int | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str | None = None,
         retry: Retry = DEFAULT_RETRY,
         return_iterator: bool = False,
@@ -1387,7 +1396,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         return list(iterator)
 
     @GoogleBaseHook.fallback_to_default_project_id
-    def get_schema(self, dataset_id: str, table_id: str, project_id: str | None = None) -> dict:
+    def get_schema(self, dataset_id: str, table_id: str, project_id: str = PROVIDE_PROJECT_ID) -> dict:
         """Get the schema for a given dataset and table.
 
         .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/tables#resource
@@ -1409,7 +1418,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         include_policy_tags: bool,
         dataset_id: str,
         table_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
     ) -> dict[str, Any]:
         """Update fields within a schema for a given dataset and table.
 
@@ -1502,7 +1511,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
     def poll_job_complete(
         self,
         job_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str | None = None,
         retry: Retry = DEFAULT_RETRY,
     ) -> bool:
@@ -1532,7 +1541,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
     def cancel_job(
         self,
         job_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str | None = None,
     ) -> None:
         """Cancel a job and wait for cancellation to complete.
@@ -1573,10 +1582,11 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
             time.sleep(5)
 
     @GoogleBaseHook.fallback_to_default_project_id
+    @GoogleBaseHook.refresh_credentials_retry()
     def get_job(
         self,
         job_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str | None = None,
     ) -> CopyJob | QueryJob | LoadJob | ExtractJob | UnknownJob:
         """Retrieve a BigQuery job.
@@ -1607,7 +1617,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         self,
         configuration: dict,
         job_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str | None = None,
         nowait: bool = False,
         retry: Retry = DEFAULT_RETRY,
@@ -2382,6 +2392,48 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
 
         return project_id, dataset_id, table_id
 
+    @GoogleBaseHook.fallback_to_default_project_id
+    def get_query_results(
+        self,
+        job_id: str,
+        location: str,
+        max_results: int | None = None,
+        selected_fields: list[str] | str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
+        retry: Retry = DEFAULT_RETRY,
+        job_retry: Retry = DEFAULT_JOB_RETRY,
+    ) -> list[dict[str, Any]]:
+        """
+        Get query results given a job_id.
+
+        :param job_id: The ID of the job.
+            The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or
+            dashes (-). The maximum length is 1,024 characters.
+        :param location: The location used for the operation.
+        :param selected_fields: List of fields to return (comma-separated). If
+            unspecified, all fields are returned.
+        :param max_results: The maximum number of records (rows) to be fetched
+            from the table.
+        :param project_id: Google Cloud Project where the job ran.
+        :param retry: How to retry the RPC.
+        :param job_retry: How to retry failed jobs.
+
+        :return: List of rows where columns are filtered by selected fields, when given
+
+        :raises: AirflowException
+        """
+        if isinstance(selected_fields, str):
+            selected_fields = selected_fields.split(",")
+        job = self.get_job(job_id=job_id, project_id=project_id, location=location)
+        if not isinstance(job, QueryJob):
+            raise AirflowException(f"Job '{job_id}' is not a query job")
+
+        if job.state != "DONE":
+            raise AirflowException(f"Job '{job_id}' is not in DONE state")
+
+        rows = [dict(row) for row in job.result(max_results=max_results, retry=retry, job_retry=job_retry)]
+        return [{k: row[k] for k in row if k in selected_fields} for row in rows] if selected_fields else rows
+
     @property
     def scopes(self) -> Sequence[str]:
         """
@@ -2849,11 +2901,10 @@ class BigQueryCursor(BigQueryBaseCursor):
                 return None
 
             query_results = self._get_query_result()
-            if "rows" in query_results and query_results["rows"]:
+            if rows := query_results.get("rows"):
                 self.page_token = query_results.get("pageToken")
                 fields = query_results["schema"]["fields"]
                 col_types = [field["type"] for field in fields]
-                rows = query_results["rows"]
 
                 for dict_row in rows:
                     typed_row = [bq_cast(vs["v"], col_types[idx]) for idx, vs in enumerate(dict_row["f"])]
@@ -3305,7 +3356,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
         )
 
     async def _get_job(
-        self, job_id: str | None, project_id: str | None = None, location: str | None = None
+        self, job_id: str | None, project_id: str = PROVIDE_PROJECT_ID, location: str | None = None
     ) -> CopyJob | QueryJob | LoadJob | ExtractJob | UnknownJob:
         """
         Get BigQuery job by its ID, project ID and location.
@@ -3348,7 +3399,7 @@
         return hook.get_job(job_id=job_id, project_id=project_id, location=location)
 
     async def get_job_status(
-        self, job_id: str | None, project_id: str | None = None, location: str | None = None
+        self, job_id: str | None, project_id: str = PROVIDE_PROJECT_ID, location: str | None = None
     ) -> dict[str, str]:
         job = await self._get_job(job_id=job_id, project_id=project_id, location=location)
         if job.state == "DONE":
@@ -3360,7 +3411,7 @@
     async def get_job_output(
         self,
         job_id: str | None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
     ) -> dict[str, Any]:
         """Get the BigQuery job output for a given job ID asynchronously."""
         async with ClientSession() as session:
@@ -3373,7 +3424,7 @@
         self,
         dataset_id: str | None,
         table_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
     ):
         """Create a new job and get the job_id using gcloud-aio."""
         async with ClientSession() as session:
@@ -3389,16 +3440,50 @@
             job_query_resp = await job_client.query(query_request, cast(Session, session))
             return job_query_resp["jobReference"]["jobId"]
 
-    def get_records(self, query_results: dict[str, Any], as_dict: bool = False) -> list[Any]:
+    async def cancel_job(self, job_id: str, project_id: str | None, location: str | None) -> None:
+        """
+        Cancel a BigQuery job.
+
+        :param job_id: ID of the job to cancel.
+        :param project_id: Google Cloud Project where the job was running.
+        :param location: Location where the job was running.
+        """
+        async with ClientSession() as session:
+            token = await self.get_token(session=session)
+            job = Job(job_id=job_id, project=project_id, location=location, token=token, session=session)  # type: ignore[arg-type]
+
+            self.log.info(
+                "Attempting to cancel BigQuery job: %s in project: %s, location: %s",
+                job_id,
+                project_id,
+                location,
+            )
+            try:
+                await job.cancel()
+                self.log.info("Job %s cancellation requested.", job_id)
+            except Exception as e:
+                self.log.error("Failed to cancel BigQuery job %s: %s", job_id, str(e))
+                raise
+
+    # TODO: Convert get_records into an async method
+    def get_records(
+        self,
+        query_results: dict[str, Any],
+        as_dict: bool = False,
+        selected_fields: str | list[str] | None = None,
+    ) -> list[Any]:
         """Convert a response from BigQuery to records.
 
         :param query_results: the results from a SQL query
         :param as_dict: if True returns the result as a list of dictionaries, otherwise as list of lists.
+        :param selected_fields:
         """
+        if isinstance(selected_fields, str):
+            selected_fields = selected_fields.split(",")
         buffer: list[Any] = []
-        if "rows" in query_results and query_results["rows"]:
-            rows = query_results["rows"]
+        if rows := query_results.get("rows"):
             fields = query_results["schema"]["fields"]
+            fields = [field for field in fields if not selected_fields or field["name"] in selected_fields]
             fields_names = [field["name"] for field in fields]
             col_types = [field["type"] for field in fields]
             for dict_row in rows:
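
A minimal sketch (not part of the diff itself) of how the new asynchronous BigQueryAsyncHook.cancel_job might be awaited, for example from a trigger's cleanup path; the connection ID, job ID, project and location are placeholders:

    import asyncio

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryAsyncHook

    async def cancel_running_job() -> None:
        hook = BigQueryAsyncHook(gcp_conn_id="google_cloud_default")
        # Logs the cancellation attempt and re-raises if the request fails.
        await hook.cancel_job(job_id="example_query_job_id", project_id="example-project", location="US")

    asyncio.run(cancel_running_job())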