apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. airflow/providers/google/__init__.py +5 -8
  2. airflow/providers/google/cloud/hooks/automl.py +35 -1
  3. airflow/providers/google/cloud/hooks/bigquery.py +126 -41
  4. airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
  5. airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
  6. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
  7. airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
  8. airflow/providers/google/cloud/hooks/dataflow.py +246 -32
  9. airflow/providers/google/cloud/hooks/dataplex.py +6 -2
  10. airflow/providers/google/cloud/hooks/dlp.py +14 -14
  11. airflow/providers/google/cloud/hooks/gcs.py +6 -2
  12. airflow/providers/google/cloud/hooks/gdm.py +2 -2
  13. airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
  14. airflow/providers/google/cloud/hooks/mlengine.py +8 -4
  15. airflow/providers/google/cloud/hooks/pubsub.py +1 -1
  16. airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
  17. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
  18. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +91 -0
  19. airflow/providers/google/cloud/links/vertex_ai.py +2 -1
  20. airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
  21. airflow/providers/google/cloud/operators/automl.py +243 -37
  22. airflow/providers/google/cloud/operators/bigquery.py +164 -62
  23. airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
  24. airflow/providers/google/cloud/operators/bigtable.py +7 -6
  25. airflow/providers/google/cloud/operators/cloud_build.py +12 -11
  26. airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
  27. airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
  28. airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
  29. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
  30. airflow/providers/google/cloud/operators/compute.py +12 -11
  31. airflow/providers/google/cloud/operators/datacatalog.py +21 -20
  32. airflow/providers/google/cloud/operators/dataflow.py +59 -42
  33. airflow/providers/google/cloud/operators/datafusion.py +11 -10
  34. airflow/providers/google/cloud/operators/datapipeline.py +3 -2
  35. airflow/providers/google/cloud/operators/dataprep.py +5 -4
  36. airflow/providers/google/cloud/operators/dataproc.py +20 -17
  37. airflow/providers/google/cloud/operators/datastore.py +8 -7
  38. airflow/providers/google/cloud/operators/dlp.py +31 -30
  39. airflow/providers/google/cloud/operators/functions.py +4 -3
  40. airflow/providers/google/cloud/operators/gcs.py +66 -41
  41. airflow/providers/google/cloud/operators/kubernetes_engine.py +256 -49
  42. airflow/providers/google/cloud/operators/life_sciences.py +2 -1
  43. airflow/providers/google/cloud/operators/mlengine.py +11 -10
  44. airflow/providers/google/cloud/operators/pubsub.py +6 -5
  45. airflow/providers/google/cloud/operators/spanner.py +7 -6
  46. airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
  47. airflow/providers/google/cloud/operators/stackdriver.py +11 -10
  48. airflow/providers/google/cloud/operators/tasks.py +14 -13
  49. airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
  50. airflow/providers/google/cloud/operators/translate_speech.py +2 -1
  51. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
  52. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
  53. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
  54. airflow/providers/google/cloud/operators/vision.py +13 -12
  55. airflow/providers/google/cloud/operators/workflows.py +12 -14
  56. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  57. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
  58. airflow/providers/google/cloud/sensors/bigtable.py +2 -1
  59. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
  60. airflow/providers/google/cloud/sensors/dataflow.py +239 -52
  61. airflow/providers/google/cloud/sensors/datafusion.py +2 -1
  62. airflow/providers/google/cloud/sensors/dataproc.py +3 -2
  63. airflow/providers/google/cloud/sensors/gcs.py +14 -12
  64. airflow/providers/google/cloud/sensors/tasks.py +2 -1
  65. airflow/providers/google/cloud/sensors/workflows.py +2 -1
  66. airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
  67. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
  68. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
  69. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  70. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
  71. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
  72. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
  73. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
  74. airflow/providers/google/cloud/triggers/bigquery.py +75 -6
  75. airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
  76. airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
  77. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
  78. airflow/providers/google/cloud/triggers/dataflow.py +504 -4
  79. airflow/providers/google/cloud/triggers/dataproc.py +190 -27
  80. airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -3
  81. airflow/providers/google/cloud/triggers/mlengine.py +2 -1
  82. airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
  83. airflow/providers/google/common/hooks/base_google.py +45 -7
  84. airflow/providers/google/firebase/hooks/firestore.py +2 -2
  85. airflow/providers/google/firebase/operators/firestore.py +2 -1
  86. airflow/providers/google/get_provider_info.py +5 -3
  87. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/METADATA +18 -18
  88. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/RECORD +90 -90
  89. airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
  90. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/WHEEL +0 -0
  91. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/bigquery.py
@@ -29,7 +29,7 @@ from typing import TYPE_CHECKING, Any, Iterable, Sequence, SupportsAbs
  import attr
  from deprecated import deprecated
  from google.api_core.exceptions import Conflict
- from google.cloud.bigquery import DEFAULT_RETRY, CopyJob, ExtractJob, LoadJob, QueryJob
+ from google.cloud.bigquery import DEFAULT_RETRY, CopyJob, ExtractJob, LoadJob, QueryJob, Row
  from google.cloud.bigquery.table import RowIterator

  from airflow.configuration import conf
@@ -56,6 +56,8 @@ from airflow.providers.google.cloud.triggers.bigquery import (
      BigQueryValueCheckTrigger,
  )
  from airflow.providers.google.cloud.utils.bigquery import convert_job_id
+ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
+ from airflow.utils.helpers import exactly_one

  if TYPE_CHECKING:
      from google.api_core.retry import Retry
@@ -66,7 +68,7 @@ if TYPE_CHECKING:

  BIGQUERY_JOB_DETAILS_LINK_FMT = "https://console.cloud.google.com/bigquery?j={job_id}"

- LABEL_REGEX = re.compile(r"^[a-z][\w-]{0,63}$")
+ LABEL_REGEX = re.compile(r"^[\w-]{0,63}$")


  class BigQueryUIColors(enum.Enum):
@@ -201,7 +203,25 @@ class _BigQueryOpenLineageMixin:
          )


- class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
+ class _BigQueryOperatorsEncryptionConfigurationMixin:
+     """A class to handle the configuration for BigQueryHook.insert_job method."""
+
+     # Note: If you want to add this feature to a new operator you can include the class name in the type
+     # annotation of the `self`. Then you can inherit this class in the target operator.
+     # e.g: BigQueryCheckOperator, BigQueryTableCheckOperator
+     def include_encryption_configuration(  # type:ignore[misc]
+         self: BigQueryCheckOperator | BigQueryTableCheckOperator,
+         configuration: dict,
+         config_key: str,
+     ) -> None:
+         """Add encryption_configuration to destinationEncryptionConfiguration key if it is not None."""
+         if self.encryption_configuration is not None:
+             configuration[config_key]["destinationEncryptionConfiguration"] = self.encryption_configuration
+
+
+ class BigQueryCheckOperator(
+     _BigQueryDbHookMixin, SQLCheckOperator, _BigQueryOperatorsEncryptionConfigurationMixin
+ ):
      """Performs checks against BigQuery.

      This operator expects a SQL query that returns a single row. Each value on
@@ -246,6 +266,13 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
          Token Creator IAM role to the directly preceding identity, with first
          account from the list granting this role to the originating account. (templated)
      :param labels: a dictionary containing labels for the table, passed to BigQuery.
+     :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
+
+         .. code-block:: python
+
+             encryption_configuration = {
+                 "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+             }
      :param deferrable: Run operator in the deferrable mode.
      :param poll_interval: (Deferrable mode only) polling period in seconds to
          check for the status of job.
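A minimal usage sketch for the new parameter, assuming a hypothetical DAG task (the task id, SQL, and KMS key path below are placeholders, not values from this diff):

    from airflow.providers.google.cloud.operators.bigquery import BigQueryCheckOperator

    # The check query now runs as a BigQuery job whose destination is encrypted
    # with the given Cloud KMS key; all resource names here are placeholders.
    check = BigQueryCheckOperator(
        task_id="check_sales_not_empty",
        sql="SELECT COUNT(*) FROM `my-project.my_dataset.sales`",
        use_legacy_sql=False,
        encryption_configuration={
            "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
        },
    )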
@@ -270,6 +297,7 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
          location: str | None = None,
          impersonation_chain: str | Sequence[str] | None = None,
          labels: dict | None = None,
+         encryption_configuration: dict | None = None,
          deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
          poll_interval: float = 4.0,
          **kwargs,
@@ -280,6 +308,7 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
          self.location = location
          self.impersonation_chain = impersonation_chain
          self.labels = labels
+         self.encryption_configuration = encryption_configuration
          self.deferrable = deferrable
          self.poll_interval = poll_interval

@@ -291,6 +320,8 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
          """Submit a new job and get the job id for polling the status using Trigger."""
          configuration = {"query": {"query": self.sql, "useLegacySql": self.use_legacy_sql}}

+         self.include_encryption_configuration(configuration, "query")
+
          return hook.insert_job(
              configuration=configuration,
              project_id=hook.project_id,
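The effect of the `include_encryption_configuration` call above on the job payload, sketched with placeholder values (this mirrors the mixin's one-line behavior; nothing happens when `encryption_configuration` is None):

    configuration = {"query": {"query": "SELECT 1", "useLegacySql": False}}
    encryption_configuration = {
        "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
    }

    # Equivalent of include_encryption_configuration(configuration, "query"):
    configuration["query"]["destinationEncryptionConfiguration"] = encryption_configuration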
@@ -322,8 +353,26 @@
                      ),
                      method_name="execute_complete",
                  )
+             self._handle_job_error(job)
+             # job.result() returns a RowIterator. Mypy expects an instance of SupportsNext[Any] for
+             # the next() call which the RowIterator does not resemble to. Hence, ignore the arg-type error.
+             # Row passed to _validate_records is a collection of values only, without column names.
+             self._validate_records(next(iter(job.result()), []))  # type: ignore[arg-type]
              self.log.info("Current state of job %s is %s", job.job_id, job.state)

+     @staticmethod
+     def _handle_job_error(job: BigQueryJob | UnknownJob) -> None:
+         if job.error_result:
+             raise AirflowException(f"BigQuery job {job.job_id} failed: {job.error_result}")
+
+     def _validate_records(self, records) -> None:
+         if not records:
+             raise AirflowException(f"The following query returned zero rows: {self.sql}")
+         elif not all(records):
+             self._raise_exception(  # type: ignore[attr-defined]
+                 f"Test failed.\nQuery:\n{self.sql}\nResults:\n{records!s}"
+             )
+
      def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
          """Act as a callback for when the trigger fires.

@@ -333,13 +382,7 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
          if event["status"] == "error":
              raise AirflowException(event["message"])

-         records = event["records"]
-         if not records:
-             raise AirflowException("The query returned empty results")
-         elif not all(records):
-             self._raise_exception(  # type: ignore[attr-defined]
-                 f"Test failed.\nQuery:\n{self.sql}\nResults:\n{records!s}"
-             )
+         self._validate_records(event["records"])
          self.log.info("Record: %s", event["records"])
          self.log.info("Success.")

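Both the deferrable and non-deferrable paths now funnel through `_validate_records`. Its semantics, restated as a standalone sketch (`validate_records` below is a hypothetical free-function rendering of the method):

    from airflow.exceptions import AirflowException

    def validate_records(records, sql: str) -> None:
        # No rows at all is an error, and any falsy value (0, None, "", False)
        # in the returned row fails the check.
        if not records:
            raise AirflowException(f"The following query returned zero rows: {sql}")
        if not all(records):
            raise AirflowException(f"Test failed.\nQuery:\n{sql}\nResults:\n{records!s}")

    validate_records([1, "ok", True], "SELECT ...")  # passes
    # validate_records([1, 0], "SELECT ...")         # raises: row contains a falsy value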
@@ -454,8 +497,8 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
              self._handle_job_error(job)
              # job.result() returns a RowIterator. Mypy expects an instance of SupportsNext[Any] for
              # the next() call which the RowIterator does not resemble to. Hence, ignore the arg-type error.
-             records = next(job.result())  # type: ignore[arg-type]
-             self.check_value(records)  # type: ignore[attr-defined]
+             # Row passed to check_value is a collection of values only, without column names.
+             self.check_value(next(iter(job.result()), []))  # type: ignore[arg-type]
              self.log.info("Current state of job %s is %s", job.job_id, job.state)

      @staticmethod
@@ -542,7 +585,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperat
          labels: dict | None = None,
          deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
          poll_interval: float = 4.0,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          **kwargs,
      ) -> None:
          super().__init__(
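The `project_id: str = PROVIDE_PROJECT_ID` substitution seen here recurs through the rest of this diff. The sentinel comes from `airflow/providers/google/common/hooks/base_google.py` and keeps the annotation a plain `str` for type checkers while still letting the hook resolve the project from the connection; roughly (a sketch, the exact definition lives in base_google.py):

    from typing import cast

    # None at runtime, but typed as str, so operators can declare
    # `project_id: str = PROVIDE_PROJECT_ID` without an Optional annotation.
    PROVIDE_PROJECT_ID: str = cast(str, None)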
@@ -753,7 +796,9 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
          self.log.info("All tests have passed")


- class BigQueryTableCheckOperator(_BigQueryDbHookMixin, SQLTableCheckOperator):
+ class BigQueryTableCheckOperator(
+     _BigQueryDbHookMixin, SQLTableCheckOperator, _BigQueryOperatorsEncryptionConfigurationMixin
+ ):
      """
      Subclasses the SQLTableCheckOperator in order to provide a job id for OpenLineage to parse.

@@ -781,6 +826,13 @@ class BigQueryTableCheckOperator(_BigQueryDbHookMixin, SQLTableCheckOperator):
          Service Account Token Creator IAM role to the directly preceding identity, with first
          account from the list granting this role to the originating account (templated).
      :param labels: a dictionary containing labels for the table, passed to BigQuery
+     :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
+
+         .. code-block:: python
+
+             encryption_configuration = {
+                 "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+             }
      """

      template_fields: Sequence[str] = tuple(set(SQLTableCheckOperator.template_fields) | {"gcp_conn_id"})
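A usage sketch combining the inherited `checks` contract with the new parameter (task id, table name, check, and key path are placeholders):

    from airflow.providers.google.cloud.operators.bigquery import BigQueryTableCheckOperator

    table_check = BigQueryTableCheckOperator(
        task_id="check_sales_row_count",
        table="my-project.my_dataset.sales",
        checks={"row_count_check": {"check_statement": "COUNT(*) > 0"}},
        encryption_configuration={
            "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
        },
    )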
@@ -798,6 +850,7 @@ class BigQueryTableCheckOperator(_BigQueryDbHookMixin, SQLTableCheckOperator):
          location: str | None = None,
          impersonation_chain: str | Sequence[str] | None = None,
          labels: dict | None = None,
+         encryption_configuration: dict | None = None,
          **kwargs,
      ) -> None:
          super().__init__(table=table, checks=checks, partition_clause=partition_clause, **kwargs)
@@ -806,6 +859,7 @@ class BigQueryTableCheckOperator(_BigQueryDbHookMixin, SQLTableCheckOperator):
          self.location = location
          self.impersonation_chain = impersonation_chain
          self.labels = labels
+         self.encryption_configuration = encryption_configuration

      def _submit_job(
          self,
@@ -815,6 +869,8 @@ class BigQueryTableCheckOperator(_BigQueryDbHookMixin, SQLTableCheckOperator):
          """Submit a new job and get the job id for polling the status using Trigger."""
          configuration = {"query": {"query": self.sql, "useLegacySql": self.use_legacy_sql}}

+         self.include_encryption_configuration(configuration, "query")
+
          return hook.insert_job(
              configuration=configuration,
              project_id=hook.project_id,
@@ -858,9 +914,10 @@ class BigQueryTableCheckOperator(_BigQueryDbHookMixin, SQLTableCheckOperator):

  class BigQueryGetDataOperator(GoogleCloudBaseOperator):
      """
-     Fetches the data from a BigQuery table (alternatively fetch data for selected columns) and returns data.
+     Fetch data and return it, either from a BigQuery table, or results of a query job.

-     Data is returned in either of the following two formats, based on "as_dict" value:
+     Data could be narrowed down by specific columns or retrieved as a whole.
+     It is returned in either of the following two formats, based on "as_dict" value:
      1. False (Default) - A Python list of lists, with the number of nested lists equal to the number of rows
      fetched. Each nested list represents a row, where the elements within it correspond to the column values
      for that particular row.
@@ -880,27 +937,42 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
      .. note::
          If you pass fields to ``selected_fields`` which are in different order than the
          order of columns already in
-         BQ table, the data will still be in the order of BQ table.
+         BQ table/job, the data will still be in the order of BQ table.
          For example if the BQ table has 3 columns as
          ``[A,B,C]`` and you pass 'B,A' in the ``selected_fields``
          the data would still be of the form ``'A,B'``.

-     **Example**::
+     .. note::
+         When utilizing job id not in deferrable mode, the job should be in DONE state.
+
+     **Example - Retrieve data from BigQuery using table**::

          get_data = BigQueryGetDataOperator(
              task_id="get_data_from_bq",
              dataset_id="test_dataset",
              table_id="Transaction_partitions",
-             project_id="internal-gcp-project",
+             table_project_id="internal-gcp-project",
+             max_results=100,
+             selected_fields="DATE",
+             gcp_conn_id="airflow-conn-id",
+         )
+
+     **Example - Retrieve data from BigQuery using a job id**::
+
+         get_data = BigQueryGetDataOperator(
+             job_id="airflow_8999918812727394_86a1cecc69c5e3028d28247affd7563",
+             job_project_id="internal-gcp-project",
              max_results=100,
              selected_fields="DATE",
              gcp_conn_id="airflow-conn-id",
          )

      :param dataset_id: The dataset ID of the requested table. (templated)
-     :param table_id: The table ID of the requested table. (templated)
+     :param table_id: The table ID of the requested table. Mutually exclusive with job_id. (templated)
      :param table_project_id: (Optional) The project ID of the requested table.
          If None, it will be derived from the hook's project ID. (templated)
+     :param job_id: The job ID from which query results are retrieved.
+         Mutually exclusive with table_id. (templated)
      :param job_project_id: (Optional) Google Cloud Project where the job is running.
          If None, it will be derived from the hook's project ID. (templated)
      :param project_id: (Deprecated) (Optional) The name of the project where the data
@@ -931,6 +1003,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
          "dataset_id",
          "table_id",
          "table_project_id",
+         "job_id",
          "job_project_id",
          "project_id",
          "max_results",
@@ -942,11 +1015,12 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
      def __init__(
          self,
          *,
-         dataset_id: str,
-         table_id: str,
+         dataset_id: str | None = None,
+         table_id: str | None = None,
          table_project_id: str | None = None,
+         job_id: str | None = None,
          job_project_id: str | None = None,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          max_results: int = 100,
          selected_fields: str | None = None,
          gcp_conn_id: str = "google_cloud_default",
@@ -964,6 +1038,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
          self.dataset_id = dataset_id
          self.table_id = table_id
          self.job_project_id = job_project_id
+         self.job_id = job_id
          self.max_results = max_results
          self.selected_fields = selected_fields
          self.gcp_conn_id = gcp_conn_id
@@ -1000,7 +1075,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
              query += "*"
          query += (
              f" from `{self.table_project_id or hook.project_id}.{self.dataset_id}"
-             f".{self.table_id}` limit {int(self.max_results)}"
+             f".{self.table_id}` limit {self.max_results}"
          )
          return query

@@ -1013,7 +1088,13 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
          if not self.table_project_id:
              self.table_project_id = self.project_id
          else:
-             self.log.info("Ignoring project_id parameter, as table_project_id is found.")
+             self.log.info("Ignoring 'project_id' parameter, as 'table_project_id' is found.")
+
+         if not exactly_one(self.job_id, self.table_id):
+             raise AirflowException(
+                 "'job_id' and 'table_id' parameters are mutually exclusive, "
+                 "ensure that exactly one of them is specified"
+             )

          hook = BigQueryHook(
              gcp_conn_id=self.gcp_conn_id,
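`exactly_one` (from `airflow.utils.helpers`) is true only when exactly one of its arguments is truthy, which is what enforces the table_id/job_id choice above:

    from airflow.utils.helpers import exactly_one

    exactly_one("my_table", None)       # True  - table mode
    exactly_one(None, "job_123")        # True  - job mode
    exactly_one(None, None)             # False - neither given, raises above
    exactly_one("my_table", "job_123")  # False - both given, raises above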
@@ -1022,31 +1103,45 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
          )

          if not self.deferrable:
-             self.log.info(
-                 "Fetching Data from %s.%s.%s max results: %s",
-                 self.table_project_id or hook.project_id,
-                 self.dataset_id,
-                 self.table_id,
-                 int(self.max_results),
-             )
-             if not self.selected_fields:
-                 schema: dict[str, list] = hook.get_schema(
+             if not self.job_id:
+                 self.log.info(
+                     "Fetching Data from %s.%s.%s max results: %s",
+                     self.table_project_id or hook.project_id,
+                     self.dataset_id,
+                     self.table_id,
+                     self.max_results,
+                 )
+                 if not self.selected_fields:
+                     schema: dict[str, list] = hook.get_schema(
+                         dataset_id=self.dataset_id,
+                         table_id=self.table_id,
+                         project_id=self.table_project_id or hook.project_id,
+                     )
+                     if "fields" in schema:
+                         self.selected_fields = ",".join([field["name"] for field in schema["fields"]])
+                 rows: list[Row] | RowIterator | list[dict[str, Any]] = hook.list_rows(
                      dataset_id=self.dataset_id,
                      table_id=self.table_id,
+                     max_results=self.max_results,
+                     selected_fields=self.selected_fields,
+                     location=self.location,
                      project_id=self.table_project_id or hook.project_id,
                  )
-                 if "fields" in schema:
-                     self.selected_fields = ",".join([field["name"] for field in schema["fields"]])
-
-             rows = hook.list_rows(
-                 dataset_id=self.dataset_id,
-                 table_id=self.table_id,
-                 max_results=int(self.max_results),
-                 selected_fields=self.selected_fields,
-                 location=self.location,
-                 project_id=self.table_project_id or hook.project_id,
-             )
-
+             else:
+                 self.log.info(
+                     "Fetching data from job '%s:%s.%s' max results: %s",
+                     self.job_project_id or hook.project_id,
+                     self.location,
+                     self.job_id,
+                     self.max_results,
+                 )
+                 rows = hook.get_query_results(
+                     job_id=self.job_id,
+                     location=self.location,
+                     selected_fields=self.selected_fields,
+                     max_results=self.max_results,
+                     project_id=self.job_project_id or hook.project_id,
+                 )
              if isinstance(rows, RowIterator):
                  raise TypeError(
                      "BigQueryHook.list_rows() returns iterator when return_iterator is False (default)"
@@ -1056,11 +1151,16 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
              if self.as_dict:
                  table_data = [dict(row) for row in rows]
              else:
-                 table_data = [row.values() for row in rows]
+                 table_data = [row.values() if isinstance(row, Row) else list(row.values()) for row in rows]

              return table_data

-         job = self._submit_job(hook, job_id="")
+         if not self.job_id:
+             job: BigQueryJob | UnknownJob = self._submit_job(hook, job_id="")
+         else:
+             job = hook.get_job(
+                 job_id=self.job_id, project_id=self.job_project_id or hook.project_id, location=self.location
+             )

          context["ti"].xcom_push(key="job_id", value=job.job_id)
          self.defer(
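For reference, the two return shapes selected by `as_dict`, shown with made-up rows (two rows, columns DATE and COUNT):

    # as_dict=False (default): a list of value lists, one per fetched row.
    [["2024-01-01", 42], ["2024-01-02", 7]]

    # as_dict=True: a list of column-name -> value mappings.
    [{"DATE": "2024-01-01", "COUNT": 42}, {"DATE": "2024-01-02", "COUNT": 7}]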
@@ -1075,6 +1175,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
                  poll_interval=self.poll_interval,
                  as_dict=self.as_dict,
                  impersonation_chain=self.impersonation_chain,
+                 selected_fields=self.selected_fields,
              ),
              method_name="execute_complete",
          )
@@ -1163,7 +1264,7 @@ class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator):
          .. code-block:: python

              encryption_configuration = {
-                 "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
+                 "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
              }
      :param impersonation_chain: Optional service account to impersonate using short-term
          credentials, or chained list of accounts required to get the access_token
@@ -1403,7 +1504,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
          .. code-block:: python

              encryption_configuration = {
-                 "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
+                 "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
              }
      :param location: The location used for the operation.
      :param cluster_fields: [Optional] The fields used for clustering.
@@ -1447,7 +1548,7 @@
          dataset_id: str,
          table_id: str,
          table_resource: dict[str, Any] | None = None,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          schema_fields: list | None = None,
          gcs_schema_object: str | None = None,
          time_partitioning: dict | None = None,
@@ -1631,7 +1732,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
          .. code-block:: python

              encryption_configuration = {
-                 "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
+                 "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
              }
      :param location: The location used for the operation.
      :param impersonation_chain: Optional service account to impersonate using short-term
@@ -1907,7 +2008,7 @@ class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):
          self,
          *,
          dataset_id: str,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          delete_contents: bool = False,
          gcp_conn_id: str = "google_cloud_default",
          impersonation_chain: str | Sequence[str] | None = None,
@@ -1987,7 +2088,7 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
          self,
          *,
          dataset_id: str | None = None,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          dataset_reference: dict | None = None,
          location: str | None = None,
          gcp_conn_id: str = "google_cloud_default",
@@ -2091,7 +2192,7 @@ class BigQueryGetDatasetOperator(GoogleCloudBaseOperator):
          self,
          *,
          dataset_id: str,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          gcp_conn_id: str = "google_cloud_default",
          impersonation_chain: str | Sequence[str] | None = None,
          **kwargs,
@@ -2154,7 +2255,7 @@ class BigQueryGetDatasetTablesOperator(GoogleCloudBaseOperator):
          self,
          *,
          dataset_id: str,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          max_results: int | None = None,
          gcp_conn_id: str = "google_cloud_default",
          impersonation_chain: str | Sequence[str] | None = None,
@@ -2223,7 +2324,7 @@ class BigQueryPatchDatasetOperator(GoogleCloudBaseOperator):
          *,
          dataset_id: str,
          dataset_resource: dict,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          gcp_conn_id: str = "google_cloud_default",
          impersonation_chain: str | Sequence[str] | None = None,
          **kwargs,
@@ -2296,7 +2397,7 @@ class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
          fields: list[str] | None = None,
          dataset_id: str | None = None,
          table_id: str | None = None,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          gcp_conn_id: str = "google_cloud_default",
          impersonation_chain: str | Sequence[str] | None = None,
          **kwargs,
@@ -2380,7 +2481,7 @@ class BigQueryUpdateDatasetOperator(GoogleCloudBaseOperator):
          dataset_resource: dict[str, Any],
          fields: list[str] | None = None,
          dataset_id: str | None = None,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          gcp_conn_id: str = "google_cloud_default",
          impersonation_chain: str | Sequence[str] | None = None,
          **kwargs,
@@ -2516,7 +2617,7 @@ class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
          *,
          dataset_id: str,
          table_resource: dict,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          gcp_conn_id: str = "google_cloud_default",
          location: str | None = None,
          impersonation_chain: str | Sequence[str] | None = None,
@@ -2623,7 +2724,7 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
          dataset_id: str,
          table_id: str,
          include_policy_tags: bool = False,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          gcp_conn_id: str = "google_cloud_default",
          impersonation_chain: str | Sequence[str] | None = None,
          **kwargs,
@@ -2732,7 +2833,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMix
      def __init__(
          self,
          configuration: dict[str, Any],
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          location: str | None = None,
          job_id: str | None = None,
          force_rerun: bool = True,
@@ -2903,6 +3004,7 @@
                      location=self.location or hook.location,
                      poll_interval=self.poll_interval,
                      impersonation_chain=self.impersonation_chain,
+                     cancel_on_kill=self.cancel_on_kill,
                  ),
                  method_name="execute_complete",
              )
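With `cancel_on_kill` now forwarded to the trigger, the deferrable path honors the same flag as the synchronous one. A usage sketch (the configuration contents are illustrative):

    from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator

    insert_job = BigQueryInsertJobOperator(
        task_id="run_query",
        configuration={"query": {"query": "SELECT 1", "useLegacySql": False}},
        deferrable=True,
        # Forwarded to the operator's trigger, so a task killed while deferred
        # can cancel the underlying BigQuery job.
        cancel_on_kill=True,
    )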
airflow/providers/google/cloud/operators/bigquery_dts.py
@@ -37,6 +37,7 @@ from airflow.providers.google.cloud.hooks.bigquery_dts import BiqQueryDataTransf
  from airflow.providers.google.cloud.links.bigquery_dts import BigQueryDataTransferConfigLink
  from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
  from airflow.providers.google.cloud.triggers.bigquery_dts import BigQueryDataTransferRunTrigger
+ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID

  if TYPE_CHECKING:
      from google.api_core.retry import Retry
@@ -94,7 +95,7 @@ class BigQueryCreateDataTransferOperator(GoogleCloudBaseOperator):
          self,
          *,
          transfer_config: dict,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          location: str | None = None,
          authorization_code: str | None = None,
          retry: Retry | _MethodDefault = DEFAULT,
@@ -187,7 +188,7 @@ class BigQueryDeleteDataTransferConfigOperator(GoogleCloudBaseOperator):
          self,
          *,
          transfer_config_id: str,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          location: str | None = None,
          retry: Retry | _MethodDefault = DEFAULT,
          timeout: float | None = None,
@@ -273,7 +274,7 @@ class BigQueryDataTransferServiceStartTransferRunsOperator(GoogleCloudBaseOperat
          self,
          *,
          transfer_config_id: str,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          location: str | None = None,
          requested_time_range: dict | None = None,
          requested_run_time: dict | None = None,
airflow/providers/google/cloud/operators/bigtable.py
@@ -31,6 +31,7 @@ from airflow.providers.google.cloud.links.bigtable import (
      BigtableTablesLink,
  )
  from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
+ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID

  if TYPE_CHECKING:
      import enum
@@ -112,7 +113,7 @@ class BigtableCreateInstanceOperator(GoogleCloudBaseOperator, BigtableValidation
          instance_id: str,
          main_cluster_id: str,
          main_cluster_zone: str,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          replica_clusters: list[dict[str, str]] | None = None,
          instance_display_name: str | None = None,
          instance_type: enums.Instance.Type | None = None,
@@ -218,7 +219,7 @@ class BigtableUpdateInstanceOperator(GoogleCloudBaseOperator, BigtableValidation
          self,
          *,
          instance_id: str,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          instance_display_name: str | None = None,
          instance_type: enums.Instance.Type | enum.IntEnum | None = None,
          instance_labels: dict | None = None,
@@ -298,7 +299,7 @@ class BigtableDeleteInstanceOperator(GoogleCloudBaseOperator, BigtableValidation
          self,
          *,
          instance_id: str,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          gcp_conn_id: str = "google_cloud_default",
          impersonation_chain: str | Sequence[str] | None = None,
          **kwargs,
@@ -374,7 +375,7 @@ class BigtableCreateTableOperator(GoogleCloudBaseOperator, BigtableValidationMix
          *,
          instance_id: str,
          table_id: str,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          initial_split_keys: list | None = None,
          column_families: dict[str, GarbageCollectionRule] | None = None,
          gcp_conn_id: str = "google_cloud_default",
@@ -478,7 +479,7 @@ class BigtableDeleteTableOperator(GoogleCloudBaseOperator, BigtableValidationMix
          *,
          instance_id: str,
          table_id: str,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          app_profile_id: str | None = None,
          gcp_conn_id: str = "google_cloud_default",
          impersonation_chain: str | Sequence[str] | None = None,
@@ -559,7 +560,7 @@ class BigtableUpdateClusterOperator(GoogleCloudBaseOperator, BigtableValidationM
          instance_id: str,
          cluster_id: str,
          nodes: int,
-         project_id: str | None = None,
+         project_id: str = PROVIDE_PROJECT_ID,
          gcp_conn_id: str = "google_cloud_default",
          impersonation_chain: str | Sequence[str] | None = None,
          **kwargs,