apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +5 -8
- airflow/providers/google/cloud/hooks/automl.py +35 -1
- airflow/providers/google/cloud/hooks/bigquery.py +126 -41
- airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
- airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
- airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
- airflow/providers/google/cloud/hooks/dataflow.py +246 -32
- airflow/providers/google/cloud/hooks/dataplex.py +6 -2
- airflow/providers/google/cloud/hooks/dlp.py +14 -14
- airflow/providers/google/cloud/hooks/gcs.py +6 -2
- airflow/providers/google/cloud/hooks/gdm.py +2 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/mlengine.py +8 -4
- airflow/providers/google/cloud/hooks/pubsub.py +1 -1
- airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +91 -0
- airflow/providers/google/cloud/links/vertex_ai.py +2 -1
- airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
- airflow/providers/google/cloud/operators/automl.py +243 -37
- airflow/providers/google/cloud/operators/bigquery.py +164 -62
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
- airflow/providers/google/cloud/operators/bigtable.py +7 -6
- airflow/providers/google/cloud/operators/cloud_build.py +12 -11
- airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
- airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
- airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
- airflow/providers/google/cloud/operators/compute.py +12 -11
- airflow/providers/google/cloud/operators/datacatalog.py +21 -20
- airflow/providers/google/cloud/operators/dataflow.py +59 -42
- airflow/providers/google/cloud/operators/datafusion.py +11 -10
- airflow/providers/google/cloud/operators/datapipeline.py +3 -2
- airflow/providers/google/cloud/operators/dataprep.py +5 -4
- airflow/providers/google/cloud/operators/dataproc.py +20 -17
- airflow/providers/google/cloud/operators/datastore.py +8 -7
- airflow/providers/google/cloud/operators/dlp.py +31 -30
- airflow/providers/google/cloud/operators/functions.py +4 -3
- airflow/providers/google/cloud/operators/gcs.py +66 -41
- airflow/providers/google/cloud/operators/kubernetes_engine.py +256 -49
- airflow/providers/google/cloud/operators/life_sciences.py +2 -1
- airflow/providers/google/cloud/operators/mlengine.py +11 -10
- airflow/providers/google/cloud/operators/pubsub.py +6 -5
- airflow/providers/google/cloud/operators/spanner.py +7 -6
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
- airflow/providers/google/cloud/operators/stackdriver.py +11 -10
- airflow/providers/google/cloud/operators/tasks.py +14 -13
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
- airflow/providers/google/cloud/operators/translate_speech.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
- airflow/providers/google/cloud/operators/vision.py +13 -12
- airflow/providers/google/cloud/operators/workflows.py +12 -14
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/sensors/bigtable.py +2 -1
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/sensors/dataflow.py +239 -52
- airflow/providers/google/cloud/sensors/datafusion.py +2 -1
- airflow/providers/google/cloud/sensors/dataproc.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +14 -12
- airflow/providers/google/cloud/sensors/tasks.py +2 -1
- airflow/providers/google/cloud/sensors/workflows.py +2 -1
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
- airflow/providers/google/cloud/triggers/bigquery.py +75 -6
- airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
- airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/triggers/dataflow.py +504 -4
- airflow/providers/google/cloud/triggers/dataproc.py +190 -27
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -3
- airflow/providers/google/cloud/triggers/mlengine.py +2 -1
- airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
- airflow/providers/google/common/hooks/base_google.py +45 -7
- airflow/providers/google/firebase/hooks/firestore.py +2 -2
- airflow/providers/google/firebase/operators/firestore.py +2 -1
- airflow/providers/google/get_provider_info.py +5 -3
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/METADATA +18 -18
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/RECORD +90 -90
- airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/bigquery.py

@@ -29,7 +29,7 @@ from typing import TYPE_CHECKING, Any, Iterable, Sequence, SupportsAbs
 import attr
 from deprecated import deprecated
 from google.api_core.exceptions import Conflict
-from google.cloud.bigquery import DEFAULT_RETRY, CopyJob, ExtractJob, LoadJob, QueryJob
+from google.cloud.bigquery import DEFAULT_RETRY, CopyJob, ExtractJob, LoadJob, QueryJob, Row
 from google.cloud.bigquery.table import RowIterator
 
 from airflow.configuration import conf
@@ -56,6 +56,8 @@ from airflow.providers.google.cloud.triggers.bigquery import (
     BigQueryValueCheckTrigger,
 )
 from airflow.providers.google.cloud.utils.bigquery import convert_job_id
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
+from airflow.utils.helpers import exactly_one
 
 if TYPE_CHECKING:
     from google.api_core.retry import Retry
@@ -66,7 +68,7 @@ if TYPE_CHECKING:
 
 BIGQUERY_JOB_DETAILS_LINK_FMT = "https://console.cloud.google.com/bigquery?j={job_id}"
 
-LABEL_REGEX = re.compile(r"^[
+LABEL_REGEX = re.compile(r"^[\w-]{0,63}$")
 
 
 class BigQueryUIColors(enum.Enum):
@@ -201,7 +203,25 @@ class _BigQueryOpenLineageMixin:
         )
 
 
-class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
+class _BigQueryOperatorsEncryptionConfigurationMixin:
+    """A class to handle the configuration for BigQueryHook.insert_job method."""
+
+    # Note: If you want to add this feature to a new operator you can include the class name in the type
+    # annotation of the `self`. Then you can inherit this class in the target operator.
+    # e.g: BigQueryCheckOperator, BigQueryTableCheckOperator
+    def include_encryption_configuration(  # type:ignore[misc]
+        self: BigQueryCheckOperator | BigQueryTableCheckOperator,
+        configuration: dict,
+        config_key: str,
+    ) -> None:
+        """Add encryption_configuration to destinationEncryptionConfiguration key if it is not None."""
+        if self.encryption_configuration is not None:
+            configuration[config_key]["destinationEncryptionConfiguration"] = self.encryption_configuration
+
+
+class BigQueryCheckOperator(
+    _BigQueryDbHookMixin, SQLCheckOperator, _BigQueryOperatorsEncryptionConfigurationMixin
+):
     """Performs checks against BigQuery.
 
     This operator expects a SQL query that returns a single row. Each value on
@@ -246,6 +266,13 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
         Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account. (templated)
     :param labels: a dictionary containing labels for the table, passed to BigQuery.
+    :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     :param deferrable: Run operator in the deferrable mode.
     :param poll_interval: (Deferrable mode only) polling period in seconds to
         check for the status of job.
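Taken together with the mixin hunk above, the new parameter can be passed straight to the operator; include_encryption_configuration() copies it under destinationEncryptionConfiguration in the query configuration handed to BigQueryHook.insert_job. A minimal usage sketch (task id, SQL, and key path are placeholder values):

    from airflow.providers.google.cloud.operators.bigquery import BigQueryCheckOperator

    check = BigQueryCheckOperator(
        task_id="check_row_count",
        sql="SELECT COUNT(*) FROM `my-project.my_dataset.my_table`",
        use_legacy_sql=False,
        # New in 10.18.0: forwarded to the query job as
        # configuration["query"]["destinationEncryptionConfiguration"].
        encryption_configuration={
            "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
        },
    )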
@@ -270,6 +297,7 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         labels: dict | None = None,
+        encryption_configuration: dict | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: float = 4.0,
         **kwargs,
@@ -280,6 +308,7 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
         self.location = location
         self.impersonation_chain = impersonation_chain
         self.labels = labels
+        self.encryption_configuration = encryption_configuration
         self.deferrable = deferrable
         self.poll_interval = poll_interval
 
@@ -291,6 +320,8 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
         """Submit a new job and get the job id for polling the status using Trigger."""
         configuration = {"query": {"query": self.sql, "useLegacySql": self.use_legacy_sql}}
 
+        self.include_encryption_configuration(configuration, "query")
+
         return hook.insert_job(
             configuration=configuration,
             project_id=hook.project_id,
@@ -322,8 +353,26 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
                 ),
                 method_name="execute_complete",
             )
+        self._handle_job_error(job)
+        # job.result() returns a RowIterator. Mypy expects an instance of SupportsNext[Any] for
+        # the next() call which the RowIterator does not resemble to. Hence, ignore the arg-type error.
+        # Row passed to _validate_records is a collection of values only, without column names.
+        self._validate_records(next(iter(job.result()), []))  # type: ignore[arg-type]
         self.log.info("Current state of job %s is %s", job.job_id, job.state)
 
+    @staticmethod
+    def _handle_job_error(job: BigQueryJob | UnknownJob) -> None:
+        if job.error_result:
+            raise AirflowException(f"BigQuery job {job.job_id} failed: {job.error_result}")
+
+    def _validate_records(self, records) -> None:
+        if not records:
+            raise AirflowException(f"The following query returned zero rows: {self.sql}")
+        elif not all(records):
+            self._raise_exception(  # type: ignore[attr-defined]
+                f"Test failed.\nQuery:\n{self.sql}\nResults:\n{records!s}"
+            )
+
     def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
         """Act as a callback for when the trigger fires.
 
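Both execution paths now share one set of failure semantics through _validate_records: the synchronous path feeds it the first Row of job.result(), while the deferrable path (next hunk) feeds it event["records"]. A small sketch of the truthiness rules it applies, using made-up rows:

    # Mirrors _validate_records from the hunk above, with hypothetical inputs.
    assert all([1, "ok", 3.5])            # every value truthy -> check passes
    assert not all([1, 0, 3.5])           # a falsy cell (0, None, "") -> _raise_exception
    assert next(iter([]), []) == []       # empty result -> "returned zero rows" error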
@@ -333,13 +382,7 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator):
         if event["status"] == "error":
             raise AirflowException(event["message"])
 
-        records = event["records"]
-        if not records:
-            raise AirflowException("The query returned empty results")
-        elif not all(records):
-            self._raise_exception(  # type: ignore[attr-defined]
-                f"Test failed.\nQuery:\n{self.sql}\nResults:\n{records!s}"
-            )
+        self._validate_records(event["records"])
         self.log.info("Record: %s", event["records"])
         self.log.info("Success.")
 
@@ -454,8 +497,8 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
         self._handle_job_error(job)
         # job.result() returns a RowIterator. Mypy expects an instance of SupportsNext[Any] for
         # the next() call which the RowIterator does not resemble to. Hence, ignore the arg-type error.
-
-        self.check_value(
+        # Row passed to check_value is a collection of values only, without column names.
+        self.check_value(next(iter(job.result()), []))  # type: ignore[arg-type]
         self.log.info("Current state of job %s is %s", job.job_id, job.state)
 
     @staticmethod
@@ -542,7 +585,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperat
         labels: dict | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: float = 4.0,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         **kwargs,
     ) -> None:
         super().__init__(
@@ -753,7 +796,9 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
         self.log.info("All tests have passed")
 
 
-class BigQueryTableCheckOperator(_BigQueryDbHookMixin, SQLTableCheckOperator):
+class BigQueryTableCheckOperator(
+    _BigQueryDbHookMixin, SQLTableCheckOperator, _BigQueryOperatorsEncryptionConfigurationMixin
+):
     """
     Subclasses the SQLTableCheckOperator in order to provide a job id for OpenLineage to parse.
 
@@ -781,6 +826,13 @@ class BigQueryTableCheckOperator(_BigQueryDbHookMixin, SQLTableCheckOperator):
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
     :param labels: a dictionary containing labels for the table, passed to BigQuery
+    :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     """
 
     template_fields: Sequence[str] = tuple(set(SQLTableCheckOperator.template_fields) | {"gcp_conn_id"})
@@ -798,6 +850,7 @@ class BigQueryTableCheckOperator(_BigQueryDbHookMixin, SQLTableCheckOperator):
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         labels: dict | None = None,
+        encryption_configuration: dict | None = None,
         **kwargs,
     ) -> None:
         super().__init__(table=table, checks=checks, partition_clause=partition_clause, **kwargs)
@@ -806,6 +859,7 @@ class BigQueryTableCheckOperator(_BigQueryDbHookMixin, SQLTableCheckOperator):
         self.location = location
         self.impersonation_chain = impersonation_chain
         self.labels = labels
+        self.encryption_configuration = encryption_configuration
 
     def _submit_job(
         self,
@@ -815,6 +869,8 @@ class BigQueryTableCheckOperator(_BigQueryDbHookMixin, SQLTableCheckOperator):
         """Submit a new job and get the job id for polling the status using Trigger."""
         configuration = {"query": {"query": self.sql, "useLegacySql": self.use_legacy_sql}}
 
+        self.include_encryption_configuration(configuration, "query")
+
         return hook.insert_job(
             configuration=configuration,
             project_id=hook.project_id,
@@ -858,9 +914,10 @@ class BigQueryTableCheckOperator(_BigQueryDbHookMixin, SQLTableCheckOperator):
 
 class BigQueryGetDataOperator(GoogleCloudBaseOperator):
     """
-
+    Fetch data and return it, either from a BigQuery table, or results of a query job.
 
-    Data
+    Data could be narrowed down by specific columns or retrieved as a whole.
+    It is returned in either of the following two formats, based on "as_dict" value:
     1. False (Default) - A Python list of lists, with the number of nested lists equal to the number of rows
     fetched. Each nested list represents a row, where the elements within it correspond to the column values
     for that particular row.
@@ -880,27 +937,42 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
     .. note::
         If you pass fields to ``selected_fields`` which are in different order than the
         order of columns already in
-        BQ table, the data will still be in the order of BQ table.
+        BQ table/job, the data will still be in the order of BQ table.
         For example if the BQ table has 3 columns as
         ``[A,B,C]`` and you pass 'B,A' in the ``selected_fields``
         the data would still be of the form ``'A,B'``.
 
-    **Example**::
+    .. note::
+        When utilizing job id not in deferrable mode, the job should be in DONE state.
+
+    **Example - Retrieve data from BigQuery using table**::
 
         get_data = BigQueryGetDataOperator(
             task_id="get_data_from_bq",
             dataset_id="test_dataset",
             table_id="Transaction_partitions",
-            project_id="internal-gcp-project",
+            table_project_id="internal-gcp-project",
+            max_results=100,
+            selected_fields="DATE",
+            gcp_conn_id="airflow-conn-id",
+        )
+
+    **Example - Retrieve data from BigQuery using a job id**::
+
+        get_data = BigQueryGetDataOperator(
+            job_id="airflow_8999918812727394_86a1cecc69c5e3028d28247affd7563",
+            job_project_id="internal-gcp-project",
             max_results=100,
             selected_fields="DATE",
             gcp_conn_id="airflow-conn-id",
         )
 
     :param dataset_id: The dataset ID of the requested table. (templated)
-    :param table_id: The table ID of the requested table. (templated)
+    :param table_id: The table ID of the requested table. Mutually exclusive with job_id. (templated)
     :param table_project_id: (Optional) The project ID of the requested table.
         If None, it will be derived from the hook's project ID. (templated)
+    :param job_id: The job ID from which query results are retrieved.
+        Mutually exclusive with table_id. (templated)
     :param job_project_id: (Optional) Google Cloud Project where the job is running.
         If None, it will be derived from the hook's project ID. (templated)
     :param project_id: (Deprecated) (Optional) The name of the project where the data
@@ -931,6 +1003,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         "dataset_id",
         "table_id",
         "table_project_id",
+        "job_id",
         "job_project_id",
         "project_id",
         "max_results",
@@ -942,11 +1015,12 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
     def __init__(
         self,
         *,
-        dataset_id: str,
-        table_id: str,
+        dataset_id: str | None = None,
+        table_id: str | None = None,
         table_project_id: str | None = None,
+        job_id: str | None = None,
         job_project_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         max_results: int = 100,
         selected_fields: str | None = None,
         gcp_conn_id: str = "google_cloud_default",
@@ -964,6 +1038,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         self.dataset_id = dataset_id
         self.table_id = table_id
         self.job_project_id = job_project_id
+        self.job_id = job_id
         self.max_results = max_results
         self.selected_fields = selected_fields
         self.gcp_conn_id = gcp_conn_id
@@ -1000,7 +1075,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
             query += "*"
         query += (
             f" from `{self.table_project_id or hook.project_id}.{self.dataset_id}"
-            f".{self.table_id}` limit {
+            f".{self.table_id}` limit {self.max_results}"
         )
         return query
 
@@ -1013,7 +1088,13 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         if not self.table_project_id:
             self.table_project_id = self.project_id
         else:
-            self.log.info("Ignoring project_id parameter, as table_project_id is found.")
+            self.log.info("Ignoring 'project_id' parameter, as 'table_project_id' is found.")
+
+        if not exactly_one(self.job_id, self.table_id):
+            raise AirflowException(
+                "'job_id' and 'table_id' parameters are mutually exclusive, "
+                "ensure that exactly one of them is specified"
+            )
 
         hook = BigQueryHook(
             gcp_conn_id=self.gcp_conn_id,
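exactly_one comes from airflow.utils.helpers and returns True when exactly one of its arguments is truthy, so the operator now rejects both the neither-given and the both-given cases. A sketch of the semantics (the standalone re-implementation below is an assumption about the helper's behaviour, not its actual body):

    def exactly_one(*args) -> bool:
        # Assumed equivalent of airflow.utils.helpers.exactly_one.
        return sum(map(bool, args)) == 1

    assert exactly_one("job_123", None)            # job_id only: accepted
    assert exactly_one(None, "my_table")           # table_id only: accepted
    assert not exactly_one(None, None)             # neither: AirflowException
    assert not exactly_one("job_123", "my_table")  # both: AirflowException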
@@ -1022,31 +1103,45 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         )
 
         if not self.deferrable:
-            self.log.info(
-                "Fetching Data from %s.%s.%s max results: %s",
-                self.table_project_id or hook.project_id,
-                self.dataset_id,
-                self.table_id,
-                self.max_results,
-            )
-            if not self.selected_fields:
-                schema: dict[str, list] = hook.get_schema(
+            if not self.job_id:
+                self.log.info(
+                    "Fetching Data from %s.%s.%s max results: %s",
+                    self.table_project_id or hook.project_id,
+                    self.dataset_id,
+                    self.table_id,
+                    self.max_results,
+                )
+                if not self.selected_fields:
+                    schema: dict[str, list] = hook.get_schema(
+                        dataset_id=self.dataset_id,
+                        table_id=self.table_id,
+                        project_id=self.table_project_id or hook.project_id,
+                    )
+                    if "fields" in schema:
+                        self.selected_fields = ",".join([field["name"] for field in schema["fields"]])
+                rows: list[Row] | RowIterator | list[dict[str, Any]] = hook.list_rows(
                     dataset_id=self.dataset_id,
                     table_id=self.table_id,
+                    max_results=self.max_results,
+                    selected_fields=self.selected_fields,
+                    location=self.location,
                     project_id=self.table_project_id or hook.project_id,
                 )
-                if "fields" in schema:
-                    self.selected_fields = ",".join([field["name"] for field in schema["fields"]])
-
-            rows = hook.list_rows(
-                dataset_id=self.dataset_id,
-                table_id=self.table_id,
-                max_results=self.max_results,
-                selected_fields=self.selected_fields,
-                location=self.location,
-                project_id=self.table_project_id or hook.project_id,
-            )
-
+            else:
+                self.log.info(
+                    "Fetching data from job '%s:%s.%s' max results: %s",
+                    self.job_project_id or hook.project_id,
+                    self.location,
+                    self.job_id,
+                    self.max_results,
+                )
+                rows = hook.get_query_results(
+                    job_id=self.job_id,
+                    location=self.location,
+                    selected_fields=self.selected_fields,
+                    max_results=self.max_results,
+                    project_id=self.job_project_id or hook.project_id,
+                )
             if isinstance(rows, RowIterator):
                 raise TypeError(
                     "BigQueryHook.list_rows() returns iterator when return_iterator is False (default)"
@@ -1056,11 +1151,16 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
             if self.as_dict:
                 table_data = [dict(row) for row in rows]
             else:
-                table_data = [row.values() for row in rows]
+                table_data = [row.values() if isinstance(row, Row) else list(row.values()) for row in rows]
 
             return table_data
 
-        job: BigQueryJob | UnknownJob = self._submit_job(hook, job_id="")
+        if not self.job_id:
+            job: BigQueryJob | UnknownJob = self._submit_job(hook, job_id="")
+        else:
+            job = hook.get_job(
+                job_id=self.job_id, project_id=self.job_project_id or hook.project_id, location=self.location
+            )
 
         context["ti"].xcom_push(key="job_id", value=job.job_id)
         self.defer(
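The isinstance(row, Row) branch exists because the two fetch paths yield different row types: hook.list_rows returns google.cloud.bigquery.Row objects, whose values() is already a plain tuple of cell values, while hook.get_query_results returns dicts, whose values() view still needs materializing. A sketch with made-up data (the Row constructor arguments follow the (values, field_to_index) signature, to the best of my knowledge):

    from google.cloud.bigquery import Row

    bq_row = Row(("2024-01-01", 42), {"date": 0, "count": 1})  # shape from list_rows
    dict_row = {"date": "2024-01-01", "count": 42}             # shape from get_query_results

    def normalize(row):
        # Same expression as in the hunk above.
        return row.values() if isinstance(row, Row) else list(row.values())

    assert normalize(bq_row) == ("2024-01-01", 42)
    assert normalize(dict_row) == ["2024-01-01", 42]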
@@ -1075,6 +1175,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
                 poll_interval=self.poll_interval,
                 as_dict=self.as_dict,
                 impersonation_chain=self.impersonation_chain,
+                selected_fields=self.selected_fields,
             ),
             method_name="execute_complete",
         )
@@ -1163,7 +1264,7 @@ class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator):
         .. code-block:: python
 
             encryption_configuration = {
-                "kmsKeyName": "projects/
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
             }
     :param impersonation_chain: Optional service account to impersonate using short-term
         credentials, or chained list of accounts required to get the access_token
@@ -1403,7 +1504,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
         .. code-block:: python
 
             encryption_configuration = {
-                "kmsKeyName": "projects/
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
             }
     :param location: The location used for the operation.
     :param cluster_fields: [Optional] The fields used for clustering.
@@ -1447,7 +1548,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
         dataset_id: str,
         table_id: str,
         table_resource: dict[str, Any] | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         schema_fields: list | None = None,
         gcs_schema_object: str | None = None,
         time_partitioning: dict | None = None,
@@ -1631,7 +1732,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
         .. code-block:: python
 
             encryption_configuration = {
-                "kmsKeyName": "projects/
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
             }
     :param location: The location used for the operation.
     :param impersonation_chain: Optional service account to impersonate using short-term
@@ -1907,7 +2008,7 @@ class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator):
         self,
         *,
         dataset_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         delete_contents: bool = False,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -1987,7 +2088,7 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator):
         self,
         *,
         dataset_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         dataset_reference: dict | None = None,
         location: str | None = None,
         gcp_conn_id: str = "google_cloud_default",
@@ -2091,7 +2192,7 @@ class BigQueryGetDatasetOperator(GoogleCloudBaseOperator):
         self,
         *,
         dataset_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -2154,7 +2255,7 @@ class BigQueryGetDatasetTablesOperator(GoogleCloudBaseOperator):
         self,
         *,
         dataset_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         max_results: int | None = None,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -2223,7 +2324,7 @@ class BigQueryPatchDatasetOperator(GoogleCloudBaseOperator):
         *,
         dataset_id: str,
         dataset_resource: dict,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -2296,7 +2397,7 @@ class BigQueryUpdateTableOperator(GoogleCloudBaseOperator):
         fields: list[str] | None = None,
         dataset_id: str | None = None,
         table_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -2380,7 +2481,7 @@ class BigQueryUpdateDatasetOperator(GoogleCloudBaseOperator):
         dataset_resource: dict[str, Any],
         fields: list[str] | None = None,
         dataset_id: str | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -2516,7 +2617,7 @@ class BigQueryUpsertTableOperator(GoogleCloudBaseOperator):
         *,
         dataset_id: str,
         table_resource: dict,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
@@ -2623,7 +2724,7 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
         dataset_id: str,
         table_id: str,
         include_policy_tags: bool = False,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -2732,7 +2833,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMix
     def __init__(
         self,
         configuration: dict[str, Any],
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str | None = None,
         job_id: str | None = None,
         force_rerun: bool = True,
@@ -2903,6 +3004,7 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryOpenLineageMix
                     location=self.location or hook.location,
                     poll_interval=self.poll_interval,
                     impersonation_chain=self.impersonation_chain,
+                    cancel_on_kill=self.cancel_on_kill,
                 ),
                 method_name="execute_complete",
             )
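The recurring project_id: str = PROVIDE_PROJECT_ID substitution in this file, and again in the bigquery_dts and bigtable hunks below, replaces the previous project_id: str | None = None signatures. PROVIDE_PROJECT_ID is a sentinel from airflow/providers/google/common/hooks/base_google.py; as far as I can tell it is None cast to str, which keeps the annotation a plain str for type checkers while hooks still fall back to the connection's default project. A hedged sketch of the pattern:

    from typing import cast

    PROVIDE_PROJECT_ID: str = cast(str, None)  # assumed definition, per base_google.py

    def execute(project_id: str = PROVIDE_PROJECT_ID) -> str:
        # The sentinel is falsy, so hooks treat it as "not provided" and use
        # the project configured on the connection (hook.project_id) instead.
        return project_id or "project-from-connection"

    assert execute() == "project-from-connection"
    assert execute("explicit-project") == "explicit-project"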
airflow/providers/google/cloud/operators/bigquery_dts.py

@@ -37,6 +37,7 @@ from airflow.providers.google.cloud.hooks.bigquery_dts import BiqQueryDataTransf
 from airflow.providers.google.cloud.links.bigquery_dts import BigQueryDataTransferConfigLink
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.bigquery_dts import BigQueryDataTransferRunTrigger
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 
 if TYPE_CHECKING:
     from google.api_core.retry import Retry
@@ -94,7 +95,7 @@ class BigQueryCreateDataTransferOperator(GoogleCloudBaseOperator):
         self,
         *,
         transfer_config: dict,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str | None = None,
         authorization_code: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
@@ -187,7 +188,7 @@ class BigQueryDeleteDataTransferConfigOperator(GoogleCloudBaseOperator):
         self,
         *,
         transfer_config_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
@@ -273,7 +274,7 @@ class BigQueryDataTransferServiceStartTransferRunsOperator(GoogleCloudBaseOperat
         self,
         *,
         transfer_config_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str | None = None,
         requested_time_range: dict | None = None,
         requested_run_time: dict | None = None,
airflow/providers/google/cloud/operators/bigtable.py

@@ -31,6 +31,7 @@ from airflow.providers.google.cloud.links.bigtable import (
     BigtableTablesLink,
 )
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 
 if TYPE_CHECKING:
     import enum
|
|
112
113
|
instance_id: str,
|
113
114
|
main_cluster_id: str,
|
114
115
|
main_cluster_zone: str,
|
115
|
-
project_id: str
|
116
|
+
project_id: str = PROVIDE_PROJECT_ID,
|
116
117
|
replica_clusters: list[dict[str, str]] | None = None,
|
117
118
|
instance_display_name: str | None = None,
|
118
119
|
instance_type: enums.Instance.Type | None = None,
|
@@ -218,7 +219,7 @@ class BigtableUpdateInstanceOperator(GoogleCloudBaseOperator, BigtableValidation
         self,
         *,
         instance_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         instance_display_name: str | None = None,
         instance_type: enums.Instance.Type | enum.IntEnum | None = None,
         instance_labels: dict | None = None,
@@ -298,7 +299,7 @@ class BigtableDeleteInstanceOperator(GoogleCloudBaseOperator, BigtableValidation
         self,
         *,
         instance_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
@@ -374,7 +375,7 @@ class BigtableCreateTableOperator(GoogleCloudBaseOperator, BigtableValidationMix
         *,
         instance_id: str,
         table_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         initial_split_keys: list | None = None,
         column_families: dict[str, GarbageCollectionRule] | None = None,
         gcp_conn_id: str = "google_cloud_default",
@@ -478,7 +479,7 @@ class BigtableDeleteTableOperator(GoogleCloudBaseOperator, BigtableValidationMix
         *,
         instance_id: str,
         table_id: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         app_profile_id: str | None = None,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
@@ -559,7 +560,7 @@ class BigtableUpdateClusterOperator(GoogleCloudBaseOperator, BigtableValidationM
         instance_id: str,
         cluster_id: str,
         nodes: int,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
|