apache-airflow-providers-google 10.19.0rc1__py3-none-any.whl → 10.20.0rc1__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- airflow/providers/google/LICENSE +4 -4
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +4 -4
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +26 -0
- airflow/providers/google/cloud/hooks/dataflow.py +132 -1
- airflow/providers/google/cloud/hooks/datapipeline.py +22 -73
- airflow/providers/google/cloud/hooks/gcs.py +21 -0
- airflow/providers/google/cloud/hooks/pubsub.py +10 -1
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -0
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +15 -3
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -1
- airflow/providers/google/cloud/links/dataflow.py +25 -0
- airflow/providers/google/cloud/openlineage/mixins.py +271 -0
- airflow/providers/google/cloud/openlineage/utils.py +5 -218
- airflow/providers/google/cloud/operators/bigquery.py +74 -20
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +76 -0
- airflow/providers/google/cloud/operators/dataflow.py +235 -1
- airflow/providers/google/cloud/operators/datapipeline.py +29 -121
- airflow/providers/google/cloud/operators/dataplex.py +1 -1
- airflow/providers/google/cloud/operators/dataproc_metastore.py +17 -6
- airflow/providers/google/cloud/operators/kubernetes_engine.py +9 -6
- airflow/providers/google/cloud/operators/pubsub.py +18 -0
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +6 -0
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +16 -0
- airflow/providers/google/cloud/sensors/cloud_composer.py +171 -2
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +13 -0
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +56 -1
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +6 -12
- airflow/providers/google/cloud/triggers/cloud_composer.py +115 -0
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -0
- airflow/providers/google/cloud/utils/credentials_provider.py +81 -6
- airflow/providers/google/cloud/utils/external_token_supplier.py +175 -0
- airflow/providers/google/common/hooks/base_google.py +35 -1
- airflow/providers/google/common/utils/id_token_credentials.py +1 -1
- airflow/providers/google/get_provider_info.py +19 -14
- {apache_airflow_providers_google-10.19.0rc1.dist-info → apache_airflow_providers_google-10.20.0rc1.dist-info}/METADATA +41 -35
- {apache_airflow_providers_google-10.19.0rc1.dist-info → apache_airflow_providers_google-10.20.0rc1.dist-info}/RECORD +39 -37
- {apache_airflow_providers_google-10.19.0rc1.dist-info → apache_airflow_providers_google-10.20.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.19.0rc1.dist-info → apache_airflow_providers_google-10.20.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/bigquery.py

@@ -47,7 +47,7 @@ from airflow.providers.common.sql.operators.sql import (
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
 from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url
 from airflow.providers.google.cloud.links.bigquery import BigQueryDatasetLink, BigQueryTableLink
-from airflow.providers.google.cloud.openlineage.utils import _BigQueryOpenLineageMixin
+from airflow.providers.google.cloud.openlineage.mixins import _BigQueryOpenLineageMixin
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.bigquery import (
     BigQueryCheckTrigger,
@@ -67,6 +67,7 @@ if TYPE_CHECKING:
     from airflow.models.taskinstancekey import TaskInstanceKey
     from airflow.utils.context import Context

+
 BIGQUERY_JOB_DETAILS_LINK_FMT = "https://console.cloud.google.com/bigquery?j={job_id}"

 LABEL_REGEX = re.compile(r"^[\w-]{0,63}$")
@@ -149,7 +150,12 @@ class _BigQueryOperatorsEncryptionConfigurationMixin:
     # annotation of the `self`. Then you can inherit this class in the target operator.
     # e.g: BigQueryCheckOperator, BigQueryTableCheckOperator
     def include_encryption_configuration(  # type:ignore[misc]
-        self: BigQueryCheckOperator
+        self: BigQueryCheckOperator
+        | BigQueryTableCheckOperator
+        | BigQueryValueCheckOperator
+        | BigQueryColumnCheckOperator
+        | BigQueryGetDataOperator
+        | BigQueryIntervalCheckOperator,
         configuration: dict,
         config_key: str,
     ) -> None:
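The body of include_encryption_configuration is unchanged and not shown in this diff. As a rough sketch only (an assumption, not the released code), the helper presumably does no more than copy the operator's encryption settings into the job configuration under the given key:

    # Illustrative sketch, not part of the diff. Assumes the mixin injects the
    # operator's encryption_configuration into the BigQuery job configuration
    # (e.g. configuration["query"]["destinationEncryptionConfiguration"]) when set.
    def include_encryption_configuration(self, configuration: dict, config_key: str) -> None:
        if self.encryption_configuration:
            configuration[config_key]["destinationEncryptionConfiguration"] = self.encryption_configuration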
@@ -205,7 +211,7 @@ class BigQueryCheckOperator(
         Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account. (templated)
     :param labels: a dictionary containing labels for the table, passed to BigQuery.
-    :param encryption_configuration:
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).

         .. code-block:: python

@@ -326,7 +332,9 @@ class BigQueryCheckOperator(
         self.log.info("Success.")


-class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
+class BigQueryValueCheckOperator(
+    _BigQueryDbHookMixin, SQLValueCheckOperator, _BigQueryOperatorsEncryptionConfigurationMixin
+):
     """Perform a simple value check using sql code.

     .. seealso::
@@ -336,6 +344,13 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
     :param sql: SQL to execute.
     :param use_legacy_sql: Whether to use legacy SQL (true)
         or standard SQL (false).
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
     :param location: The geographic location of the job. See details at:
         https://cloud.google.com/bigquery/docs/locations#specifying_your_location
@@ -370,6 +385,7 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
         sql: str,
         pass_value: Any,
         tolerance: Any = None,
+        encryption_configuration: dict | None = None,
         gcp_conn_id: str = "google_cloud_default",
         use_legacy_sql: bool = True,
         location: str | None = None,
@@ -383,6 +399,7 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
         self.location = location
         self.gcp_conn_id = gcp_conn_id
         self.use_legacy_sql = use_legacy_sql
+        self.encryption_configuration = encryption_configuration
         self.impersonation_chain = impersonation_chain
         self.labels = labels
         self.deferrable = deferrable
@@ -401,6 +418,8 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
             },
         }

+        self.include_encryption_configuration(configuration, "query")
+
         return hook.insert_job(
             configuration=configuration,
             project_id=hook.project_id,
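For context (not part of the diff), a usage sketch of the new argument on BigQueryValueCheckOperator; the table, pass value, and KMS key below are placeholders, and only encryption_configuration is new in 10.20.0:

    from airflow.providers.google.cloud.operators.bigquery import BigQueryValueCheckOperator

    # Illustrative only; table, pass_value, and the KMS key name are placeholder values.
    check_count = BigQueryValueCheckOperator(
        task_id="check_count",
        sql="SELECT COUNT(*) FROM `my-project.my_dataset.my_table`",
        pass_value=1000,
        tolerance=0.1,
        use_legacy_sql=False,
        # New in 10.20.0: the check job itself now carries the CMEK settings.
        encryption_configuration={
            "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
        },
    )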
@@ -460,7 +479,9 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator):
         )


-class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
+class BigQueryIntervalCheckOperator(
+    _BigQueryDbHookMixin, SQLIntervalCheckOperator, _BigQueryOperatorsEncryptionConfigurationMixin
+):
     """
     Check that the values of metrics given as SQL expressions are within a tolerance of the older ones.

@@ -481,6 +502,13 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
         between the current day, and the prior days_back.
     :param use_legacy_sql: Whether to use legacy SQL (true)
         or standard SQL (false).
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
     :param location: The geographic location of the job. See details at:
         https://cloud.google.com/bigquery/docs/locations#specifying_your_location
@@ -520,6 +548,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
         gcp_conn_id: str = "google_cloud_default",
         use_legacy_sql: bool = True,
         location: str | None = None,
+        encryption_configuration: dict | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         labels: dict | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
@@ -538,6 +567,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
         self.gcp_conn_id = gcp_conn_id
         self.use_legacy_sql = use_legacy_sql
         self.location = location
+        self.encryption_configuration = encryption_configuration
         self.impersonation_chain = impersonation_chain
         self.labels = labels
         self.project_id = project_id
@@ -552,6 +582,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
     ) -> BigQueryJob:
         """Submit a new job and get the job id for polling the status using Triggerer."""
         configuration = {"query": {"query": sql, "useLegacySql": self.use_legacy_sql}}
+        self.include_encryption_configuration(configuration, "query")
         return hook.insert_job(
             configuration=configuration,
             project_id=self.project_id or hook.project_id,
@@ -608,7 +639,9 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperator):
         )


-class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
+class BigQueryColumnCheckOperator(
+    _BigQueryDbHookMixin, SQLColumnCheckOperator, _BigQueryOperatorsEncryptionConfigurationMixin
+):
     """
     Subclasses the SQLColumnCheckOperator in order to provide a job id for OpenLineage to parse.

@@ -623,6 +656,13 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
     :param partition_clause: a string SQL statement added to a WHERE clause
         to partition data
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     :param use_legacy_sql: Whether to use legacy SQL (true)
         or standard SQL (false).
     :param location: The geographic location of the job. See details at:
@@ -650,6 +690,7 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
         partition_clause: str | None = None,
         database: str | None = None,
         accept_none: bool = True,
+        encryption_configuration: dict | None = None,
         gcp_conn_id: str = "google_cloud_default",
         use_legacy_sql: bool = True,
         location: str | None = None,
@@ -671,6 +712,7 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
         self.database = database
         self.accept_none = accept_none
         self.gcp_conn_id = gcp_conn_id
+        self.encryption_configuration = encryption_configuration
         self.use_legacy_sql = use_legacy_sql
         self.location = location
         self.impersonation_chain = impersonation_chain
@@ -683,7 +725,7 @@ class BigQueryColumnCheckOperator(_BigQueryDbHookMixin, SQLColumnCheckOperator):
     ) -> BigQueryJob:
         """Submit a new job and get the job id for polling the status using Trigger."""
         configuration = {"query": {"query": self.sql, "useLegacySql": self.use_legacy_sql}}
-
+        self.include_encryption_configuration(configuration, "query")
         return hook.insert_job(
             configuration=configuration,
             project_id=hook.project_id,
@@ -765,7 +807,7 @@ class BigQueryTableCheckOperator(
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
     :param labels: a dictionary containing labels for the table, passed to BigQuery
-    :param encryption_configuration:
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).

         .. code-block:: python

@@ -851,7 +893,7 @@ class BigQueryTableCheckOperator(
         self.log.info("All tests have passed")


-class BigQueryGetDataOperator(GoogleCloudBaseOperator):
+class BigQueryGetDataOperator(GoogleCloudBaseOperator, _BigQueryOperatorsEncryptionConfigurationMixin):
     """
     Fetch data and return it, either from a BigQuery table, or results of a query job.

@@ -920,6 +962,13 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         from the table. (templated)
     :param selected_fields: List of fields to return (comma-separated). If
         unspecified, all fields are returned.
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+
+        .. code-block:: python
+
+            encryption_configuration = {
+                "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
+            }
     :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
     :param location: The location used for the operation.
     :param impersonation_chain: Optional service account to impersonate using short-term
@@ -964,6 +1013,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         selected_fields: str | None = None,
         gcp_conn_id: str = "google_cloud_default",
         location: str | None = None,
+        encryption_configuration: dict | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: float = 4.0,
@@ -983,6 +1033,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
         self.gcp_conn_id = gcp_conn_id
         self.location = location
         self.impersonation_chain = impersonation_chain
+        self.encryption_configuration = encryption_configuration
         self.project_id = project_id
         self.deferrable = deferrable
         self.poll_interval = poll_interval
@@ -996,6 +1047,8 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator):
     ) -> BigQueryJob:
         get_query = self.generate_query(hook=hook)
         configuration = {"query": {"query": get_query, "useLegacySql": self.use_legacy_sql}}
+        self.include_encryption_configuration(configuration, "query")
+
         """Submit a new job and get the job id for polling the status using Triggerer."""
         return hook.insert_job(
             configuration=configuration,
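A usage sketch of the deferrable path touched above, where the fetch runs as a query job and now carries the encryption settings; this is not from the diff, and the dataset, table, fields, and key are placeholders:

    from airflow.providers.google.cloud.operators.bigquery import BigQueryGetDataOperator

    get_rows = BigQueryGetDataOperator(
        task_id="get_rows",
        dataset_id="my_dataset",   # placeholder
        table_id="my_table",       # placeholder
        max_results=10,
        selected_fields="name,value",
        use_legacy_sql=False,
        deferrable=True,  # submits the fetch as a query job, the path shown in the hunk above
        encryption_configuration={
            "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
        },
    )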
@@ -1198,7 +1251,7 @@ class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator):
     :param location: The geographic location of the job. Required except for
         US and EU. See details at
         https://cloud.google.com/bigquery/docs/locations#specifying_your_location
-    :param encryption_configuration:
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).

         .. code-block:: python

@@ -1392,9 +1445,9 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):

         .. seealso::
             https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning
-    :param gcp_conn_id:
+    :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud and
         interact with the Bigquery service.
-    :param google_cloud_storage_conn_id:
+    :param google_cloud_storage_conn_id: (Optional) The connection ID used to connect to Google Cloud.
         and interact with the Google Cloud Storage service.
     :param labels: a dictionary containing labels for the table, passed to BigQuery

@@ -1432,13 +1485,13 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
                 google_cloud_storage_conn_id="airflow-conn-id",
             )

-    :param view:
+    :param view: (Optional) A dictionary containing definition for the view.
         If set, it will create a view instead of a table:

         .. seealso::
             https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
-    :param materialized_view:
-    :param encryption_configuration:
+    :param materialized_view: (Optional) The materialized view definition.
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).

         .. code-block:: python

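To illustrate the now-documented view parameter, a sketch that is not taken from the diff; the dataset, view name, and query are placeholders:

    from airflow.providers.google.cloud.operators.bigquery import BigQueryCreateEmptyTableOperator

    # Creates a logical view instead of a table, following the ViewDefinition shape
    # linked in the docstring above.
    create_view = BigQueryCreateEmptyTableOperator(
        task_id="create_view",
        dataset_id="my_dataset",
        table_id="my_view",
        view={
            "query": "SELECT name, value FROM `my-project.my_dataset.my_table`",
            "useLegacySql": False,
        },
    )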
@@ -1446,7 +1499,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator):
                 "kmsKeyName": "projects/PROJECT/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY",
             }
     :param location: The location used for the operation.
-    :param cluster_fields:
+    :param cluster_fields: (Optional) The fields used for clustering.
         BigQuery supports clustering for both partitioned and
         non-partitioned tables.

@@ -1644,7 +1697,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
     :param autodetect: Try to detect schema and format options automatically.
         The schema_fields and schema_object options will be honored when specified explicitly.
         https://cloud.google.com/bigquery/docs/schema-detect#schema_auto-detection_for_external_data_sources
-    :param compression:
+    :param compression: (Optional) The compression type of the data source.
         Possible values include GZIP and NONE.
         The default value is NONE.
         This setting is ignored for Google Cloud Bigtable,
@@ -1666,7 +1719,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator):
         and interact with the Google Cloud Storage service.
     :param src_fmt_configs: configure optional fields specific to the source format
     :param labels: a dictionary containing labels for the table, passed to BigQuery
-    :param encryption_configuration:
+    :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).

         .. code-block:: python

@@ -2666,6 +2719,7 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
         project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
+        location: str | None = None,
         **kwargs,
     ) -> None:
         self.schema_fields_updates = schema_fields_updates
@@ -2675,12 +2729,12 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator):
         self.project_id = project_id
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
+        self.location = location
         super().__init__(**kwargs)

     def execute(self, context: Context):
         bq_hook = BigQueryHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
+            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain, location=self.location
         )

         table = bq_hook.update_table_schema(
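A usage sketch of the new location argument (not from the diff; dataset, table, region, and field names are placeholders). The value is simply forwarded to BigQueryHook, as the hunk above shows:

    from airflow.providers.google.cloud.operators.bigquery import BigQueryUpdateTableSchemaOperator

    update_description = BigQueryUpdateTableSchemaOperator(
        task_id="update_description",
        dataset_id="my_dataset",
        table_id="my_table",
        location="europe-west3",  # new in 10.20.0; passed through to BigQueryHook
        schema_fields_updates=[
            {"name": "emp_name", "description": "Name of the employee"},
        ],
    )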
airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py

@@ -443,6 +443,82 @@ class CloudDataTransferServiceDeleteJobOperator(GoogleCloudBaseOperator):
         hook.delete_transfer_job(job_name=self.job_name, project_id=self.project_id)


+class CloudDataTransferServiceRunJobOperator(GoogleCloudBaseOperator):
+    """
+    Runs a transfer job.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:CloudDataTransferServiceRunJobOperator`
+
+    :param job_name: (Required) Name of the job to be run
+    :param project_id: (Optional) the ID of the project that owns the Transfer
+        Job. If set to None or missing, the default project_id from the Google Cloud
+        connection is used.
+    :param gcp_conn_id: The connection ID used to connect to Google Cloud.
+    :param api_version: API version used (e.g. v1).
+    :param google_impersonation_chain: Optional Google service account to impersonate using
+        short-term credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    """
+
+    # [START gcp_transfer_job_run_template_fields]
+    template_fields: Sequence[str] = (
+        "job_name",
+        "project_id",
+        "gcp_conn_id",
+        "api_version",
+        "google_impersonation_chain",
+    )
+    # [END gcp_transfer_job_run_template_fields]
+    operator_extra_links = (CloudStorageTransferJobLink(),)
+
+    def __init__(
+        self,
+        *,
+        job_name: str,
+        gcp_conn_id: str = "google_cloud_default",
+        api_version: str = "v1",
+        project_id: str = PROVIDE_PROJECT_ID,
+        google_impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.job_name = job_name
+        self.project_id = project_id
+        self.gcp_conn_id = gcp_conn_id
+        self.api_version = api_version
+        self.google_impersonation_chain = google_impersonation_chain
+
+    def _validate_inputs(self) -> None:
+        if not self.job_name:
+            raise AirflowException("The required parameter 'job_name' is empty or None")
+
+    def execute(self, context: Context) -> dict:
+        self._validate_inputs()
+        hook = CloudDataTransferServiceHook(
+            api_version=self.api_version,
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.google_impersonation_chain,
+        )
+
+        project_id = self.project_id or hook.project_id
+        if project_id:
+            CloudStorageTransferJobLink.persist(
+                context=context,
+                task_instance=self,
+                project_id=project_id,
+                job_name=self.job_name,
+            )
+
+        return hook.run_transfer_job(job_name=self.job_name, project_id=project_id)
+
+
 class CloudDataTransferServiceGetOperationOperator(GoogleCloudBaseOperator):
     """
     Gets the latest state of a long-running operation in Google Storage Transfer Service.
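A minimal usage sketch of the new operator (not from the diff; the transfer job name and project are placeholders):

    from airflow.providers.google.cloud.operators.cloud_storage_transfer_service import (
        CloudDataTransferServiceRunJobOperator,
    )

    run_transfer = CloudDataTransferServiceRunJobOperator(
        task_id="run_transfer",
        job_name="transferJobs/123456789012345678",  # placeholder job name
        project_id="my-project",  # falls back to the connection's project if omitted
    )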
airflow/providers/google/cloud/operators/dataflow.py

@@ -28,6 +28,7 @@ from functools import cached_property
 from typing import TYPE_CHECKING, Any, Sequence

 from deprecated import deprecated
+from googleapiclient.errors import HttpError

 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -38,7 +39,7 @@ from airflow.providers.google.cloud.hooks.dataflow import (
     process_line_and_extract_dataflow_job_id_callback,
 )
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
-from airflow.providers.google.cloud.links.dataflow import DataflowJobLink
+from airflow.providers.google.cloud.links.dataflow import DataflowJobLink, DataflowPipelineLink
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.dataflow import TemplateJobStartTrigger
 from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME
@@ -1358,3 +1359,236 @@ class DataflowStopJobOperator(GoogleCloudBaseOperator):
             self.log.info("No jobs to stop")

         return None
+
+
+class DataflowCreatePipelineOperator(GoogleCloudBaseOperator):
+    """
+    Creates a new Dataflow Data Pipeline instance.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:DataflowCreatePipelineOperator`
+
+    :param body: The request body (contains instance of Pipeline). See:
+        https://cloud.google.com/dataflow/docs/reference/data-pipelines/rest/v1/projects.locations.pipelines/create#request-body
+    :param project_id: The ID of the GCP project that owns the job.
+    :param location: The location to direct the Data Pipelines instance to (for example us-central1).
+    :param gcp_conn_id: The connection ID to connect to the Google Cloud
+        Platform.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+
+    .. warning::
+        This option requires Apache Beam 2.39.0 or newer.
+
+    Returns the created Dataflow Data Pipeline instance in JSON representation.
+    """
+
+    operator_extra_links = (DataflowPipelineLink(),)
+
+    def __init__(
+        self,
+        *,
+        body: dict,
+        project_id: str = PROVIDE_PROJECT_ID,
+        location: str = DEFAULT_DATAFLOW_LOCATION,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+
+        self.body = body
+        self.project_id = project_id
+        self.location = location
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.dataflow_hook: DataflowHook | None = None
+
+        self.pipeline_name = self.body["name"].split("/")[-1] if self.body else None
+
+    def execute(self, context: Context):
+        if self.body is None:
+            raise AirflowException(
+                "Request Body not given; cannot create a Data Pipeline without the Request Body."
+            )
+        if self.project_id is None:
+            raise AirflowException(
+                "Project ID not given; cannot create a Data Pipeline without the Project ID."
+            )
+        if self.location is None:
+            raise AirflowException("location not given; cannot create a Data Pipeline without the location.")
+
+        self.dataflow_hook = DataflowHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+        self.body["pipelineSources"] = {"airflow": "airflow"}
+        try:
+            self.pipeline = self.dataflow_hook.create_data_pipeline(
+                project_id=self.project_id,
+                body=self.body,
+                location=self.location,
+            )
+        except HttpError as e:
+            if e.resp.status == 409:
+                # If the pipeline already exists, retrieve it
+                self.log.info("Pipeline with given name already exists.")
+                self.pipeline = self.dataflow_hook.get_data_pipeline(
+                    project_id=self.project_id,
+                    pipeline_name=self.pipeline_name,
+                    location=self.location,
+                )
+        DataflowPipelineLink.persist(self, context, self.project_id, self.location, self.pipeline_name)
+        self.xcom_push(context, key="pipeline_name", value=self.pipeline_name)
+        if self.pipeline:
+            if "error" in self.pipeline:
+                raise AirflowException(self.pipeline.get("error").get("message"))
+
+        return self.pipeline
+
+
+class DataflowRunPipelineOperator(GoogleCloudBaseOperator):
+    """
+    Runs a Dataflow Data Pipeline.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:DataflowRunPipelineOperator`
+
+    :param pipeline_name: The display name of the pipeline. In example
+        projects/PROJECT_ID/locations/LOCATION_ID/pipelines/PIPELINE_ID it would be the PIPELINE_ID.
+    :param project_id: The ID of the GCP project that owns the job.
+    :param location: The location to direct the Data Pipelines instance to (for example us-central1).
+    :param gcp_conn_id: The connection ID to connect to the Google Cloud Platform.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+
+    Returns the created Job in JSON representation.
+    """
+
+    operator_extra_links = (DataflowJobLink(),)
+
+    def __init__(
+        self,
+        pipeline_name: str,
+        project_id: str = PROVIDE_PROJECT_ID,
+        location: str = DEFAULT_DATAFLOW_LOCATION,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+
+        self.pipeline_name = pipeline_name
+        self.project_id = project_id
+        self.location = location
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.dataflow_hook: DataflowHook | None = None
+
+    def execute(self, context: Context):
+        self.dataflow_hook = DataflowHook(
+            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
+        )
+
+        if self.pipeline_name is None:
+            raise AirflowException("Data Pipeline name not given; cannot run unspecified pipeline.")
+        if self.project_id is None:
+            raise AirflowException("Data Pipeline Project ID not given; cannot run pipeline.")
+        if self.location is None:
+            raise AirflowException("Data Pipeline location not given; cannot run pipeline.")
+        try:
+            self.job = self.dataflow_hook.run_data_pipeline(
+                pipeline_name=self.pipeline_name,
+                project_id=self.project_id,
+                location=self.location,
+            )["job"]
+            job_id = self.dataflow_hook.extract_job_id(self.job)
+            self.xcom_push(context, key="job_id", value=job_id)
+            DataflowJobLink.persist(self, context, self.project_id, self.location, job_id)
+        except HttpError as e:
+            if e.resp.status == 404:
+                raise AirflowException("Pipeline with given name was not found.")
+        except Exception as exc:
+            raise AirflowException("Error occurred when running Pipeline: %s", exc)
+
+        return self.job
+
+
+class DataflowDeletePipelineOperator(GoogleCloudBaseOperator):
+    """
+    Deletes a Dataflow Data Pipeline.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:DataflowDeletePipelineOperator`
+
+    :param pipeline_name: The display name of the pipeline. In example
+        projects/PROJECT_ID/locations/LOCATION_ID/pipelines/PIPELINE_ID it would be the PIPELINE_ID.
+    :param project_id: The ID of the GCP project that owns the job.
+    :param location: The location to direct the Data Pipelines instance to (for example us-central1).
+    :param gcp_conn_id: The connection ID to connect to the Google Cloud Platform.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    """
+
+    def __init__(
+        self,
+        pipeline_name: str,
+        project_id: str = PROVIDE_PROJECT_ID,
+        location: str = DEFAULT_DATAFLOW_LOCATION,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+
+        self.pipeline_name = pipeline_name
+        self.project_id = project_id
+        self.location = location
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self.dataflow_hook: DataflowHook | None = None
+        self.response: dict | None = None
+
+    def execute(self, context: Context):
+        self.dataflow_hook = DataflowHook(
+            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
+        )
+
+        if self.pipeline_name is None:
+            raise AirflowException("Data Pipeline name not given; cannot run unspecified pipeline.")
+        if self.project_id is None:
+            raise AirflowException("Data Pipeline Project ID not given; cannot run pipeline.")
+        if self.location is None:
+            raise AirflowException("Data Pipeline location not given; cannot run pipeline.")
+
+        self.response = self.dataflow_hook.delete_data_pipeline(
+            pipeline_name=self.pipeline_name,
+            project_id=self.project_id,
+            location=self.location,
+        )
+
+        if self.response:
+            raise AirflowException(self.response)
+
+        return None