apache-airflow-providers-google 10.2.0rc1__py3-none-any.whl → 10.3.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +38 -39
- airflow/providers/google/ads/transfers/ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +6 -9
- airflow/providers/google/cloud/hooks/bigquery.py +328 -318
- airflow/providers/google/cloud/hooks/cloud_sql.py +66 -22
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +46 -70
- airflow/providers/google/cloud/hooks/dataflow.py +11 -15
- airflow/providers/google/cloud/hooks/dataform.py +3 -3
- airflow/providers/google/cloud/hooks/dataproc.py +577 -573
- airflow/providers/google/cloud/hooks/functions.py +60 -76
- airflow/providers/google/cloud/hooks/gcs.py +108 -18
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +69 -90
- airflow/providers/google/cloud/links/datafusion.py +4 -3
- airflow/providers/google/cloud/operators/bigquery.py +201 -191
- airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/operators/cloud_build.py +2 -1
- airflow/providers/google/cloud/operators/cloud_composer.py +4 -3
- airflow/providers/google/cloud/operators/cloud_sql.py +62 -28
- airflow/providers/google/cloud/operators/dataflow.py +6 -4
- airflow/providers/google/cloud/operators/dataform.py +3 -2
- airflow/providers/google/cloud/operators/dataproc.py +127 -123
- airflow/providers/google/cloud/operators/dataproc_metastore.py +18 -26
- airflow/providers/google/cloud/operators/gcs.py +35 -13
- airflow/providers/google/cloud/operators/kubernetes_engine.py +92 -42
- airflow/providers/google/cloud/operators/mlengine.py +2 -6
- airflow/providers/google/cloud/operators/vision.py +47 -56
- airflow/providers/google/cloud/sensors/bigquery.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +5 -7
- airflow/providers/google/cloud/sensors/pubsub.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +3 -2
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +6 -5
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +46 -7
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +5 -2
- airflow/providers/google/cloud/triggers/cloud_sql.py +102 -0
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +28 -6
- airflow/providers/google/cloud/utils/bigquery.py +17 -0
- airflow/providers/google/get_provider_info.py +7 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +4 -0
- airflow/providers/google/suite/transfers/local_to_drive.py +28 -26
- apache_airflow_providers_google-10.3.0rc1.dist-info/METADATA +289 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/RECORD +49 -48
- apache_airflow_providers_google-10.2.0rc1.dist-info/METADATA +0 -1824
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/LICENSE +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/NOTICE +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/top_level.txt +0 -0
airflow/providers/google/cloud/operators/dataproc.py

@@ -36,6 +36,7 @@ from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
 from google.protobuf.duration_pb2 import Duration
 from google.protobuf.field_mask_pb2 import FieldMask
 
+from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.hooks.dataproc import DataprocHook, DataProcJobBuilder
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
@@ -64,8 +65,7 @@ if TYPE_CHECKING:
 
 
 class ClusterGenerator:
-    """
-    Create a new Dataproc Cluster.
+    """Create a new Dataproc Cluster.
 
     :param cluster_name: The name of the DataProc cluster to create. (templated)
     :param project_id: The ID of the google cloud project in which
@@ -173,7 +173,6 @@ class ClusterGenerator:
         enable_component_gateway: bool | None = False,
         **kwargs,
     ) -> None:
-
         self.project_id = project_id
         self.num_masters = num_masters
         self.num_workers = num_workers
@@ -395,9 +394,12 @@ class ClusterGenerator:
 
 
 class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
-    """
-
-    creation is successful or an error occurs
+    """Create a new cluster on Google Cloud Dataproc.
+
+    The operator will wait until the creation is successful or an error occurs
+    in the creation process.
+
+    If the cluster already exists and ``use_if_exists`` is True, the operator will:
 
     If the cluster already exists and ``use_if_exists`` is True then the operator will:
     - if cluster state is ERROR then delete it if specified and raise error
@@ -483,11 +485,10 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
         metadata: Sequence[tuple[str, str]] = (),
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         polling_interval_seconds: int = 10,
         **kwargs,
     ) -> None:
-
         # TODO: remove one day
         if cluster_config is None and virtual_cluster_config is None:
             warnings.warn(
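The change that repeats throughout this file swaps the hard-coded `deferrable: bool = False` default for a value read from Airflow's configuration, letting a deployment flip every operator that supports deferral into deferrable mode in one place. A minimal sketch of how that default resolves, assuming the standard `[operators] default_deferrable` option; the override shown at the end is illustrative, not part of this diff:

```python
# Illustrative sketch of the new default resolution; not code from this diff.
from airflow.configuration import conf

# With no [operators] default_deferrable entry in airflow.cfg, the fallback
# preserves the old behaviour (deferrable=False).
deferrable_default = conf.getboolean("operators", "default_deferrable", fallback=False)

# Flipping the config switches every operator that uses this default:
#
#   [operators]
#   default_deferrable = true
#
# An explicit argument still overrides the config-derived default:
#   DataprocCreateClusterOperator(..., deferrable=False)
```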
@@ -668,20 +669,22 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
 
 
 class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
-    """
-
+    """Scale, up or down, a cluster on Google Cloud Dataproc.
+
     The operator will wait until the cluster is re-scaled.
 
-
+    Example usage:
+
+    .. code-block:: python
 
         t1 = DataprocClusterScaleOperator(
-
-
-
-
-
-
-
+            task_id="dataproc_scale",
+            project_id="my-project",
+            cluster_name="cluster-1",
+            num_workers=10,
+            num_preemptible_workers=10,
+            graceful_decommission_timeout="1h",
+        )
 
     .. seealso::
         For more detail on about scaling clusters have a look at the reference:
@@ -804,8 +807,7 @@ class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
 
 
 class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
-    """
-    Deletes a cluster in a project.
+    """Delete a cluster in a project.
 
     :param region: Required. The Cloud Dataproc region in which to handle the request (templated).
     :param cluster_name: Required. The cluster name (templated).
@@ -848,7 +850,7 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
         metadata: Sequence[tuple[str, str]] = (),
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         polling_interval_seconds: int = 10,
         **kwargs,
     ):
@@ -917,8 +919,7 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
 
 
 class DataprocJobBaseOperator(GoogleCloudBaseOperator):
-    """
-    The base class for operators that launch job on DataProc.
+    """Base class for operators that launch job on DataProc.
 
     :param region: The specified region where the dataproc cluster is created.
     :param job_name: The job name used in the DataProc cluster. This name by default
@@ -981,7 +982,7 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
         job_error_states: set[str] | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
         asynchronous: bool = False,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         polling_interval_seconds: int = 10,
         **kwargs,
     ) -> None:
@@ -1095,9 +1096,9 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
 
 
 class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
-    """
-
-    will be passed to the cluster.
+    """Start a Pig query Job on a Cloud DataProc cluster.
+
+    The parameters of the operation will be passed to the cluster.
 
     It's a good practice to define dataproc_* parameters in the default_args of the dag
     like the cluster name and UDFs.
@@ -1116,13 +1117,13 @@ class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
     variables for the pig script to be resolved on the cluster or use the parameters to
     be resolved in the script as template parameters.
 
-
+    .. code-block:: python
 
         t1 = DataProcPigOperator(
-
-
-
-
+            task_id="dataproc_pig",
+            query="a_pig_script.pig",
+            variables={"out": "gs://example/output/{{ds}}"},
+        )
 
     .. seealso::
         For more detail on about job submission have a look at the reference:
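The restored docstring example above shows only the operator call; the surrounding docstring also recommends defining shared `dataproc_*` parameters in the DAG's `default_args`. A hedged, self-contained sketch of that advice, using the module's current class name (the docstring keeps the legacy `DataProcPigOperator` spelling) and placeholder cluster/region values:

```python
# Illustrative DAG based on the docstring example; cluster/region values are placeholders.
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.dataproc import DataprocSubmitPigJobOperator

with DAG(
    dag_id="dataproc_pig_example",
    start_date=datetime(2023, 1, 1),
    schedule=None,
    # Shared dataproc_* parameters defined once, as the docstring recommends.
    default_args={"cluster_name": "cluster-1", "region": "us-central1"},
) as dag:
    t1 = DataprocSubmitPigJobOperator(
        task_id="dataproc_pig",
        query="a_pig_script.pig",
        # {{ds}} is a Jinja template rendered at run time with the logical date.
        variables={"out": "gs://example/output/{{ds}}"},
    )
```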
@@ -1203,8 +1204,7 @@ class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
 
 
 class DataprocSubmitHiveJobOperator(DataprocJobBaseOperator):
-    """
-    Start a Hive query Job on a Cloud DataProc cluster.
+    """Start a Hive query Job on a Cloud DataProc cluster.
 
     :param query: The query or reference to the query file (q extension).
     :param query_uri: The HCFS URI of the script that contains the Hive queries.
@@ -1278,8 +1278,7 @@ class DataprocSubmitHiveJobOperator(DataprocJobBaseOperator):
 
 
 class DataprocSubmitSparkSqlJobOperator(DataprocJobBaseOperator):
-    """
-    Start a Spark SQL query Job on a Cloud DataProc cluster.
+    """Start a Spark SQL query Job on a Cloud DataProc cluster.
 
     :param query: The query or reference to the query file (q extension). (templated)
     :param query_uri: The HCFS URI of the script that contains the SQL queries.
@@ -1352,8 +1351,7 @@ class DataprocSubmitSparkSqlJobOperator(DataprocJobBaseOperator):
 
 
 class DataprocSubmitSparkJobOperator(DataprocJobBaseOperator):
-    """
-    Start a Spark Job on a Cloud DataProc cluster.
+    """Start a Spark Job on a Cloud DataProc cluster.
 
     :param main_jar: The HCFS URI of the jar file that contains the main class
         (use this or the main_class, not both together).
@@ -1426,8 +1424,7 @@ class DataprocSubmitSparkJobOperator(DataprocJobBaseOperator):
 
 
 class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator):
-    """
-    Start a Hadoop Job on a Cloud DataProc cluster.
+    """Start a Hadoop Job on a Cloud DataProc cluster.
 
     :param main_jar: The HCFS URI of the jar file containing the main class
         (use this or the main_class, not both together).
@@ -1478,8 +1475,7 @@ class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator):
         self.files = files
 
     def generate_job(self):
-        """
-        Helper method for easier migration to `DataprocSubmitJobOperator`.
+        """Helper method for easier migration to `DataprocSubmitJobOperator`.
 
         :return: Dict representing Dataproc job
         """
@@ -1500,8 +1496,7 @@ class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator):
 
 
 class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator):
-    """
-    Start a PySpark Job on a Cloud DataProc cluster.
+    """Start a PySpark Job on a Cloud DataProc cluster.
 
     :param main: [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main
         Python file to use as the driver. Must be a .py file. (templated)
@@ -1577,8 +1572,7 @@ class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator):
         self.pyfiles = pyfiles
 
     def generate_job(self):
-        """
-        Helper method for easier migration to `DataprocSubmitJobOperator`.
+        """Helper method for easier migration to :class:`DataprocSubmitJobOperator`.
 
         :return: Dict representing Dataproc job
         """
@@ -1617,8 +1611,7 @@ class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator):
 
 
 class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
-    """
-    Creates new workflow template.
+    """Creates new workflow template.
 
     :param project_id: Optional. The ID of the Google Cloud project the cluster belongs to.
     :param region: Required. The Cloud Dataproc region in which to handle the request.
@@ -1682,9 +1675,9 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
 
 
 class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
-    """
-
-    until the WorkflowTemplate is finished executing.
+    """Instantiate a WorkflowTemplate on Google Cloud Dataproc.
+
+    The operator will wait until the WorkflowTemplate is finished executing.
 
     .. seealso::
         Please refer to:
@@ -1739,7 +1732,7 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
         metadata: Sequence[tuple[str, str]] = (),
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         polling_interval_seconds: int = 10,
         **kwargs,
     ) -> None:
@@ -1796,10 +1789,10 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
         )
 
     def execute_complete(self, context, event=None) -> None:
-        """
-
-
-        successful.
+        """Callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
         """
         if event["status"] == "failed" or event["status"] == "error":
             self.log.exception("Unexpected error in the operation.")
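Each of the `execute_complete` rewrites in these hunks documents the same deferral contract: `execute` calls `self.defer(trigger=..., method_name="execute_complete")`, the worker slot is released, and once the trigger fires the method is re-invoked with the trigger's event payload. A skeleton of the resume side; the `event["status"]` keys come from the hunk above, while the exact failure message is an assumption:

```python
# Skeleton of the resume-side callback; event["status"] values come from the
# hunk above, the failure message wording is assumed.
from airflow.exceptions import AirflowException

def execute_complete(self, context, event=None) -> None:
    if event["status"] == "failed" or event["status"] == "error":
        # Raising here marks the deferred task as failed.
        raise AirflowException(f"Unexpected error in the operation: {event}")
    # Falling through (returning None) marks the task successful.
```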
@@ -1809,9 +1802,9 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
 
 
 class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator):
-    """
-
-    wait until the WorkflowTemplate is finished executing.
+    """Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc.
+
+    The operator will wait until the WorkflowTemplate is finished executing.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -1867,7 +1860,7 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
         metadata: Sequence[tuple[str, str]] = (),
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         polling_interval_seconds: int = 10,
         **kwargs,
     ) -> None:
@@ -1921,10 +1914,10 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
         )
 
     def execute_complete(self, context, event=None) -> None:
-        """
-
-
-        successful.
+        """Callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
         """
         if event["status"] == "failed" or event["status"] == "error":
             self.log.exception("Unexpected error in the operation.")
@@ -1934,8 +1927,7 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
 
 
 class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
-    """
-    Submits a job to a cluster.
+    """Submit a job to a cluster.
 
     :param project_id: Optional. The ID of the Google Cloud project that the job belongs to.
     :param region: Required. The Cloud Dataproc region in which to handle the request.
@@ -1988,7 +1980,7 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         asynchronous: bool = False,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         polling_interval_seconds: int = 10,
         cancel_on_kill: bool = True,
         wait_timeout: int | None = None,
@@ -2063,10 +2055,10 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         return self.job_id
 
     def execute_complete(self, context, event=None) -> None:
-        """
-
-
-        successful.
+        """Callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
         """
         job_state = event["job_state"]
         job_id = event["job_id"]
@@ -2083,8 +2075,7 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
 
 
 class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
-    """
-    Updates a cluster in a project.
+    """Update a cluster in a project.
 
     :param region: Required. The Cloud Dataproc region in which to handle the request.
     :param project_id: Optional. The ID of the Google Cloud project the cluster belongs to.
@@ -2149,7 +2140,7 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
         metadata: Sequence[tuple[str, str]] = (),
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         polling_interval_seconds: int = 10,
         **kwargs,
     ):
@@ -2222,8 +2213,7 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
 
 
 class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
-    """
-    Creates a batch workload.
+    """Create a batch workload.
 
     :param project_id: Optional. The ID of the Google Cloud project that the cluster belongs to. (templated)
     :param region: Required. The Cloud Dataproc region in which to handle the request. (templated)
@@ -2281,7 +2271,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
         impersonation_chain: str | Sequence[str] | None = None,
         result_retry: Retry | _MethodDefault = DEFAULT,
         asynchronous: bool = False,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         polling_interval_seconds: int = 5,
         **kwargs,
     ):
@@ -2342,6 +2332,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
                 return self.operation.operation.name
 
         else:
+            # processing ends in execute_complete
             self.defer(
                 trigger=DataprocBatchTrigger(
                     batch_id=self.batch_id,
@@ -2359,62 +2350,79 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
             # This is only likely to happen if batch_id was provided
             # Could be running if Airflow was restarted after task started
             # poll until a final state is reached
-
-
-
-
-
-
-
-
-
-
+
+            self.log.info("Attaching to the job %s if it is still running.", self.batch_id)
+
+            # deferrable handling of a batch_id that already exists - processing ends in execute_complete
+            if self.deferrable:
+                self.defer(
+                    trigger=DataprocBatchTrigger(
+                        batch_id=self.batch_id,
+                        project_id=self.project_id,
+                        region=self.region,
+                        gcp_conn_id=self.gcp_conn_id,
+                        impersonation_chain=self.impersonation_chain,
+                        polling_interval_seconds=self.polling_interval_seconds,
+                    ),
+                    method_name="execute_complete",
                 )
-            # It is possible we don't have a result in the case where batch_id was not provide, one was generated
-            # by chance, AlreadyExists was caught, but we can't reattach because we don't have the generated id
-            if result is None:
-                raise AirflowException("The job could not be reattached because the id was generated.")
 
-
-
-
+            # non-deferrable handling of a batch_id that already exists
+            result = hook.wait_for_batch(
+                batch_id=self.batch_id,
+                region=self.region,
+                project_id=self.project_id,
+                retry=self.retry,
+                timeout=self.timeout,
+                metadata=self.metadata,
+                wait_check_interval=self.polling_interval_seconds,
+            )
         batch_id = self.batch_id or result.name.split("/")[-1]
-
-        if result.state == Batch.State.FAILED:
-            raise AirflowException(f"Batch job {batch_id} failed. Driver Logs: {link}")
-        if result.state in (Batch.State.CANCELLED, Batch.State.CANCELLING):
-            raise AirflowException(f"Batch job {batch_id} was cancelled. Driver logs: {link}")
-        if result.state == Batch.State.STATE_UNSPECIFIED:
-            raise AirflowException(f"Batch job {batch_id} unspecified. Driver logs: {link}")
-        self.log.info("Batch job %s completed. Driver logs: %s", batch_id, link)
-        DataprocLink.persist(context=context, task_instance=self, url=DATAPROC_BATCH_LINK, resource=batch_id)
+        self.handle_batch_status(context, result.state, batch_id)
         return Batch.to_dict(result)
 
     def execute_complete(self, context, event=None) -> None:
-        """
-
-
-        successful.
+        """Callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
         """
         if event is None:
            raise AirflowException("Batch failed.")
-
+        state = event["batch_state"]
         batch_id = event["batch_id"]
-
-        if batch_state == Batch.State.FAILED:
-            raise AirflowException(f"Batch failed:\n{batch_id}")
-        if batch_state == Batch.State.CANCELLED:
-            raise AirflowException(f"Batch was cancelled:\n{batch_id}")
-        self.log.info("%s completed successfully.", self.task_id)
+        self.handle_batch_status(context, state, batch_id)
 
     def on_kill(self):
         if self.operation:
             self.operation.cancel()
 
+    def handle_batch_status(self, context: Context, state: Batch.State, batch_id: str) -> None:
+        # The existing batch may be a number of states other than 'SUCCEEDED'\
+        # wait_for_operation doesn't fail if the job is cancelled, so we will check for it here which also
+        # finds a cancelling|canceled|unspecified job from wait_for_batch or the deferred trigger
+        link = DATAPROC_BATCH_LINK.format(region=self.region, project_id=self.project_id, resource=batch_id)
+        if state == Batch.State.FAILED:
+            DataprocLink.persist(
+                context=context, task_instance=self, url=DATAPROC_BATCH_LINK, resource=batch_id
+            )
+            raise AirflowException("Batch job %s failed. Driver Logs: %s", batch_id, link)
+        if state in (Batch.State.CANCELLED, Batch.State.CANCELLING):
+            DataprocLink.persist(
+                context=context, task_instance=self, url=DATAPROC_BATCH_LINK, resource=batch_id
+            )
+            raise AirflowException("Batch job %s was cancelled. Driver logs: %s", batch_id, link)
+        if state == Batch.State.STATE_UNSPECIFIED:
+            DataprocLink.persist(
+                context=context, task_instance=self, url=DATAPROC_BATCH_LINK, resource=batch_id
+            )
+            raise AirflowException("Batch job %s unspecified. Driver logs: %s", batch_id, link)
+        self.log.info("Batch job %s completed. Driver logs: %s", batch_id, link)
+        DataprocLink.persist(context=context, task_instance=self, url=DATAPROC_BATCH_LINK, resource=batch_id)
+
 
 class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
-    """
-    Deletes the batch workload resource.
+    """Delete the batch workload resource.
 
     :param batch_id: Required. The ID to use for the batch, which will become the final component
         of the batch's resource name.
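The hunk above folds three near-identical failure branches into one `handle_batch_status` helper shared by both code paths: the non-deferrable path reaches it via `hook.wait_for_batch`, the deferrable path via `execute_complete`. It also adds reattachment when a batch with the given `batch_id` already exists. A hedged usage sketch of the operator in deferrable mode; project, region, and the batch spec are placeholders:

```python
# Illustrative task definition; all identifiers below are placeholders.
from airflow.providers.google.cloud.operators.dataproc import DataprocCreateBatchOperator

create_batch = DataprocCreateBatchOperator(
    task_id="create_batch",
    project_id="my-project",
    region="us-central1",
    batch_id="example-batch-001",  # if this batch already exists, the operator reattaches to it
    batch={
        "spark_batch": {
            "main_class": "org.apache.spark.examples.SparkPi",
            "jar_file_uris": ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
        }
    },
    deferrable=True,  # frees the worker slot; DataprocBatchTrigger polls until a final state
    polling_interval_seconds=10,
)
```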
@@ -2477,8 +2485,7 @@ class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
 
 
 class DataprocGetBatchOperator(GoogleCloudBaseOperator):
-    """
-    Gets the batch workload resource representation.
+    """Get the batch workload resource representation.
 
     :param batch_id: Required. The ID to use for the batch, which will become the final component
         of the batch's resource name.
@@ -2545,8 +2552,7 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
 
 
 class DataprocListBatchesOperator(GoogleCloudBaseOperator):
-    """
-    Lists batch workloads.
+    """List batch workloads.
 
     :param region: Required. The Cloud Dataproc region in which to handle the request.
     :param project_id: Optional. The ID of the Google Cloud project that the cluster belongs to.
@@ -2568,7 +2574,6 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
-
     """
 
     template_fields: Sequence[str] = ("region", "project_id", "impersonation_chain")
@@ -2615,8 +2620,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
 
 
 class DataprocCancelOperationOperator(GoogleCloudBaseOperator):
-    """
-    Cancel the batch workload resource.
+    """Cancel the batch workload resource.
 
     :param operation_name: Required. The name of the operation resource to be cancelled.
     :param region: Required. The Cloud Dataproc region in which to handle the request.

airflow/providers/google/cloud/operators/dataproc_metastore.py

@@ -145,8 +145,7 @@ class DataprocMetastoreDetailedLink(BaseOperatorLink):
 
 
 class DataprocMetastoreCreateBackupOperator(GoogleCloudBaseOperator):
-    """
-    Creates a new backup in a given project and location.
+    """Create a new backup in a given project and location.
 
     :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
     :param region: Required. The ID of the Google Cloud region that the service belongs to.
@@ -260,8 +259,7 @@ class DataprocMetastoreCreateBackupOperator(GoogleCloudBaseOperator):
 
 
 class DataprocMetastoreCreateMetadataImportOperator(GoogleCloudBaseOperator):
-    """
-    Creates a new MetadataImport in a given project and location.
+    """Create a new MetadataImport in a given project and location.
 
     :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
     :param region: Required. The ID of the Google Cloud region that the service belongs to.
@@ -361,8 +359,7 @@ class DataprocMetastoreCreateMetadataImportOperator(GoogleCloudBaseOperator):
 
 
 class DataprocMetastoreCreateServiceOperator(GoogleCloudBaseOperator):
-    """
-    Creates a metastore service in a project and location.
+    """Create a metastore service in a project and location.
 
     :param region: Required. The ID of the Google Cloud region that the service belongs to.
     :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
@@ -463,8 +460,7 @@ class DataprocMetastoreCreateServiceOperator(GoogleCloudBaseOperator):
 
 
 class DataprocMetastoreDeleteBackupOperator(GoogleCloudBaseOperator):
-    """
-    Deletes a single backup.
+    """Delete a single backup.
 
     :param project_id: Required. The ID of the Google Cloud project that the backup belongs to.
     :param region: Required. The ID of the Google Cloud region that the backup belongs to.
@@ -548,8 +544,7 @@ class DataprocMetastoreDeleteBackupOperator(GoogleCloudBaseOperator):
 
 
 class DataprocMetastoreDeleteServiceOperator(GoogleCloudBaseOperator):
-    """
-    Deletes a single service.
+    """Delete a single service.
 
     :param request: The request object. Request message for
         [DataprocMetastore.DeleteService][google.cloud.metastore.v1.DataprocMetastore.DeleteService].
@@ -606,8 +601,7 @@ class DataprocMetastoreDeleteServiceOperator(GoogleCloudBaseOperator):
 
 
 class DataprocMetastoreExportMetadataOperator(GoogleCloudBaseOperator):
-    """
-    Exports metadata from a service.
+    """Export metadata from a service.
 
     :param destination_gcs_folder: A Cloud Storage URI of a folder, in the format
         ``gs://<bucket_name>/<path_inside_bucket>``. A sub-folder
@@ -699,9 +693,10 @@ class DataprocMetastoreExportMetadataOperator(GoogleCloudBaseOperator):
         return destination_uri[5:] if destination_uri.startswith("gs://") else destination_uri
 
     def _wait_for_export_metadata(self, hook: DataprocMetastoreHook):
-        """
-
-
+        """Check that export was created successfully.
+
+        This is a workaround to an issue parsing result to MetadataExport inside
+        the SDK.
         """
         for time_to_wait in exponential_sleep_generator(initial=10, maximum=120):
             sleep(time_to_wait)
@@ -724,8 +719,7 @@ class DataprocMetastoreExportMetadataOperator(GoogleCloudBaseOperator):
 
 
 class DataprocMetastoreGetServiceOperator(GoogleCloudBaseOperator):
-    """
-    Gets the details of a single service.
+    """Get the details of a single service.
 
     :param region: Required. The ID of the Google Cloud region that the service belongs to.
     :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
@@ -797,8 +791,7 @@ class DataprocMetastoreGetServiceOperator(GoogleCloudBaseOperator):
 
 
 class DataprocMetastoreListBackupsOperator(GoogleCloudBaseOperator):
-    """
-    Lists backups in a service.
+    """List backups in a service.
 
     :param project_id: Required. The ID of the Google Cloud project that the backup belongs to.
     :param region: Required. The ID of the Google Cloud region that the backup belongs to.
@@ -882,8 +875,7 @@ class DataprocMetastoreListBackupsOperator(GoogleCloudBaseOperator):
 
 
 class DataprocMetastoreRestoreServiceOperator(GoogleCloudBaseOperator):
-    """
-    Restores a service from a backup.
+    """Restore a service from a backup.
 
     :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
     :param region: Required. The ID of the Google Cloud region that the service belongs to.
@@ -987,9 +979,10 @@ class DataprocMetastoreRestoreServiceOperator(GoogleCloudBaseOperator):
         DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_SERVICE_LINK)
 
     def _wait_for_restore_service(self, hook: DataprocMetastoreHook):
-        """
-
-
+        """Check that export was created successfully.
+
+        This is a workaround to an issue parsing result to MetadataExport inside
+        the SDK.
         """
         for time_to_wait in exponential_sleep_generator(initial=10, maximum=120):
             sleep(time_to_wait)
@@ -1010,8 +1003,7 @@ class DataprocMetastoreRestoreServiceOperator(GoogleCloudBaseOperator):
 
 
 class DataprocMetastoreUpdateServiceOperator(GoogleCloudBaseOperator):
-    """
-    Updates the parameters of a single service.
+    """Update the parameters of a single service.
 
     :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
     :param region: Required. The ID of the Google Cloud region that the service belongs to.