apache-airflow-providers-google 10.2.0rc1__py3-none-any.whl → 10.3.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +38 -39
  3. airflow/providers/google/ads/transfers/ads_to_gcs.py +4 -4
  4. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +6 -9
  5. airflow/providers/google/cloud/hooks/bigquery.py +328 -318
  6. airflow/providers/google/cloud/hooks/cloud_sql.py +66 -22
  7. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +46 -70
  8. airflow/providers/google/cloud/hooks/dataflow.py +11 -15
  9. airflow/providers/google/cloud/hooks/dataform.py +3 -3
  10. airflow/providers/google/cloud/hooks/dataproc.py +577 -573
  11. airflow/providers/google/cloud/hooks/functions.py +60 -76
  12. airflow/providers/google/cloud/hooks/gcs.py +108 -18
  13. airflow/providers/google/cloud/hooks/kubernetes_engine.py +69 -90
  14. airflow/providers/google/cloud/links/datafusion.py +4 -3
  15. airflow/providers/google/cloud/operators/bigquery.py +201 -191
  16. airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
  17. airflow/providers/google/cloud/operators/cloud_build.py +2 -1
  18. airflow/providers/google/cloud/operators/cloud_composer.py +4 -3
  19. airflow/providers/google/cloud/operators/cloud_sql.py +62 -28
  20. airflow/providers/google/cloud/operators/dataflow.py +6 -4
  21. airflow/providers/google/cloud/operators/dataform.py +3 -2
  22. airflow/providers/google/cloud/operators/dataproc.py +127 -123
  23. airflow/providers/google/cloud/operators/dataproc_metastore.py +18 -26
  24. airflow/providers/google/cloud/operators/gcs.py +35 -13
  25. airflow/providers/google/cloud/operators/kubernetes_engine.py +92 -42
  26. airflow/providers/google/cloud/operators/mlengine.py +2 -6
  27. airflow/providers/google/cloud/operators/vision.py +47 -56
  28. airflow/providers/google/cloud/sensors/bigquery.py +3 -2
  29. airflow/providers/google/cloud/sensors/gcs.py +5 -7
  30. airflow/providers/google/cloud/sensors/pubsub.py +2 -2
  31. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +3 -2
  32. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  33. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
  34. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +6 -5
  35. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +46 -7
  36. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +5 -2
  37. airflow/providers/google/cloud/triggers/cloud_sql.py +102 -0
  38. airflow/providers/google/cloud/triggers/kubernetes_engine.py +28 -6
  39. airflow/providers/google/cloud/utils/bigquery.py +17 -0
  40. airflow/providers/google/get_provider_info.py +7 -2
  41. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +4 -0
  42. airflow/providers/google/suite/transfers/local_to_drive.py +28 -26
  43. apache_airflow_providers_google-10.3.0rc1.dist-info/METADATA +289 -0
  44. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/RECORD +49 -48
  45. apache_airflow_providers_google-10.2.0rc1.dist-info/METADATA +0 -1824
  46. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/LICENSE +0 -0
  47. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/NOTICE +0 -0
  48. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/WHEEL +0 -0
  49. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/entry_points.txt +0 -0
  50. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/top_level.txt +0 -0
airflow/providers/google/cloud/operators/dataproc.py

@@ -36,6 +36,7 @@ from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
36
36
  from google.protobuf.duration_pb2 import Duration
37
37
  from google.protobuf.field_mask_pb2 import FieldMask
38
38
 
39
+ from airflow.configuration import conf
39
40
  from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
40
41
  from airflow.providers.google.cloud.hooks.dataproc import DataprocHook, DataProcJobBuilder
41
42
  from airflow.providers.google.cloud.hooks.gcs import GCSHook
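The ``from airflow.configuration import conf`` import added above supports the main behavioral change in this file: the Dataproc operators now take their ``deferrable`` default from the ``[operators] default_deferrable`` configuration option instead of hard-coding ``False``. A minimal sketch of that pattern (``ExampleDeferrableOperator`` is a hypothetical name used only for illustration):

.. code-block:: python

    from airflow.configuration import conf
    from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator


    class ExampleDeferrableOperator(GoogleCloudBaseOperator):
        """Hypothetical operator showing the config-driven ``deferrable`` default."""

        def __init__(
            self,
            *,
            # Reads [operators] default_deferrable; falls back to False when unset.
            deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
            **kwargs,
        ) -> None:
            super().__init__(**kwargs)
            self.deferrable = deferrable

With ``default_deferrable = True`` in ``airflow.cfg``, these operators run in deferrable mode by default; passing ``deferrable`` explicitly on a task still overrides the configured value.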
@@ -64,8 +65,7 @@ if TYPE_CHECKING:
64
65
 
65
66
 
66
67
  class ClusterGenerator:
67
- """
68
- Create a new Dataproc Cluster.
68
+ """Create a new Dataproc Cluster.
69
69
 
70
70
  :param cluster_name: The name of the DataProc cluster to create. (templated)
71
71
  :param project_id: The ID of the google cloud project in which
@@ -173,7 +173,6 @@ class ClusterGenerator:
173
173
  enable_component_gateway: bool | None = False,
174
174
  **kwargs,
175
175
  ) -> None:
176
-
177
176
  self.project_id = project_id
178
177
  self.num_masters = num_masters
179
178
  self.num_workers = num_workers
@@ -395,9 +394,12 @@ class ClusterGenerator:
395
394
 
396
395
 
397
396
  class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
398
- """
399
- Create a new cluster on Google Cloud Dataproc. The operator will wait until the
400
- creation is successful or an error occurs in the creation process.
397
+ """Create a new cluster on Google Cloud Dataproc.
398
+
399
+ The operator will wait until the creation is successful or an error occurs
400
+ in the creation process.
401
+
402
+ If the cluster already exists and ``use_if_exists`` is True, the operator will:
401
403
 
402
404
  If the cluster already exists and ``use_if_exists`` is True then the operator will:
403
405
  - if cluster state is ERROR then delete it if specified and raise error
@@ -483,11 +485,10 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
483
485
  metadata: Sequence[tuple[str, str]] = (),
484
486
  gcp_conn_id: str = "google_cloud_default",
485
487
  impersonation_chain: str | Sequence[str] | None = None,
486
- deferrable: bool = False,
488
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
487
489
  polling_interval_seconds: int = 10,
488
490
  **kwargs,
489
491
  ) -> None:
490
-
491
492
  # TODO: remove one day
492
493
  if cluster_config is None and virtual_cluster_config is None:
493
494
  warnings.warn(
@@ -668,20 +669,22 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
668
669
 
669
670
 
670
671
  class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
671
- """
672
- Scale, up or down, a cluster on Google Cloud Dataproc.
672
+ """Scale, up or down, a cluster on Google Cloud Dataproc.
673
+
673
674
  The operator will wait until the cluster is re-scaled.
674
675
 
675
- **Example**: ::
676
+ Example usage:
677
+
678
+ .. code-block:: python
676
679
 
677
680
  t1 = DataprocClusterScaleOperator(
678
- task_id='dataproc_scale',
679
- project_id='my-project',
680
- cluster_name='cluster-1',
681
- num_workers=10,
682
- num_preemptible_workers=10,
683
- graceful_decommission_timeout='1h',
684
- dag=dag)
681
+ task_id="dataproc_scale",
682
+ project_id="my-project",
683
+ cluster_name="cluster-1",
684
+ num_workers=10,
685
+ num_preemptible_workers=10,
686
+ graceful_decommission_timeout="1h",
687
+ )
685
688
 
686
689
  .. seealso::
687
690
  For more detail on about scaling clusters have a look at the reference:
@@ -804,8 +807,7 @@ class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
804
807
 
805
808
 
806
809
  class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
807
- """
808
- Deletes a cluster in a project.
810
+ """Delete a cluster in a project.
809
811
 
810
812
  :param region: Required. The Cloud Dataproc region in which to handle the request (templated).
811
813
  :param cluster_name: Required. The cluster name (templated).
@@ -848,7 +850,7 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
848
850
  metadata: Sequence[tuple[str, str]] = (),
849
851
  gcp_conn_id: str = "google_cloud_default",
850
852
  impersonation_chain: str | Sequence[str] | None = None,
851
- deferrable: bool = False,
853
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
852
854
  polling_interval_seconds: int = 10,
853
855
  **kwargs,
854
856
  ):
@@ -917,8 +919,7 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
917
919
 
918
920
 
919
921
  class DataprocJobBaseOperator(GoogleCloudBaseOperator):
920
- """
921
- The base class for operators that launch job on DataProc.
922
+ """Base class for operators that launch job on DataProc.
922
923
 
923
924
  :param region: The specified region where the dataproc cluster is created.
924
925
  :param job_name: The job name used in the DataProc cluster. This name by default
@@ -981,7 +982,7 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
981
982
  job_error_states: set[str] | None = None,
982
983
  impersonation_chain: str | Sequence[str] | None = None,
983
984
  asynchronous: bool = False,
984
- deferrable: bool = False,
985
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
985
986
  polling_interval_seconds: int = 10,
986
987
  **kwargs,
987
988
  ) -> None:
@@ -1095,9 +1096,9 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
1095
1096
 
1096
1097
 
1097
1098
  class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
1098
- """
1099
- Start a Pig query Job on a Cloud DataProc cluster. The parameters of the operation
1100
- will be passed to the cluster.
1099
+ """Start a Pig query Job on a Cloud DataProc cluster.
1100
+
1101
+ The parameters of the operation will be passed to the cluster.
1101
1102
 
1102
1103
  It's a good practice to define dataproc_* parameters in the default_args of the dag
1103
1104
  like the cluster name and UDFs.
@@ -1116,13 +1117,13 @@ class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
1116
1117
  variables for the pig script to be resolved on the cluster or use the parameters to
1117
1118
  be resolved in the script as template parameters.
1118
1119
 
1119
- **Example**: ::
1120
+ .. code-block:: python
1120
1121
 
1121
1122
  t1 = DataProcPigOperator(
1122
- task_id='dataproc_pig',
1123
- query='a_pig_script.pig',
1124
- variables={'out': 'gs://example/output/{{ds}}'},
1125
- dag=dag)
1123
+ task_id="dataproc_pig",
1124
+ query="a_pig_script.pig",
1125
+ variables={"out": "gs://example/output/{{ds}}"},
1126
+ )
1126
1127
 
1127
1128
  .. seealso::
1128
1129
  For more detail on about job submission have a look at the reference:
@@ -1203,8 +1204,7 @@ class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
1203
1204
 
1204
1205
 
1205
1206
  class DataprocSubmitHiveJobOperator(DataprocJobBaseOperator):
1206
- """
1207
- Start a Hive query Job on a Cloud DataProc cluster.
1207
+ """Start a Hive query Job on a Cloud DataProc cluster.
1208
1208
 
1209
1209
  :param query: The query or reference to the query file (q extension).
1210
1210
  :param query_uri: The HCFS URI of the script that contains the Hive queries.
@@ -1278,8 +1278,7 @@ class DataprocSubmitHiveJobOperator(DataprocJobBaseOperator):
1278
1278
 
1279
1279
 
1280
1280
  class DataprocSubmitSparkSqlJobOperator(DataprocJobBaseOperator):
1281
- """
1282
- Start a Spark SQL query Job on a Cloud DataProc cluster.
1281
+ """Start a Spark SQL query Job on a Cloud DataProc cluster.
1283
1282
 
1284
1283
  :param query: The query or reference to the query file (q extension). (templated)
1285
1284
  :param query_uri: The HCFS URI of the script that contains the SQL queries.
@@ -1352,8 +1351,7 @@ class DataprocSubmitSparkSqlJobOperator(DataprocJobBaseOperator):
1352
1351
 
1353
1352
 
1354
1353
  class DataprocSubmitSparkJobOperator(DataprocJobBaseOperator):
1355
- """
1356
- Start a Spark Job on a Cloud DataProc cluster.
1354
+ """Start a Spark Job on a Cloud DataProc cluster.
1357
1355
 
1358
1356
  :param main_jar: The HCFS URI of the jar file that contains the main class
1359
1357
  (use this or the main_class, not both together).
@@ -1426,8 +1424,7 @@ class DataprocSubmitSparkJobOperator(DataprocJobBaseOperator):
1426
1424
 
1427
1425
 
1428
1426
  class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator):
1429
- """
1430
- Start a Hadoop Job on a Cloud DataProc cluster.
1427
+ """Start a Hadoop Job on a Cloud DataProc cluster.
1431
1428
 
1432
1429
  :param main_jar: The HCFS URI of the jar file containing the main class
1433
1430
  (use this or the main_class, not both together).
@@ -1478,8 +1475,7 @@ class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator):
1478
1475
  self.files = files
1479
1476
 
1480
1477
  def generate_job(self):
1481
- """
1482
- Helper method for easier migration to `DataprocSubmitJobOperator`.
1478
+ """Helper method for easier migration to `DataprocSubmitJobOperator`.
1483
1479
 
1484
1480
  :return: Dict representing Dataproc job
1485
1481
  """
@@ -1500,8 +1496,7 @@ class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator):
1500
1496
 
1501
1497
 
1502
1498
  class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator):
1503
- """
1504
- Start a PySpark Job on a Cloud DataProc cluster.
1499
+ """Start a PySpark Job on a Cloud DataProc cluster.
1505
1500
 
1506
1501
  :param main: [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main
1507
1502
  Python file to use as the driver. Must be a .py file. (templated)
@@ -1577,8 +1572,7 @@ class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator):
1577
1572
  self.pyfiles = pyfiles
1578
1573
 
1579
1574
  def generate_job(self):
1580
- """
1581
- Helper method for easier migration to `DataprocSubmitJobOperator`.
1575
+ """Helper method for easier migration to :class:`DataprocSubmitJobOperator`.
1582
1576
 
1583
1577
  :return: Dict representing Dataproc job
1584
1578
  """
@@ -1617,8 +1611,7 @@ class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator):
1617
1611
 
1618
1612
 
1619
1613
  class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
1620
- """
1621
- Creates new workflow template.
1614
+ """Creates new workflow template.
1622
1615
 
1623
1616
  :param project_id: Optional. The ID of the Google Cloud project the cluster belongs to.
1624
1617
  :param region: Required. The Cloud Dataproc region in which to handle the request.
@@ -1682,9 +1675,9 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
1682
1675
 
1683
1676
 
1684
1677
  class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
1685
- """
1686
- Instantiate a WorkflowTemplate on Google Cloud Dataproc. The operator will wait
1687
- until the WorkflowTemplate is finished executing.
1678
+ """Instantiate a WorkflowTemplate on Google Cloud Dataproc.
1679
+
1680
+ The operator will wait until the WorkflowTemplate is finished executing.
1688
1681
 
1689
1682
  .. seealso::
1690
1683
  Please refer to:
@@ -1739,7 +1732,7 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
1739
1732
  metadata: Sequence[tuple[str, str]] = (),
1740
1733
  gcp_conn_id: str = "google_cloud_default",
1741
1734
  impersonation_chain: str | Sequence[str] | None = None,
1742
- deferrable: bool = False,
1735
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
1743
1736
  polling_interval_seconds: int = 10,
1744
1737
  **kwargs,
1745
1738
  ) -> None:
@@ -1796,10 +1789,10 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
1796
1789
  )
1797
1790
 
1798
1791
  def execute_complete(self, context, event=None) -> None:
1799
- """
1800
- Callback for when the trigger fires - returns immediately.
1801
- Relies on trigger to throw an exception, otherwise it assumes execution was
1802
- successful.
1792
+ """Callback for when the trigger fires.
1793
+
1794
+ This returns immediately. It relies on trigger to throw an exception,
1795
+ otherwise it assumes execution was successful.
1803
1796
  """
1804
1797
  if event["status"] == "failed" or event["status"] == "error":
1805
1798
  self.log.exception("Unexpected error in the operation.")
@@ -1809,9 +1802,9 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
1809
1802
 
1810
1803
 
1811
1804
  class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator):
1812
- """
1813
- Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc. The operator will
1814
- wait until the WorkflowTemplate is finished executing.
1805
+ """Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc.
1806
+
1807
+ The operator will wait until the WorkflowTemplate is finished executing.
1815
1808
 
1816
1809
  .. seealso::
1817
1810
  For more information on how to use this operator, take a look at the guide:
@@ -1867,7 +1860,7 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
1867
1860
  metadata: Sequence[tuple[str, str]] = (),
1868
1861
  gcp_conn_id: str = "google_cloud_default",
1869
1862
  impersonation_chain: str | Sequence[str] | None = None,
1870
- deferrable: bool = False,
1863
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
1871
1864
  polling_interval_seconds: int = 10,
1872
1865
  **kwargs,
1873
1866
  ) -> None:
@@ -1921,10 +1914,10 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
1921
1914
  )
1922
1915
 
1923
1916
  def execute_complete(self, context, event=None) -> None:
1924
- """
1925
- Callback for when the trigger fires - returns immediately.
1926
- Relies on trigger to throw an exception, otherwise it assumes execution was
1927
- successful.
1917
+ """Callback for when the trigger fires.
1918
+
1919
+ This returns immediately. It relies on trigger to throw an exception,
1920
+ otherwise it assumes execution was successful.
1928
1921
  """
1929
1922
  if event["status"] == "failed" or event["status"] == "error":
1930
1923
  self.log.exception("Unexpected error in the operation.")
@@ -1934,8 +1927,7 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
1934
1927
 
1935
1928
 
1936
1929
  class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
1937
- """
1938
- Submits a job to a cluster.
1930
+ """Submit a job to a cluster.
1939
1931
 
1940
1932
  :param project_id: Optional. The ID of the Google Cloud project that the job belongs to.
1941
1933
  :param region: Required. The Cloud Dataproc region in which to handle the request.
@@ -1988,7 +1980,7 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
1988
1980
  gcp_conn_id: str = "google_cloud_default",
1989
1981
  impersonation_chain: str | Sequence[str] | None = None,
1990
1982
  asynchronous: bool = False,
1991
- deferrable: bool = False,
1983
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
1992
1984
  polling_interval_seconds: int = 10,
1993
1985
  cancel_on_kill: bool = True,
1994
1986
  wait_timeout: int | None = None,
@@ -2063,10 +2055,10 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
2063
2055
  return self.job_id
2064
2056
 
2065
2057
  def execute_complete(self, context, event=None) -> None:
2066
- """
2067
- Callback for when the trigger fires - returns immediately.
2068
- Relies on trigger to throw an exception, otherwise it assumes execution was
2069
- successful.
2058
+ """Callback for when the trigger fires.
2059
+
2060
+ This returns immediately. It relies on trigger to throw an exception,
2061
+ otherwise it assumes execution was successful.
2070
2062
  """
2071
2063
  job_state = event["job_state"]
2072
2064
  job_id = event["job_id"]
@@ -2083,8 +2075,7 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
2083
2075
 
2084
2076
 
2085
2077
  class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
2086
- """
2087
- Updates a cluster in a project.
2078
+ """Update a cluster in a project.
2088
2079
 
2089
2080
  :param region: Required. The Cloud Dataproc region in which to handle the request.
2090
2081
  :param project_id: Optional. The ID of the Google Cloud project the cluster belongs to.
@@ -2149,7 +2140,7 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
2149
2140
  metadata: Sequence[tuple[str, str]] = (),
2150
2141
  gcp_conn_id: str = "google_cloud_default",
2151
2142
  impersonation_chain: str | Sequence[str] | None = None,
2152
- deferrable: bool = False,
2143
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
2153
2144
  polling_interval_seconds: int = 10,
2154
2145
  **kwargs,
2155
2146
  ):
@@ -2222,8 +2213,7 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
2222
2213
 
2223
2214
 
2224
2215
  class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
2225
- """
2226
- Creates a batch workload.
2216
+ """Create a batch workload.
2227
2217
 
2228
2218
  :param project_id: Optional. The ID of the Google Cloud project that the cluster belongs to. (templated)
2229
2219
  :param region: Required. The Cloud Dataproc region in which to handle the request. (templated)
@@ -2281,7 +2271,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
2281
2271
  impersonation_chain: str | Sequence[str] | None = None,
2282
2272
  result_retry: Retry | _MethodDefault = DEFAULT,
2283
2273
  asynchronous: bool = False,
2284
- deferrable: bool = False,
2274
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
2285
2275
  polling_interval_seconds: int = 5,
2286
2276
  **kwargs,
2287
2277
  ):
@@ -2342,6 +2332,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
2342
2332
  return self.operation.operation.name
2343
2333
 
2344
2334
  else:
2335
+ # processing ends in execute_complete
2345
2336
  self.defer(
2346
2337
  trigger=DataprocBatchTrigger(
2347
2338
  batch_id=self.batch_id,
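The deferral above follows Airflow's standard trigger handoff: the operator suspends itself and names the method to resume in once the trigger fires. A condensed sketch of that flow, using only the trigger parameters visible in this hunk (the surrounding helper function is hypothetical):

.. code-block:: python

    from airflow.providers.google.cloud.triggers.dataproc import DataprocBatchTrigger


    def defer_until_batch_finishes(operator, batch_id: str) -> None:
        # Hypothetical helper: suspend the task until DataprocBatchTrigger reports a
        # terminal batch state, then resume in execute_complete with the trigger event.
        operator.defer(
            trigger=DataprocBatchTrigger(
                batch_id=batch_id,
                project_id=operator.project_id,
                region=operator.region,
                gcp_conn_id=operator.gcp_conn_id,
                impersonation_chain=operator.impersonation_chain,
                polling_interval_seconds=operator.polling_interval_seconds,
            ),
            method_name="execute_complete",
        )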
@@ -2359,62 +2350,79 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
2359
2350
  # This is only likely to happen if batch_id was provided
2360
2351
  # Could be running if Airflow was restarted after task started
2361
2352
  # poll until a final state is reached
2362
- if self.batch_id:
2363
- self.log.info("Attaching to the job (%s) if it is still running.", self.batch_id)
2364
- result = hook.wait_for_batch(
2365
- batch_id=self.batch_id,
2366
- region=self.region,
2367
- project_id=self.project_id,
2368
- retry=self.retry,
2369
- timeout=self.timeout,
2370
- metadata=self.metadata,
2371
- wait_check_interval=self.polling_interval_seconds,
2353
+
2354
+ self.log.info("Attaching to the job %s if it is still running.", self.batch_id)
2355
+
2356
+ # deferrable handling of a batch_id that already exists - processing ends in execute_complete
2357
+ if self.deferrable:
2358
+ self.defer(
2359
+ trigger=DataprocBatchTrigger(
2360
+ batch_id=self.batch_id,
2361
+ project_id=self.project_id,
2362
+ region=self.region,
2363
+ gcp_conn_id=self.gcp_conn_id,
2364
+ impersonation_chain=self.impersonation_chain,
2365
+ polling_interval_seconds=self.polling_interval_seconds,
2366
+ ),
2367
+ method_name="execute_complete",
2372
2368
  )
2373
- # It is possible we don't have a result in the case where batch_id was not provide, one was generated
2374
- # by chance, AlreadyExists was caught, but we can't reattach because we don't have the generated id
2375
- if result is None:
2376
- raise AirflowException("The job could not be reattached because the id was generated.")
2377
2369
 
2378
- # The existing batch may be a number of states other than 'SUCCEEDED'\
2379
- # wait_for_operation doesn't fail if the job is cancelled, so we will check for it here which also
2380
- # finds a cancelling|canceled|unspecified job from wait_for_batch
2370
+ # non-deferrable handling of a batch_id that already exists
2371
+ result = hook.wait_for_batch(
2372
+ batch_id=self.batch_id,
2373
+ region=self.region,
2374
+ project_id=self.project_id,
2375
+ retry=self.retry,
2376
+ timeout=self.timeout,
2377
+ metadata=self.metadata,
2378
+ wait_check_interval=self.polling_interval_seconds,
2379
+ )
2381
2380
  batch_id = self.batch_id or result.name.split("/")[-1]
2382
- link = DATAPROC_BATCH_LINK.format(region=self.region, project_id=self.project_id, resource=batch_id)
2383
- if result.state == Batch.State.FAILED:
2384
- raise AirflowException(f"Batch job {batch_id} failed. Driver Logs: {link}")
2385
- if result.state in (Batch.State.CANCELLED, Batch.State.CANCELLING):
2386
- raise AirflowException(f"Batch job {batch_id} was cancelled. Driver logs: {link}")
2387
- if result.state == Batch.State.STATE_UNSPECIFIED:
2388
- raise AirflowException(f"Batch job {batch_id} unspecified. Driver logs: {link}")
2389
- self.log.info("Batch job %s completed. Driver logs: %s", batch_id, link)
2390
- DataprocLink.persist(context=context, task_instance=self, url=DATAPROC_BATCH_LINK, resource=batch_id)
2381
+ self.handle_batch_status(context, result.state, batch_id)
2391
2382
  return Batch.to_dict(result)
2392
2383
 
2393
2384
  def execute_complete(self, context, event=None) -> None:
2394
- """
2395
- Callback for when the trigger fires - returns immediately.
2396
- Relies on trigger to throw an exception, otherwise it assumes execution was
2397
- successful.
2385
+ """Callback for when the trigger fires.
2386
+
2387
+ This returns immediately. It relies on trigger to throw an exception,
2388
+ otherwise it assumes execution was successful.
2398
2389
  """
2399
2390
  if event is None:
2400
2391
  raise AirflowException("Batch failed.")
2401
- batch_state = event["batch_state"]
2392
+ state = event["batch_state"]
2402
2393
  batch_id = event["batch_id"]
2403
-
2404
- if batch_state == Batch.State.FAILED:
2405
- raise AirflowException(f"Batch failed:\n{batch_id}")
2406
- if batch_state == Batch.State.CANCELLED:
2407
- raise AirflowException(f"Batch was cancelled:\n{batch_id}")
2408
- self.log.info("%s completed successfully.", self.task_id)
2394
+ self.handle_batch_status(context, state, batch_id)
2409
2395
 
2410
2396
  def on_kill(self):
2411
2397
  if self.operation:
2412
2398
  self.operation.cancel()
2413
2399
 
2400
+ def handle_batch_status(self, context: Context, state: Batch.State, batch_id: str) -> None:
2401
+ # The existing batch may be a number of states other than 'SUCCEEDED'\
2402
+ # wait_for_operation doesn't fail if the job is cancelled, so we will check for it here which also
2403
+ # finds a cancelling|canceled|unspecified job from wait_for_batch or the deferred trigger
2404
+ link = DATAPROC_BATCH_LINK.format(region=self.region, project_id=self.project_id, resource=batch_id)
2405
+ if state == Batch.State.FAILED:
2406
+ DataprocLink.persist(
2407
+ context=context, task_instance=self, url=DATAPROC_BATCH_LINK, resource=batch_id
2408
+ )
2409
+ raise AirflowException("Batch job %s failed. Driver Logs: %s", batch_id, link)
2410
+ if state in (Batch.State.CANCELLED, Batch.State.CANCELLING):
2411
+ DataprocLink.persist(
2412
+ context=context, task_instance=self, url=DATAPROC_BATCH_LINK, resource=batch_id
2413
+ )
2414
+ raise AirflowException("Batch job %s was cancelled. Driver logs: %s", batch_id, link)
2415
+ if state == Batch.State.STATE_UNSPECIFIED:
2416
+ DataprocLink.persist(
2417
+ context=context, task_instance=self, url=DATAPROC_BATCH_LINK, resource=batch_id
2418
+ )
2419
+ raise AirflowException("Batch job %s unspecified. Driver logs: %s", batch_id, link)
2420
+ self.log.info("Batch job %s completed. Driver logs: %s", batch_id, link)
2421
+ DataprocLink.persist(context=context, task_instance=self, url=DATAPROC_BATCH_LINK, resource=batch_id)
2422
+
2414
2423
 
2415
2424
  class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
2416
- """
2417
- Deletes the batch workload resource.
2425
+ """Delete the batch workload resource.
2418
2426
 
2419
2427
  :param batch_id: Required. The ID to use for the batch, which will become the final component
2420
2428
  of the batch's resource name.
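The ``handle_batch_status`` method introduced in the hunk above centralizes the terminal-state checks that were previously duplicated between the synchronous wait path and ``execute_complete``. A simplified, self-contained sketch of the same check (state values mirror ``google.cloud.dataproc_v1.Batch.State``; the real method also persists the Dataproc batch link and logs the driver-log URL):

.. code-block:: python

    from google.cloud.dataproc_v1 import Batch

    from airflow.exceptions import AirflowException


    def check_batch_state(state: Batch.State, batch_id: str, driver_log_link: str) -> None:
        # Simplified stand-in for DataprocCreateBatchOperator.handle_batch_status:
        # raise for any unsuccessful terminal state, otherwise treat the batch as done.
        if state == Batch.State.FAILED:
            raise AirflowException(f"Batch job {batch_id} failed. Driver logs: {driver_log_link}")
        if state in (Batch.State.CANCELLED, Batch.State.CANCELLING):
            raise AirflowException(f"Batch job {batch_id} was cancelled. Driver logs: {driver_log_link}")
        if state == Batch.State.STATE_UNSPECIFIED:
            raise AirflowException(f"Batch job {batch_id} is in an unspecified state. Driver logs: {driver_log_link}")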
@@ -2477,8 +2485,7 @@ class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
2477
2485
 
2478
2486
 
2479
2487
  class DataprocGetBatchOperator(GoogleCloudBaseOperator):
2480
- """
2481
- Gets the batch workload resource representation.
2488
+ """Get the batch workload resource representation.
2482
2489
 
2483
2490
  :param batch_id: Required. The ID to use for the batch, which will become the final component
2484
2491
  of the batch's resource name.
@@ -2545,8 +2552,7 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
2545
2552
 
2546
2553
 
2547
2554
  class DataprocListBatchesOperator(GoogleCloudBaseOperator):
2548
- """
2549
- Lists batch workloads.
2555
+ """List batch workloads.
2550
2556
 
2551
2557
  :param region: Required. The Cloud Dataproc region in which to handle the request.
2552
2558
  :param project_id: Optional. The ID of the Google Cloud project that the cluster belongs to.
@@ -2568,7 +2574,6 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
2568
2574
  If set as a sequence, the identities from the list must grant
2569
2575
  Service Account Token Creator IAM role to the directly preceding identity, with first
2570
2576
  account from the list granting this role to the originating account (templated).
2571
-
2572
2577
  """
2573
2578
 
2574
2579
  template_fields: Sequence[str] = ("region", "project_id", "impersonation_chain")
@@ -2615,8 +2620,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
2615
2620
 
2616
2621
 
2617
2622
  class DataprocCancelOperationOperator(GoogleCloudBaseOperator):
2618
- """
2619
- Cancel the batch workload resource.
2623
+ """Cancel the batch workload resource.
2620
2624
 
2621
2625
  :param operation_name: Required. The name of the operation resource to be cancelled.
2622
2626
  :param region: Required. The Cloud Dataproc region in which to handle the request.
airflow/providers/google/cloud/operators/dataproc_metastore.py

@@ -145,8 +145,7 @@ class DataprocMetastoreDetailedLink(BaseOperatorLink):
145
145
 
146
146
 
147
147
  class DataprocMetastoreCreateBackupOperator(GoogleCloudBaseOperator):
148
- """
149
- Creates a new backup in a given project and location.
148
+ """Create a new backup in a given project and location.
150
149
 
151
150
  :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
152
151
  :param region: Required. The ID of the Google Cloud region that the service belongs to.
@@ -260,8 +259,7 @@ class DataprocMetastoreCreateBackupOperator(GoogleCloudBaseOperator):
260
259
 
261
260
 
262
261
  class DataprocMetastoreCreateMetadataImportOperator(GoogleCloudBaseOperator):
263
- """
264
- Creates a new MetadataImport in a given project and location.
262
+ """Create a new MetadataImport in a given project and location.
265
263
 
266
264
  :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
267
265
  :param region: Required. The ID of the Google Cloud region that the service belongs to.
@@ -361,8 +359,7 @@ class DataprocMetastoreCreateMetadataImportOperator(GoogleCloudBaseOperator):
361
359
 
362
360
 
363
361
  class DataprocMetastoreCreateServiceOperator(GoogleCloudBaseOperator):
364
- """
365
- Creates a metastore service in a project and location.
362
+ """Create a metastore service in a project and location.
366
363
 
367
364
  :param region: Required. The ID of the Google Cloud region that the service belongs to.
368
365
  :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
@@ -463,8 +460,7 @@ class DataprocMetastoreCreateServiceOperator(GoogleCloudBaseOperator):
463
460
 
464
461
 
465
462
  class DataprocMetastoreDeleteBackupOperator(GoogleCloudBaseOperator):
466
- """
467
- Deletes a single backup.
463
+ """Delete a single backup.
468
464
 
469
465
  :param project_id: Required. The ID of the Google Cloud project that the backup belongs to.
470
466
  :param region: Required. The ID of the Google Cloud region that the backup belongs to.
@@ -548,8 +544,7 @@ class DataprocMetastoreDeleteBackupOperator(GoogleCloudBaseOperator):
548
544
 
549
545
 
550
546
  class DataprocMetastoreDeleteServiceOperator(GoogleCloudBaseOperator):
551
- """
552
- Deletes a single service.
547
+ """Delete a single service.
553
548
 
554
549
  :param request: The request object. Request message for
555
550
  [DataprocMetastore.DeleteService][google.cloud.metastore.v1.DataprocMetastore.DeleteService].
@@ -606,8 +601,7 @@ class DataprocMetastoreDeleteServiceOperator(GoogleCloudBaseOperator):
606
601
 
607
602
 
608
603
  class DataprocMetastoreExportMetadataOperator(GoogleCloudBaseOperator):
609
- """
610
- Exports metadata from a service.
604
+ """Export metadata from a service.
611
605
 
612
606
  :param destination_gcs_folder: A Cloud Storage URI of a folder, in the format
613
607
  ``gs://<bucket_name>/<path_inside_bucket>``. A sub-folder
@@ -699,9 +693,10 @@ class DataprocMetastoreExportMetadataOperator(GoogleCloudBaseOperator):
699
693
  return destination_uri[5:] if destination_uri.startswith("gs://") else destination_uri
700
694
 
701
695
  def _wait_for_export_metadata(self, hook: DataprocMetastoreHook):
702
- """
703
- Workaround to check that export was created successfully.
704
- We discovered a issue to parse result to MetadataExport inside the SDK.
696
+ """Check that export was created successfully.
697
+
698
+ This is a workaround to an issue parsing result to MetadataExport inside
699
+ the SDK.
705
700
  """
706
701
  for time_to_wait in exponential_sleep_generator(initial=10, maximum=120):
707
702
  sleep(time_to_wait)
@@ -724,8 +719,7 @@ class DataprocMetastoreExportMetadataOperator(GoogleCloudBaseOperator):
724
719
 
725
720
 
726
721
  class DataprocMetastoreGetServiceOperator(GoogleCloudBaseOperator):
727
- """
728
- Gets the details of a single service.
722
+ """Get the details of a single service.
729
723
 
730
724
  :param region: Required. The ID of the Google Cloud region that the service belongs to.
731
725
  :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
@@ -797,8 +791,7 @@ class DataprocMetastoreGetServiceOperator(GoogleCloudBaseOperator):
797
791
 
798
792
 
799
793
  class DataprocMetastoreListBackupsOperator(GoogleCloudBaseOperator):
800
- """
801
- Lists backups in a service.
794
+ """List backups in a service.
802
795
 
803
796
  :param project_id: Required. The ID of the Google Cloud project that the backup belongs to.
804
797
  :param region: Required. The ID of the Google Cloud region that the backup belongs to.
@@ -882,8 +875,7 @@ class DataprocMetastoreListBackupsOperator(GoogleCloudBaseOperator):
882
875
 
883
876
 
884
877
  class DataprocMetastoreRestoreServiceOperator(GoogleCloudBaseOperator):
885
- """
886
- Restores a service from a backup.
878
+ """Restore a service from a backup.
887
879
 
888
880
  :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
889
881
  :param region: Required. The ID of the Google Cloud region that the service belongs to.
@@ -987,9 +979,10 @@ class DataprocMetastoreRestoreServiceOperator(GoogleCloudBaseOperator):
987
979
  DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_SERVICE_LINK)
988
980
 
989
981
  def _wait_for_restore_service(self, hook: DataprocMetastoreHook):
990
- """
991
- Workaround to check that restore service was finished successfully.
992
- We discovered an issue to parse result to Restore inside the SDK.
982
+ """Check that export was created successfully.
983
+
984
+ This is a workaround to an issue parsing result to MetadataExport inside
985
+ the SDK.
993
986
  """
994
987
  for time_to_wait in exponential_sleep_generator(initial=10, maximum=120):
995
988
  sleep(time_to_wait)
@@ -1010,8 +1003,7 @@ class DataprocMetastoreRestoreServiceOperator(GoogleCloudBaseOperator):
1010
1003
 
1011
1004
 
1012
1005
  class DataprocMetastoreUpdateServiceOperator(GoogleCloudBaseOperator):
1013
- """
1014
- Updates the parameters of a single service.
1006
+ """Update the parameters of a single service.
1015
1007
 
1016
1008
  :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
1017
1009
  :param region: Required. The ID of the Google Cloud region that the service belongs to.