apache-airflow-providers-google 10.2.0rc1__py3-none-any.whl → 10.3.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +38 -39
  3. airflow/providers/google/ads/transfers/ads_to_gcs.py +4 -4
  4. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +6 -9
  5. airflow/providers/google/cloud/hooks/bigquery.py +328 -318
  6. airflow/providers/google/cloud/hooks/cloud_sql.py +66 -22
  7. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +46 -70
  8. airflow/providers/google/cloud/hooks/dataflow.py +11 -15
  9. airflow/providers/google/cloud/hooks/dataform.py +3 -3
  10. airflow/providers/google/cloud/hooks/dataproc.py +577 -573
  11. airflow/providers/google/cloud/hooks/functions.py +60 -76
  12. airflow/providers/google/cloud/hooks/gcs.py +108 -18
  13. airflow/providers/google/cloud/hooks/kubernetes_engine.py +69 -90
  14. airflow/providers/google/cloud/links/datafusion.py +4 -3
  15. airflow/providers/google/cloud/operators/bigquery.py +201 -191
  16. airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
  17. airflow/providers/google/cloud/operators/cloud_build.py +2 -1
  18. airflow/providers/google/cloud/operators/cloud_composer.py +4 -3
  19. airflow/providers/google/cloud/operators/cloud_sql.py +62 -28
  20. airflow/providers/google/cloud/operators/dataflow.py +6 -4
  21. airflow/providers/google/cloud/operators/dataform.py +3 -2
  22. airflow/providers/google/cloud/operators/dataproc.py +127 -123
  23. airflow/providers/google/cloud/operators/dataproc_metastore.py +18 -26
  24. airflow/providers/google/cloud/operators/gcs.py +35 -13
  25. airflow/providers/google/cloud/operators/kubernetes_engine.py +92 -42
  26. airflow/providers/google/cloud/operators/mlengine.py +2 -6
  27. airflow/providers/google/cloud/operators/vision.py +47 -56
  28. airflow/providers/google/cloud/sensors/bigquery.py +3 -2
  29. airflow/providers/google/cloud/sensors/gcs.py +5 -7
  30. airflow/providers/google/cloud/sensors/pubsub.py +2 -2
  31. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +3 -2
  32. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  33. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
  34. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +6 -5
  35. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +46 -7
  36. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +5 -2
  37. airflow/providers/google/cloud/triggers/cloud_sql.py +102 -0
  38. airflow/providers/google/cloud/triggers/kubernetes_engine.py +28 -6
  39. airflow/providers/google/cloud/utils/bigquery.py +17 -0
  40. airflow/providers/google/get_provider_info.py +7 -2
  41. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +4 -0
  42. airflow/providers/google/suite/transfers/local_to_drive.py +28 -26
  43. apache_airflow_providers_google-10.3.0rc1.dist-info/METADATA +289 -0
  44. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/RECORD +49 -48
  45. apache_airflow_providers_google-10.2.0rc1.dist-info/METADATA +0 -1824
  46. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/LICENSE +0 -0
  47. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/NOTICE +0 -0
  48. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/WHEEL +0 -0
  49. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/entry_points.txt +0 -0
  50. {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -78,8 +78,7 @@ class DataProcJobBuilder:
78
78
  self.job["job"][job_type]["properties"] = properties
79
79
 
80
80
  def add_labels(self, labels: dict | None = None) -> None:
81
- """
82
- Set labels for Dataproc job.
81
+ """Set labels for Dataproc job.
83
82
 
84
83
  :param labels: Labels for the job query.
85
84
  """
@@ -87,8 +86,7 @@ class DataProcJobBuilder:
87
86
  self.job["job"]["labels"].update(labels)
88
87
 
89
88
  def add_variables(self, variables: dict | None = None) -> None:
90
- """
91
- Set variables for Dataproc job.
89
+ """Set variables for Dataproc job.
92
90
 
93
91
  :param variables: Variables for the job query.
94
92
  """
@@ -96,8 +94,7 @@ class DataProcJobBuilder:
96
94
  self.job["job"][self.job_type]["script_variables"] = variables
97
95
 
98
96
  def add_args(self, args: list[str] | None = None) -> None:
99
- """
100
- Set args for Dataproc job.
97
+ """Set args for Dataproc job.
101
98
 
102
99
  :param args: Args for the job query.
103
100
  """
@@ -105,24 +102,21 @@ class DataProcJobBuilder:
105
102
  self.job["job"][self.job_type]["args"] = args
106
103
 
107
104
  def add_query(self, query: str) -> None:
108
- """
109
- Set query for Dataproc job.
105
+ """Set query for Dataproc job.
110
106
 
111
107
  :param query: query for the job.
112
108
  """
113
109
  self.job["job"][self.job_type]["query_list"] = {"queries": [query]}
114
110
 
115
111
  def add_query_uri(self, query_uri: str) -> None:
116
- """
117
- Set query uri for Dataproc job.
112
+ """Set query uri for Dataproc job.
118
113
 
119
114
  :param query_uri: URI for the job query.
120
115
  """
121
116
  self.job["job"][self.job_type]["query_file_uri"] = query_uri
122
117
 
123
118
  def add_jar_file_uris(self, jars: list[str] | None = None) -> None:
124
- """
125
- Set jars uris for Dataproc job.
119
+ """Set jars uris for Dataproc job.
126
120
 
127
121
  :param jars: List of jars URIs
128
122
  """
@@ -130,8 +124,7 @@ class DataProcJobBuilder:
130
124
  self.job["job"][self.job_type]["jar_file_uris"] = jars
131
125
 
132
126
  def add_archive_uris(self, archives: list[str] | None = None) -> None:
133
- """
134
- Set archives uris for Dataproc job.
127
+ """Set archives uris for Dataproc job.
135
128
 
136
129
  :param archives: List of archives URIs
137
130
  """
@@ -139,8 +132,7 @@ class DataProcJobBuilder:
139
132
  self.job["job"][self.job_type]["archive_uris"] = archives
140
133
 
141
134
  def add_file_uris(self, files: list[str] | None = None) -> None:
142
- """
143
- Set file uris for Dataproc job.
135
+ """Set file uris for Dataproc job.
144
136
 
145
137
  :param files: List of files URIs
146
138
  """
@@ -148,8 +140,7 @@ class DataProcJobBuilder:
148
140
  self.job["job"][self.job_type]["file_uris"] = files
149
141
 
150
142
  def add_python_file_uris(self, pyfiles: list[str] | None = None) -> None:
151
- """
152
- Set python file uris for Dataproc job.
143
+ """Set python file uris for Dataproc job.
153
144
 
154
145
  :param pyfiles: List of python files URIs
155
146
  """
@@ -157,8 +148,7 @@ class DataProcJobBuilder:
157
148
  self.job["job"][self.job_type]["python_file_uris"] = pyfiles
158
149
 
159
150
  def set_main(self, main_jar: str | None = None, main_class: str | None = None) -> None:
160
- """
161
- Set Dataproc main class.
151
+ """Set Dataproc main class.
162
152
 
163
153
  :param main_jar: URI for the main file.
164
154
  :param main_class: Name of the main class.
@@ -172,16 +162,16 @@ class DataProcJobBuilder:
172
162
  self.job["job"][self.job_type]["main_class"] = main_class
173
163
 
174
164
  def set_python_main(self, main: str) -> None:
175
- """
176
- Set Dataproc main python file uri.
165
+ """Set Dataproc main python file uri.
177
166
 
178
167
  :param main: URI for the python main file.
179
168
  """
180
169
  self.job["job"][self.job_type]["main_python_file_uri"] = main
181
170
 
182
171
  def set_job_name(self, name: str) -> None:
183
- """
184
- Set Dataproc job name. Job name is sanitized, replacing dots by underscores.
172
+ """Set Dataproc job name.
173
+
174
+ Job name is sanitized, replacing dots by underscores.
185
175
 
186
176
  :param name: Job name.
187
177
  """
@@ -189,8 +179,7 @@ class DataProcJobBuilder:
189
179
  self.job["job"]["reference"]["job_id"] = sanitized_name
190
180
 
191
181
  def build(self) -> dict:
192
- """
193
- Returns Dataproc job.
182
+ """Return Dataproc job.
194
183
 
195
184
  :return: Dataproc job
196
185
  """
@@ -198,8 +187,7 @@ class DataProcJobBuilder:
198
187
 
199
188
 
200
189
  class DataprocHook(GoogleBaseHook):
201
- """
202
- Hook for Google Cloud Dataproc APIs.
190
+ """Google Cloud Dataproc APIs.
203
191
 
204
192
  All the methods in the hook where project_id is used must be called with
205
193
  keyword arguments rather than positional.
@@ -219,7 +207,7 @@ class DataprocHook(GoogleBaseHook):
219
207
  super().__init__(gcp_conn_id=gcp_conn_id, impersonation_chain=impersonation_chain)
220
208
 
221
209
  def get_cluster_client(self, region: str | None = None) -> ClusterControllerClient:
222
- """Returns ClusterControllerClient."""
210
+ """Create a ClusterControllerClient."""
223
211
  client_options = None
224
212
  if region and region != "global":
225
213
  client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -229,7 +217,7 @@ class DataprocHook(GoogleBaseHook):
229
217
  )
230
218
 
231
219
  def get_template_client(self, region: str | None = None) -> WorkflowTemplateServiceClient:
232
- """Returns WorkflowTemplateServiceClient."""
220
+ """Create a WorkflowTemplateServiceClient."""
233
221
  client_options = None
234
222
  if region and region != "global":
235
223
  client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -239,7 +227,7 @@ class DataprocHook(GoogleBaseHook):
239
227
  )
240
228
 
241
229
  def get_job_client(self, region: str | None = None) -> JobControllerClient:
242
- """Returns JobControllerClient."""
230
+ """Create a JobControllerClient."""
243
231
  client_options = None
244
232
  if region and region != "global":
245
233
  client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -249,7 +237,7 @@ class DataprocHook(GoogleBaseHook):
249
237
  )
250
238
 
251
239
  def get_batch_client(self, region: str | None = None) -> BatchControllerClient:
252
- """Returns BatchControllerClient."""
240
+ """Create a BatchControllerClient."""
253
241
  client_options = None
254
242
  if region and region != "global":
255
243
  client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -258,8 +246,8 @@ class DataprocHook(GoogleBaseHook):
258
246
  credentials=self.get_credentials(), client_info=CLIENT_INFO, client_options=client_options
259
247
  )
260
248
 
261
- def get_operations_client(self, region):
262
- """Returns OperationsClient."""
249
+ def get_operations_client(self, region: str | None):
250
+ """Create an OperationsClient."""
263
251
  return self.get_batch_client(region=region).transport.operations_client
264
252
 
265
253
  def wait_for_operation(
@@ -267,8 +255,8 @@ class DataprocHook(GoogleBaseHook):
267
255
  operation: Operation,
268
256
  timeout: float | None = None,
269
257
  result_retry: Retry | _MethodDefault = DEFAULT,
270
- ):
271
- """Waits for long-lasting operation to complete."""
258
+ ) -> Any:
259
+ """Wait for a long-lasting operation to complete."""
272
260
  try:
273
261
  return operation.result(timeout=timeout, retry=result_retry)
274
262
  except Exception:
@@ -288,28 +276,30 @@ class DataprocHook(GoogleBaseHook):
288
276
  retry: Retry | _MethodDefault = DEFAULT,
289
277
  timeout: float | None = None,
290
278
  metadata: Sequence[tuple[str, str]] = (),
291
- ):
292
- """
293
- Creates a cluster in a project.
294
-
295
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
296
- :param region: Required. The Cloud Dataproc region in which to handle the request.
297
- :param cluster_name: Name of the cluster to create
298
- :param labels: Labels that will be assigned to created cluster
299
- :param cluster_config: Required. The cluster config to create.
300
- If a dict is provided, it must be of the same form as the protobuf message
301
- :class:`~google.cloud.dataproc_v1.types.ClusterConfig`
302
- :param virtual_cluster_config: Optional. The virtual cluster config, used when creating a Dataproc
303
- cluster that does not directly control the underlying compute resources, for example, when
304
- creating a `Dataproc-on-GKE cluster`
305
- :class:`~google.cloud.dataproc_v1.types.VirtualClusterConfig`
306
- :param request_id: Optional. A unique id used to identify the request. If the server receives two
307
- ``CreateClusterRequest`` requests with the same id, then the second request will be ignored and
308
- the first ``google.longrunning.Operation`` created and stored in the backend is returned.
309
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
310
- retried.
311
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
312
- ``retry`` is specified, the timeout applies to each individual attempt.
279
+ ) -> Operation:
280
+ """Create a cluster in a specified project.
281
+
282
+ :param project_id: Google Cloud project ID that the cluster belongs to.
283
+ :param region: Cloud Dataproc region in which to handle the request.
284
+ :param cluster_name: Name of the cluster to create.
285
+ :param labels: Labels that will be assigned to created cluster.
286
+ :param cluster_config: The cluster config to create. If a dict is
287
+ provided, it must be of the same form as the protobuf message
288
+ :class:`~google.cloud.dataproc_v1.types.ClusterConfig`.
289
+ :param virtual_cluster_config: The virtual cluster config, used when
290
+ creating a Dataproc cluster that does not directly control the
291
+ underlying compute resources, for example, when creating a
292
+ Dataproc-on-GKE cluster with
293
+ :class:`~google.cloud.dataproc_v1.types.VirtualClusterConfig`.
294
+ :param request_id: A unique id used to identify the request. If the
295
+ server receives two *CreateClusterRequest* requests with the same
296
+ ID, the second request will be ignored, and an operation created
297
+ for the first one and stored in the backend is returned.
298
+ :param retry: A retry object used to retry requests. If *None*, requests
299
+ will not be retried.
300
+ :param timeout: The amount of time, in seconds, to wait for the request
301
+ to complete. If *retry* is specified, the timeout applies to each
302
+ individual attempt.
313
303
  :param metadata: Additional metadata that is provided to the method.
314
304
  """
315
305
  # Dataproc labels must conform to the following regex:
@@ -353,22 +343,23 @@ class DataprocHook(GoogleBaseHook):
353
343
  retry: Retry | _MethodDefault = DEFAULT,
354
344
  timeout: float | None = None,
355
345
  metadata: Sequence[tuple[str, str]] = (),
356
- ):
357
- """
358
- Deletes a cluster in a project.
359
-
360
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
361
- :param region: Required. The Cloud Dataproc region in which to handle the request.
362
- :param cluster_name: Required. The cluster name.
363
- :param cluster_uuid: Optional. Specifying the ``cluster_uuid`` means the RPC should fail
364
- if cluster with specified UUID does not exist.
365
- :param request_id: Optional. A unique id used to identify the request. If the server receives two
366
- ``DeleteClusterRequest`` requests with the same id, then the second request will be ignored and
367
- the first ``google.longrunning.Operation`` created and stored in the backend is returned.
368
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
369
- retried.
370
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
371
- ``retry`` is specified, the timeout applies to each individual attempt.
346
+ ) -> Operation:
347
+ """Delete a cluster in a project.
348
+
349
+ :param project_id: Google Cloud project ID that the cluster belongs to.
350
+ :param region: Cloud Dataproc region in which to handle the request.
351
+ :param cluster_name: Name of the cluster to delete.
352
+ :param cluster_uuid: If specified, the RPC should fail if cluster with
353
+ the UUID does not exist.
354
+ :param request_id: A unique id used to identify the request. If the
355
+ server receives two *DeleteClusterRequest* requests with the same
356
+ ID, the second request will be ignored, and an operation created
357
+ for the first one and stored in the backend is returned.
358
+ :param retry: A retry object used to retry requests. If *None*, requests
359
+ will not be retried.
360
+ :param timeout: The amount of time, in seconds, to wait for the request
361
+ to complete. If *retry* is specified, the timeout applies to each
362
+ individual attempt.
372
363
  :param metadata: Additional metadata that is provided to the method.
373
364
  """
374
365
  client = self.get_cluster_client(region=region)
@@ -395,18 +386,19 @@ class DataprocHook(GoogleBaseHook):
395
386
  retry: Retry | _MethodDefault = DEFAULT,
396
387
  timeout: float | None = None,
397
388
  metadata: Sequence[tuple[str, str]] = (),
398
- ):
399
- """
400
- Gets cluster diagnostic information. After the operation completes GCS uri to
401
- diagnose is returned.
402
-
403
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
404
- :param region: Required. The Cloud Dataproc region in which to handle the request.
405
- :param cluster_name: Required. The cluster name.
406
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
407
- retried.
408
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
409
- ``retry`` is specified, the timeout applies to each individual attempt.
389
+ ) -> str:
390
+ """Get cluster diagnostic information.
391
+
392
+ After the operation completes, the GCS URI to diagnose is returned.
393
+
394
+ :param project_id: Google Cloud project ID that the cluster belongs to.
395
+ :param region: Cloud Dataproc region in which to handle the request.
396
+ :param cluster_name: Name of the cluster.
397
+ :param retry: A retry object used to retry requests. If *None*, requests
398
+ will not be retried.
399
+ :param timeout: The amount of time, in seconds, to wait for the request
400
+ to complete. If *retry* is specified, the timeout applies to each
401
+ individual attempt.
410
402
  :param metadata: Additional metadata that is provided to the method.
411
403
  """
412
404
  client = self.get_cluster_client(region=region)
@@ -429,17 +421,17 @@ class DataprocHook(GoogleBaseHook):
429
421
  retry: Retry | _MethodDefault = DEFAULT,
430
422
  timeout: float | None = None,
431
423
  metadata: Sequence[tuple[str, str]] = (),
432
- ):
433
- """
434
- Gets the resource representation for a cluster in a project.
435
-
436
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
437
- :param region: Required. The Cloud Dataproc region in which to handle the request.
438
- :param cluster_name: Required. The cluster name.
439
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
440
- retried.
441
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
442
- ``retry`` is specified, the timeout applies to each individual attempt.
424
+ ) -> Cluster:
425
+ """Get the resource representation for a cluster in a project.
426
+
427
+ :param project_id: Google Cloud project ID that the cluster belongs to.
428
+ :param region: Cloud Dataproc region to handle the request.
429
+ :param cluster_name: The cluster name.
430
+ :param retry: A retry object used to retry requests. If *None*, requests
431
+ will not be retried.
432
+ :param timeout: The amount of time, in seconds, to wait for the request
433
+ to complete. If *retry* is specified, the timeout applies to each
434
+ individual attempt.
443
435
  :param metadata: Additional metadata that is provided to the method.
444
436
  """
445
437
  client = self.get_cluster_client(region=region)
@@ -462,19 +454,21 @@ class DataprocHook(GoogleBaseHook):
462
454
  timeout: float | None = None,
463
455
  metadata: Sequence[tuple[str, str]] = (),
464
456
  ):
465
- """
466
- Lists all regions/{region}/clusters in a project.
467
-
468
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
469
- :param region: Required. The Cloud Dataproc region in which to handle the request.
470
- :param filter_: Optional. A filter constraining the clusters to list. Filters are case-sensitive.
471
- :param page_size: The maximum number of resources contained in the underlying API response. If page
472
- streaming is performed per- resource, this parameter does not affect the return value. If page
473
- streaming is performed per-page, this determines the maximum number of resources in a page.
474
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
475
- retried.
476
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
477
- ``retry`` is specified, the timeout applies to each individual attempt.
457
+ """List all regions/{region}/clusters in a project.
458
+
459
+ :param project_id: Google Cloud project ID that the cluster belongs to.
460
+ :param region: Cloud Dataproc region to handle the request.
461
+ :param filter_: A filter to constrain the clusters listed. Case-sensitive.
462
+ :param page_size: The maximum number of resources contained in the
463
+ underlying API response. If page streaming is performed
464
+ per-resource, this parameter does not affect the return value. If
465
+ page streaming is performed per-page, this determines the maximum
466
+ number of resources in a page.
467
+ :param retry: A retry object used to retry requests. If *None*, requests
468
+ will not be retried.
469
+ :param timeout: The amount of time, in seconds, to wait for the request
470
+ to complete. If *retry* is specified, the timeout applies to each
471
+ individual attempt.
478
472
  :param metadata: Additional metadata that is provided to the method.
479
473
  """
480
474
  client = self.get_cluster_client(region=region)
@@ -499,53 +493,56 @@ class DataprocHook(GoogleBaseHook):
499
493
  retry: Retry | _MethodDefault = DEFAULT,
500
494
  timeout: float | None = None,
501
495
  metadata: Sequence[tuple[str, str]] = (),
502
- ):
503
- """
504
- Updates a cluster in a project.
505
-
506
- :param project_id: Required. The ID of the Google Cloud project the cluster belongs to.
507
- :param region: Required. The Cloud Dataproc region in which to handle the request.
508
- :param cluster_name: Required. The cluster name.
509
- :param cluster: Required. The changes to the cluster.
510
-
511
- If a dict is provided, it must be of the same form as the protobuf message
512
- :class:`~google.cloud.dataproc_v1.types.Cluster`
513
- :param update_mask: Required. Specifies the path, relative to ``Cluster``, of the field to update. For
514
- example, to change the number of workers in a cluster to 5, the ``update_mask`` parameter would be
515
- specified as ``config.worker_config.num_instances``, and the ``PATCH`` request body would specify
516
- the new value, as follows:
517
-
518
- ::
519
-
520
- { "config":{ "workerConfig":{ "numInstances":"5" } } }
521
-
522
- Similarly, to change the number of preemptible workers in a cluster to 5, the ``update_mask``
523
- parameter would be ``config.secondary_worker_config.num_instances``, and the ``PATCH`` request
524
- body would be set as follows:
525
-
526
- ::
527
-
528
- { "config":{ "secondaryWorkerConfig":{ "numInstances":"5" } } }
529
-
530
- If a dict is provided, it must be of the same form as the protobuf message
531
- :class:`~google.cloud.dataproc_v1.types.FieldMask`
532
- :param graceful_decommission_timeout: Optional. Timeout for graceful YARN decommissioning. Graceful
533
- decommissioning allows removing nodes from the cluster without interrupting jobs in progress.
534
- Timeout specifies how long to wait for jobs in progress to finish before forcefully removing nodes
535
- (and potentially interrupting jobs). Default timeout is 0 (for forceful decommission), and the
536
- maximum allowed timeout is 1 day.
496
+ ) -> Operation:
497
+ """Update a cluster in a project.
498
+
499
+ :param project_id: Google Cloud project ID that the cluster belongs to.
500
+ :param region: Cloud Dataproc region to handle the request.
501
+ :param cluster_name: The cluster name.
502
+ :param cluster: Changes to the cluster. If a dict is provided, it must
503
+ be of the same form as the protobuf message
504
+ :class:`~google.cloud.dataproc_v1.types.Cluster`.
505
+ :param update_mask: Specifies the path, relative to ``Cluster``, of the
506
+ field to update. For example, to change the number of workers in a
507
+ cluster to 5, this would be specified as
508
+ ``config.worker_config.num_instances``, and the ``PATCH`` request
509
+ body would specify the new value:
510
+
511
+ .. code-block:: python
512
+
513
+ {"config": {"workerConfig": {"numInstances": "5"}}}
514
+
515
+ Similarly, to change the number of preemptible workers in a cluster
516
+ to 5, this would be ``config.secondary_worker_config.num_instances``
517
+ and the ``PATCH`` request body would be:
518
+
519
+ .. code-block:: python
520
+
521
+ {"config": {"secondaryWorkerConfig": {"numInstances": "5"}}}
522
+
523
+ If a dict is provided, it must be of the same form as the protobuf
524
+ message :class:`~google.cloud.dataproc_v1.types.FieldMask`.
525
+ :param graceful_decommission_timeout: Timeout for graceful YARN
526
+ decommissioning. Graceful decommissioning allows removing nodes from
527
+ the cluster without interrupting jobs in progress. Timeout specifies
528
+ how long to wait for jobs in progress to finish before forcefully
529
+ removing nodes (and potentially interrupting jobs). Default timeout
530
+ is 0 (for forceful decommission), and the maximum allowed timeout is
531
+ one day.
537
532
 
538
533
  Only supported on Dataproc image versions 1.2 and higher.
539
534
 
540
- If a dict is provided, it must be of the same form as the protobuf message
541
- :class:`~google.cloud.dataproc_v1.types.Duration`
542
- :param request_id: Optional. A unique id used to identify the request. If the server receives two
543
- ``UpdateClusterRequest`` requests with the same id, then the second request will be ignored and
544
- the first ``google.longrunning.Operation`` created and stored in the backend is returned.
545
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
546
- retried.
547
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
548
- ``retry`` is specified, the timeout applies to each individual attempt.
535
+ If a dict is provided, it must be of the same form as the protobuf
536
+ message :class:`~google.cloud.dataproc_v1.types.Duration`.
537
+ :param request_id: A unique id used to identify the request. If the
538
+ server receives two *UpdateClusterRequest* requests with the same
539
+ ID, the second request will be ignored, and an operation created
540
+ for the first one and stored in the backend is returned.
541
+ :param retry: A retry object used to retry requests. If *None*, requests
542
+ will not be retried.
543
+ :param timeout: The amount of time, in seconds, to wait for the request
544
+ to complete. If *retry* is specified, the timeout applies to each
545
+ individual attempt.
549
546
  :param metadata: Additional metadata that is provided to the method.
550
547
  """
551
548
  if region is None:
@@ -577,17 +574,18 @@ class DataprocHook(GoogleBaseHook):
577
574
  timeout: float | None = None,
578
575
  metadata: Sequence[tuple[str, str]] = (),
579
576
  ) -> WorkflowTemplate:
580
- """
581
- Creates new workflow template.
582
-
583
- :param project_id: Required. The ID of the Google Cloud project the cluster belongs to.
584
- :param region: Required. The Cloud Dataproc region in which to handle the request.
585
- :param template: The Dataproc workflow template to create. If a dict is provided,
586
- it must be of the same form as the protobuf message WorkflowTemplate.
587
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
588
- retried.
589
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
590
- ``retry`` is specified, the timeout applies to each individual attempt.
577
+ """Create a new workflow template.
578
+
579
+ :param project_id: Google Cloud project ID that the cluster belongs to.
580
+ :param region: Cloud Dataproc region to handle the request.
581
+ :param template: The Dataproc workflow template to create. If a dict is
582
+ provided, it must be of the same form as the protobuf message
583
+ WorkflowTemplate.
584
+ :param retry: A retry object used to retry requests. If *None*, requests
585
+ will not be retried.
586
+ :param timeout: The amount of time, in seconds, to wait for the request
587
+ to complete. If *retry* is specified, the timeout applies to each
588
+ individual attempt.
591
589
  :param metadata: Additional metadata that is provided to the method.
592
590
  """
593
591
  if region is None:
@@ -611,27 +609,27 @@ class DataprocHook(GoogleBaseHook):
611
609
  retry: Retry | _MethodDefault = DEFAULT,
612
610
  timeout: float | None = None,
613
611
  metadata: Sequence[tuple[str, str]] = (),
614
- ):
615
- """
616
- Instantiates a template and begins execution.
612
+ ) -> Operation:
613
+ """Instantiate a template and begin execution.
617
614
 
618
615
  :param template_name: Name of template to instantiate.
619
- :param project_id: Required. The ID of the Google Cloud project the cluster belongs to.
620
- :param region: Required. The Cloud Dataproc region in which to handle the request.
621
- :param version: Optional. The version of workflow template to instantiate. If specified,
622
- the workflow will be instantiated only if the current version of
623
- the workflow template has the supplied version.
624
- This option cannot be used to instantiate a previous version of
625
- workflow template.
626
- :param request_id: Optional. A tag that prevents multiple concurrent workflow instances
627
- with the same tag from running. This mitigates risk of concurrent
628
- instances started due to retries.
629
- :param parameters: Optional. Map from parameter names to values that should be used for those
630
- parameters. Values may not exceed 100 characters.
631
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
632
- retried.
633
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
634
- ``retry`` is specified, the timeout applies to each individual attempt.
616
+ :param project_id: Google Cloud project ID that the cluster belongs to.
617
+ :param region: Cloud Dataproc region to handle the request.
618
+ :param version: Version of workflow template to instantiate. If
619
+ specified, the workflow will be instantiated only if the current
620
+ version of the workflow template has the supplied version. This
621
+ option cannot be used to instantiate a previous version of workflow
622
+ template.
623
+ :param request_id: A tag that prevents multiple concurrent workflow
624
+ instances with the same tag from running. This mitigates risk of
625
+ concurrent instances started due to retries.
626
+ :param parameters: Map from parameter names to values that should be
627
+ used for those parameters. Values may not exceed 100 characters.
628
+ :param retry: A retry object used to retry requests. If *None*, requests
629
+ will not be retried.
630
+ :param timeout: The amount of time, in seconds, to wait for the request
631
+ to complete. If *retry* is specified, the timeout applies to each
632
+ individual attempt.
635
633
  :param metadata: Additional metadata that is provided to the method.
636
634
  """
637
635
  if region is None:
@@ -657,21 +655,22 @@ class DataprocHook(GoogleBaseHook):
657
655
  retry: Retry | _MethodDefault = DEFAULT,
658
656
  timeout: float | None = None,
659
657
  metadata: Sequence[tuple[str, str]] = (),
660
- ):
661
- """
662
- Instantiates a template and begins execution.
663
-
664
- :param template: The workflow template to instantiate. If a dict is provided,
665
- it must be of the same form as the protobuf message WorkflowTemplate
666
- :param project_id: Required. The ID of the Google Cloud project the cluster belongs to.
667
- :param region: Required. The Cloud Dataproc region in which to handle the request.
668
- :param request_id: Optional. A tag that prevents multiple concurrent workflow instances
669
- with the same tag from running. This mitigates risk of concurrent
670
- instances started due to retries.
671
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
672
- retried.
673
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
674
- ``retry`` is specified, the timeout applies to each individual attempt.
658
+ ) -> Operation:
659
+ """Instantiate a template and begin execution.
660
+
661
+ :param template: The workflow template to instantiate. If a dict is
662
+ provided, it must be of the same form as the protobuf message
663
+ WorkflowTemplate.
664
+ :param project_id: Google Cloud project ID that the cluster belongs to.
665
+ :param region: Cloud Dataproc region to handle the request.
666
+ :param request_id: A tag that prevents multiple concurrent workflow
667
+ instances with the same tag from running. This mitigates risk of
668
+ concurrent instances started due to retries.
669
+ :param retry: A retry object used to retry requests. If *None*, requests
670
+ will not be retried.
671
+ :param timeout: The amount of time, in seconds, to wait for the request
672
+ to complete. If *retry* is specified, the timeout applies to each
673
+ individual attempt.
675
674
  :param metadata: Additional metadata that is provided to the method.
676
675
  """
677
676
  if region is None:
@@ -696,14 +695,13 @@ class DataprocHook(GoogleBaseHook):
696
695
  wait_time: int = 10,
697
696
  timeout: int | None = None,
698
697
  ) -> None:
699
- """
700
- Helper method which polls a job to check if it finishes.
698
+ """Poll a job to check if it has finished.
701
699
 
702
- :param job_id: Id of the Dataproc job
703
- :param project_id: Required. The ID of the Google Cloud project the cluster belongs to.
704
- :param region: Required. The Cloud Dataproc region in which to handle the request.
705
- :param wait_time: Number of seconds between checks
706
- :param timeout: How many seconds wait for job to be ready. Used only if ``asynchronous`` is False
700
+ :param job_id: Dataproc job ID.
701
+ :param project_id: Google Cloud project ID that the cluster belongs to.
702
+ :param region: Cloud Dataproc region to handle the request.
703
+ :param wait_time: Number of seconds between checks.
704
+ :param timeout: How many seconds wait for job to be ready.
707
705
  """
708
706
  if region is None:
709
707
  raise TypeError("missing 1 required keyword argument: 'region'")
@@ -734,16 +732,16 @@ class DataprocHook(GoogleBaseHook):
734
732
  timeout: float | None = None,
735
733
  metadata: Sequence[tuple[str, str]] = (),
736
734
  ) -> Job:
737
- """
738
- Gets the resource representation for a job in a project.
739
-
740
- :param job_id: Id of the Dataproc job
741
- :param project_id: Required. The ID of the Google Cloud project the cluster belongs to.
742
- :param region: Required. The Cloud Dataproc region in which to handle the request.
743
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
744
- retried.
745
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
746
- ``retry`` is specified, the timeout applies to each individual attempt.
735
+ """Get the resource representation for a job in a project.
736
+
737
+ :param job_id: Dataproc job ID.
738
+ :param project_id: Google Cloud project ID that the cluster belongs to.
739
+ :param region: Cloud Dataproc region to handle the request.
740
+ :param retry: A retry object used to retry requests. If *None*, requests
741
+ will not be retried.
742
+ :param timeout: The amount of time, in seconds, to wait for the request
743
+ to complete. If *retry* is specified, the timeout applies to each
744
+ individual attempt.
747
745
  :param metadata: Additional metadata that is provided to the method.
748
746
  """
749
747
  if region is None:
@@ -768,20 +766,20 @@ class DataprocHook(GoogleBaseHook):
768
766
  timeout: float | None = None,
769
767
  metadata: Sequence[tuple[str, str]] = (),
770
768
  ) -> Job:
771
- """
772
- Submits a job to a cluster.
773
-
774
- :param job: The job resource. If a dict is provided,
775
- it must be of the same form as the protobuf message Job
776
- :param project_id: Required. The ID of the Google Cloud project the cluster belongs to.
777
- :param region: Required. The Cloud Dataproc region in which to handle the request.
778
- :param request_id: Optional. A tag that prevents multiple concurrent workflow instances
779
- with the same tag from running. This mitigates risk of concurrent
780
- instances started due to retries.
781
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
782
- retried.
783
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
784
- ``retry`` is specified, the timeout applies to each individual attempt.
769
+ """Submit a job to a cluster.
770
+
771
+ :param job: The job resource. If a dict is provided, it must be of the
772
+ same form as the protobuf message Job.
773
+ :param project_id: Google Cloud project ID that the cluster belongs to.
774
+ :param region: Cloud Dataproc region to handle the request.
775
+ :param request_id: A tag that prevents multiple concurrent workflow
776
+ instances with the same tag from running. This mitigates risk of
777
+ concurrent instances started due to retries.
778
+ :param retry: A retry object used to retry requests. If *None*, requests
779
+ will not be retried.
780
+ :param timeout: The amount of time, in seconds, to wait for the request
781
+ to complete. If *retry* is specified, the timeout applies to each
782
+ individual attempt.
785
783
  :param metadata: Additional metadata that is provided to the method.
786
784
  """
787
785
  if region is None:
@@ -804,16 +802,16 @@ class DataprocHook(GoogleBaseHook):
804
802
  timeout: float | None = None,
805
803
  metadata: Sequence[tuple[str, str]] = (),
806
804
  ) -> Job:
807
- """
808
- Starts a job cancellation request.
809
-
810
- :param project_id: Required. The ID of the Google Cloud project that the job belongs to.
811
- :param region: Required. The Cloud Dataproc region in which to handle the request.
812
- :param job_id: Required. The job ID.
813
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
814
- retried.
815
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
816
- ``retry`` is specified, the timeout applies to each individual attempt.
805
+ """Start a job cancellation request.
806
+
807
+ :param project_id: Google Cloud project ID that the cluster belongs to.
808
+ :param region: Cloud Dataproc region to handle the request.
809
+ :param job_id: The job ID.
810
+ :param retry: A retry object used to retry requests. If *None*, requests
811
+ will not be retried.
812
+ :param timeout: The amount of time, in seconds, to wait for the request
813
+ to complete. If *retry* is specified, the timeout applies to each
814
+ individual attempt.
817
815
  :param metadata: Additional metadata that is provided to the method.
818
816
  """
819
817
  client = self.get_job_client(region=region)
@@ -838,22 +836,23 @@ class DataprocHook(GoogleBaseHook):
838
836
  timeout: float | None = None,
839
837
  metadata: Sequence[tuple[str, str]] = (),
840
838
  ) -> Operation:
841
- """
842
- Creates a batch workload.
843
-
844
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
845
- :param region: Required. The Cloud Dataproc region in which to handle the request.
846
- :param batch: Required. The batch to create.
847
- :param batch_id: Optional. The ID to use for the batch, which will become the final component
848
- of the batch's resource name.
849
- This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/.
850
- :param request_id: Optional. A unique id used to identify the request. If the server receives two
851
- ``CreateBatchRequest`` requests with the same id, then the second request will be ignored and
852
- the first ``google.longrunning.Operation`` created and stored in the backend is returned.
853
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
854
- retried.
855
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
856
- ``retry`` is specified, the timeout applies to each individual attempt.
839
+ """Create a batch workload.
840
+
841
+ :param project_id: Google Cloud project ID that the cluster belongs to.
842
+ :param region: Cloud Dataproc region to handle the request.
843
+ :param batch: The batch to create.
844
+ :param batch_id: The ID to use for the batch, which will become the
845
+ final component of the batch's resource name. This value must be of
846
+ 4-63 characters. Valid characters are ``[a-z][0-9]-``.
847
+ :param request_id: A unique id used to identify the request. If the
848
+ server receives two *CreateBatchRequest* requests with the same
849
+ ID, the second request will be ignored, and an operation created
850
+ for the first one and stored in the backend is returned.
851
+ :param retry: A retry object used to retry requests. If *None*, requests
852
+ will not be retried.
853
+ :param timeout: The amount of time, in seconds, to wait for the request
854
+ to complete. If *retry* is specified, the timeout applies to each
855
+ individual attempt.
857
856
  :param metadata: Additional metadata that is provided to the method.
858
857
  """
859
858
  client = self.get_batch_client(region)
@@ -882,22 +881,20 @@ class DataprocHook(GoogleBaseHook):
882
881
  timeout: float | None = None,
883
882
  metadata: Sequence[tuple[str, str]] = (),
884
883
  ) -> None:
885
- """
886
- Deletes the batch workload resource.
887
-
888
- :param batch_id: Required. The ID to use for the batch, which will become the final component
889
- of the batch's resource name.
890
- This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/.
891
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
892
- :param region: Required. The Cloud Dataproc region in which to handle the request.
893
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
894
- retried.
895
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
896
- ``retry`` is specified, the timeout applies to each individual attempt.
884
+ """Delete the batch workload resource.
885
+
886
+ :param batch_id: The batch ID.
887
+ :param project_id: Google Cloud project ID that the cluster belongs to.
888
+ :param region: Cloud Dataproc region to handle the request.
889
+ :param retry: A retry object used to retry requests. If *None*, requests
890
+ will not be retried.
891
+ :param timeout: The amount of time, in seconds, to wait for the request
892
+ to complete. If *retry* is specified, the timeout applies to each
893
+ individual attempt.
897
894
  :param metadata: Additional metadata that is provided to the method.
898
895
  """
899
896
  client = self.get_batch_client(region)
900
- name = f"projects/{project_id}/regions/{region}/batches/{batch_id}"
897
+ name = f"projects/{project_id}/locations/{region}/batches/{batch_id}"
901
898
 
902
899
  client.delete_batch(
903
900
  request={
@@ -918,22 +915,20 @@ class DataprocHook(GoogleBaseHook):
918
915
  timeout: float | None = None,
919
916
  metadata: Sequence[tuple[str, str]] = (),
920
917
  ) -> Batch:
921
- """
922
- Gets the batch workload resource representation.
923
-
924
- :param batch_id: Required. The ID to use for the batch, which will become the final component
925
- of the batch's resource name.
926
- This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/.
927
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
928
- :param region: Required. The Cloud Dataproc region in which to handle the request.
929
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
930
- retried.
931
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
932
- ``retry`` is specified, the timeout applies to each individual attempt.
918
+ """Get the batch workload resource representation.
919
+
920
+ :param batch_id: The batch ID.
921
+ :param project_id: Google Cloud project ID that the cluster belongs to.
922
+ :param region: Cloud Dataproc region to handle the request.
923
+ :param retry: A retry object used to retry requests. If *None*, requests
924
+ will not be retried.
925
+ :param timeout: The amount of time, in seconds, to wait for the request
926
+ to complete. If *retry* is specified, the timeout applies to each
927
+ individual attempt.
933
928
  :param metadata: Additional metadata that is provided to the method.
934
929
  """
935
930
  client = self.get_batch_client(region)
936
- name = f"projects/{project_id}/regions/{region}/batches/{batch_id}"
931
+ name = f"projects/{project_id}/locations/{region}/batches/{batch_id}"
937
932
 
938
933
  result = client.get_batch(
939
934
  request={
@@ -956,19 +951,20 @@ class DataprocHook(GoogleBaseHook):
956
951
  timeout: float | None = None,
957
952
  metadata: Sequence[tuple[str, str]] = (),
958
953
  ):
959
- """
960
- Lists batch workloads.
961
-
962
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
963
- :param region: Required. The Cloud Dataproc region in which to handle the request.
964
- :param page_size: Optional. The maximum number of batches to return in each response. The service may
965
- return fewer than this value. The default page size is 20; the maximum page size is 1000.
966
- :param page_token: Optional. A page token received from a previous ``ListBatches`` call.
967
- Provide this token to retrieve the subsequent page.
968
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
969
- retried.
970
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
971
- ``retry`` is specified, the timeout applies to each individual attempt.
954
+ """List batch workloads.
955
+
956
+ :param project_id: Google Cloud project ID that the cluster belongs to.
957
+ :param region: Cloud Dataproc region to handle the request.
958
+ :param page_size: The maximum number of batches to return in each
959
+ response. The service may return fewer than this value. The default
960
+ page size is 20; the maximum page size is 1000.
961
+ :param page_token: A page token received from a previous ``ListBatches``
962
+ call. Provide this token to retrieve the subsequent page.
963
+ :param retry: A retry object used to retry requests. If *None*, requests
964
+ will not be retried.
965
+ :param timeout: The amount of time, in seconds, to wait for the request
966
+ to complete. If *retry* is specified, the timeout applies to each
967
+ individual attempt.
972
968
  :param metadata: Additional metadata that is provided to the method.
973
969
  """
974
970
  client = self.get_batch_client(region)
@@ -997,24 +993,24 @@ class DataprocHook(GoogleBaseHook):
997
993
  timeout: float | None = None,
998
994
  metadata: Sequence[tuple[str, str]] = (),
999
995
  ) -> Batch:
1000
- """
1001
- Wait for a Batch job to complete.
1002
-
1003
- After Batch job submission, the operator will wait for the job to complete, however, this is useful
1004
- in the case where Airflow is restarted or the task pid is killed for any reason. In this case, the
1005
- Batch create will happen again, AlreadyExists will be raised and caught, then should fall to this
1006
- function for waiting on completion.
1007
-
1008
- :param batch_id: Required. The ID to use for the batch, which will become the final component
1009
- of the batch's resource name.
1010
- This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/.
1011
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1012
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
1013
- :param wait_check_interval: The amount of time to pause between checks for job completion
1014
- :param retry: A retry object used to retry requests to get_batch.
1015
- If ``None`` is specified, requests will not be retried.
1016
- :param timeout: The amount of time, in seconds, to wait for the create_batch request to complete.
1017
- Note that if ``retry`` is specified, the timeout applies to each individual attempt.
996
+ """Wait for a batch job to complete.
997
+
998
+ After submission of a batch job, the operator waits for the job to
999
+ complete. This hook is, however, useful in the case when Airflow is
1000
+ restarted or the task pid is killed for any reason. In this case, the
1001
+ creation would happen again, catching the raised AlreadyExists, and fail
1002
+ to this function for waiting on completion.
1003
+
1004
+ :param batch_id: The batch ID.
1005
+ :param region: Cloud Dataproc region to handle the request.
1006
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1007
+ :param wait_check_interval: The amount of time to pause between checks
1008
+ for job completion.
1009
+ :param retry: A retry object used to retry requests. If *None*, requests
1010
+ will not be retried.
1011
+ :param timeout: The amount of time, in seconds, to wait for the request
1012
+ to complete. If *retry* is specified, the timeout applies to each
1013
+ individual attempt.
1018
1014
  :param metadata: Additional metadata that is provided to the method.
1019
1015
  """
1020
1016
  state = None
@@ -1050,8 +1046,7 @@ class DataprocHook(GoogleBaseHook):
1050
1046
 
1051
1047
 
1052
1048
  class DataprocAsyncHook(GoogleBaseHook):
1053
- """
1054
- Asynchronous Hook for Google Cloud Dataproc APIs.
1049
+ """Asynchronous interaction with Google Cloud Dataproc APIs.
1055
1050
 
1056
1051
  All the methods in the hook where project_id is used must be called with
1057
1052
  keyword arguments rather than positional.
@@ -1072,7 +1067,7 @@ class DataprocAsyncHook(GoogleBaseHook):
1072
1067
  self._cached_client: JobControllerAsyncClient | None = None
1073
1068
 
1074
1069
  def get_cluster_client(self, region: str | None = None) -> ClusterControllerAsyncClient:
1075
- """Returns ClusterControllerAsyncClient."""
1070
+ """Create a ClusterControllerAsyncClient."""
1076
1071
  client_options = None
1077
1072
  if region and region != "global":
1078
1073
  client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -1082,7 +1077,7 @@ class DataprocAsyncHook(GoogleBaseHook):
1082
1077
  )
1083
1078
 
1084
1079
  def get_template_client(self, region: str | None = None) -> WorkflowTemplateServiceAsyncClient:
1085
- """Returns WorkflowTemplateServiceAsyncClient."""
1080
+ """Create a WorkflowTemplateServiceAsyncClient."""
1086
1081
  client_options = None
1087
1082
  if region and region != "global":
1088
1083
  client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -1092,7 +1087,7 @@ class DataprocAsyncHook(GoogleBaseHook):
1092
1087
  )
1093
1088
 
1094
1089
  def get_job_client(self, region: str | None = None) -> JobControllerAsyncClient:
1095
- """Returns JobControllerAsyncClient."""
1090
+ """Create a JobControllerAsyncClient."""
1096
1091
  if self._cached_client is None:
1097
1092
  client_options = None
1098
1093
  if region and region != "global":
@@ -1106,7 +1101,7 @@ class DataprocAsyncHook(GoogleBaseHook):
1106
1101
  return self._cached_client
1107
1102
 
1108
1103
  def get_batch_client(self, region: str | None = None) -> BatchControllerAsyncClient:
1109
- """Returns BatchControllerAsyncClient."""
1104
+ """Create a BatchControllerAsyncClient."""
1110
1105
  client_options = None
1111
1106
  if region and region != "global":
1112
1107
  client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -1116,7 +1111,7 @@ class DataprocAsyncHook(GoogleBaseHook):
1116
1111
  )
1117
1112
 
1118
1113
  def get_operations_client(self, region: str) -> OperationsClient:
1119
- """Returns OperationsClient."""
1114
+ """Create a OperationsClient."""
1120
1115
  return self.get_template_client(region=region).transport.operations_client
1121
1116
 
1122
1117
  @GoogleBaseHook.fallback_to_default_project_id
@@ -1132,28 +1127,30 @@ class DataprocAsyncHook(GoogleBaseHook):
1132
1127
  retry: Retry | _MethodDefault = DEFAULT,
1133
1128
  timeout: float | None = None,
1134
1129
  metadata: Sequence[tuple[str, str]] = (),
1135
- ):
1136
- """
1137
- Creates a cluster in a project.
1138
-
1139
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
1140
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1141
- :param cluster_name: Name of the cluster to create
1142
- :param labels: Labels that will be assigned to created cluster
1143
- :param cluster_config: Required. The cluster config to create.
1144
- If a dict is provided, it must be of the same form as the protobuf message
1145
- :class:`~google.cloud.dataproc_v1.types.ClusterConfig`
1146
- :param virtual_cluster_config: Optional. The virtual cluster config, used when creating a Dataproc
1147
- cluster that does not directly control the underlying compute resources, for example, when
1148
- creating a `Dataproc-on-GKE cluster`
1149
- :class:`~google.cloud.dataproc_v1.types.VirtualClusterConfig`
1150
- :param request_id: Optional. A unique id used to identify the request. If the server receives two
1151
- ``CreateClusterRequest`` requests with the same id, then the second request will be ignored and
1152
- the first ``google.longrunning.Operation`` created and stored in the backend is returned.
1153
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1154
- retried.
1155
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1156
- ``retry`` is specified, the timeout applies to each individual attempt.
1130
+ ) -> AsyncOperation:
1131
+ """Create a cluster in a project.
1132
+
1133
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1134
+ :param region: Cloud Dataproc region in which to handle the request.
1135
+ :param cluster_name: Name of the cluster to create.
1136
+ :param labels: Labels that will be assigned to created cluster.
1137
+ :param cluster_config: The cluster config to create. If a dict is
1138
+ provided, it must be of the same form as the protobuf message
1139
+ :class:`~google.cloud.dataproc_v1.types.ClusterConfig`.
1140
+ :param virtual_cluster_config: The virtual cluster config, used when
1141
+ creating a Dataproc cluster that does not directly control the
1142
+ underlying compute resources, for example, when creating a
1143
+ Dataproc-on-GKE cluster with
1144
+ :class:`~google.cloud.dataproc_v1.types.VirtualClusterConfig`.
1145
+ :param request_id: A unique id used to identify the request. If the
1146
+ server receives two *CreateClusterRequest* requests with the same
1147
+ ID, the second request will be ignored, and an operation created
1148
+ for the first one and stored in the backend is returned.
1149
+ :param retry: A retry object used to retry requests. If *None*, requests
1150
+ will not be retried.
1151
+ :param timeout: The amount of time, in seconds, to wait for the request
1152
+ to complete. If *retry* is specified, the timeout applies to each
1153
+ individual attempt.
1157
1154
  :param metadata: Additional metadata that is provided to the method.
1158
1155
  """
1159
1156
  # Dataproc labels must conform to the following regex:
@@ -1197,26 +1194,27 @@ class DataprocAsyncHook(GoogleBaseHook):
1197
1194
  retry: Retry | _MethodDefault = DEFAULT,
1198
1195
  timeout: float | None = None,
1199
1196
  metadata: Sequence[tuple[str, str]] = (),
1200
- ):
1201
- """
1202
- Deletes a cluster in a project.
1203
-
1204
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
1205
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1206
- :param cluster_name: Required. The cluster name.
1207
- :param cluster_uuid: Optional. Specifying the ``cluster_uuid`` means the RPC should fail
1208
- if cluster with specified UUID does not exist.
1209
- :param request_id: Optional. A unique id used to identify the request. If the server receives two
1210
- ``DeleteClusterRequest`` requests with the same id, then the second request will be ignored and
1211
- the first ``google.longrunning.Operation`` created and stored in the backend is returned.
1212
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1213
- retried.
1214
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1215
- ``retry`` is specified, the timeout applies to each individual attempt.
1197
+ ) -> AsyncOperation:
1198
+ """Delete a cluster in a project.
1199
+
1200
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1201
+ :param region: Cloud Dataproc region in which to handle the request.
1202
+ :param cluster_name: Name of the cluster to delete.
1203
+ :param cluster_uuid: If specified, the RPC should fail if cluster with
1204
+ the UUID does not exist.
1205
+ :param request_id: A unique id used to identify the request. If the
1206
+ server receives two *DeleteClusterRequest* requests with the same
1207
+ ID, the second request will be ignored, and an operation created
1208
+ for the first one and stored in the backend is returned.
1209
+ :param retry: A retry object used to retry requests. If *None*, requests
1210
+ will not be retried.
1211
+ :param timeout: The amount of time, in seconds, to wait for the request
1212
+ to complete. If *retry* is specified, the timeout applies to each
1213
+ individual attempt.
1216
1214
  :param metadata: Additional metadata that is provided to the method.
1217
1215
  """
1218
1216
  client = self.get_cluster_client(region=region)
1219
- result = client.delete_cluster(
1217
+ result = await client.delete_cluster(
1220
1218
  request={
1221
1219
  "project_id": project_id,
1222
1220
  "region": region,
@@ -1239,18 +1237,19 @@ class DataprocAsyncHook(GoogleBaseHook):
1239
1237
  retry: Retry | _MethodDefault = DEFAULT,
1240
1238
  timeout: float | None = None,
1241
1239
  metadata: Sequence[tuple[str, str]] = (),
1242
- ):
1243
- """
1244
- Gets cluster diagnostic information. After the operation completes GCS uri to
1245
- diagnose is returned.
1246
-
1247
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
1248
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1249
- :param cluster_name: Required. The cluster name.
1250
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1251
- retried.
1252
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1253
- ``retry`` is specified, the timeout applies to each individual attempt.
1240
+ ) -> str:
1241
+ """Get cluster diagnostic information.
1242
+
1243
+ After the operation completes, the GCS URI to diagnose is returned.
1244
+
1245
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1246
+ :param region: Cloud Dataproc region in which to handle the request.
1247
+ :param cluster_name: Name of the cluster.
1248
+ :param retry: A retry object used to retry requests. If *None*, requests
1249
+ will not be retried.
1250
+ :param timeout: The amount of time, in seconds, to wait for the request
1251
+ to complete. If *retry* is specified, the timeout applies to each
1252
+ individual attempt.
1254
1253
  :param metadata: Additional metadata that is provided to the method.
1255
1254
  """
1256
1255
  client = self.get_cluster_client(region=region)
@@ -1273,17 +1272,17 @@ class DataprocAsyncHook(GoogleBaseHook):
1273
1272
  retry: Retry | _MethodDefault = DEFAULT,
1274
1273
  timeout: float | None = None,
1275
1274
  metadata: Sequence[tuple[str, str]] = (),
1276
- ):
1277
- """
1278
- Gets the resource representation for a cluster in a project.
1279
-
1280
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
1281
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1282
- :param cluster_name: Required. The cluster name.
1283
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1284
- retried.
1285
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1286
- ``retry`` is specified, the timeout applies to each individual attempt.
1275
+ ) -> Cluster:
1276
+ """Get the resource representation for a cluster in a project.
1277
+
1278
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1279
+ :param region: Cloud Dataproc region to handle the request.
1280
+ :param cluster_name: The cluster name.
1281
+ :param retry: A retry object used to retry requests. If *None*, requests
1282
+ will not be retried.
1283
+ :param timeout: The amount of time, in seconds, to wait for the request
1284
+ to complete. If *retry* is specified, the timeout applies to each
1285
+ individual attempt.
1287
1286
  :param metadata: Additional metadata that is provided to the method.
1288
1287
  """
1289
1288
  client = self.get_cluster_client(region=region)
@@ -1306,19 +1305,21 @@ class DataprocAsyncHook(GoogleBaseHook):
1306
1305
  timeout: float | None = None,
1307
1306
  metadata: Sequence[tuple[str, str]] = (),
1308
1307
  ):
1309
- """
1310
- Lists all regions/{region}/clusters in a project.
1311
-
1312
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
1313
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1314
- :param filter_: Optional. A filter constraining the clusters to list. Filters are case-sensitive.
1315
- :param page_size: The maximum number of resources contained in the underlying API response. If page
1316
- streaming is performed per- resource, this parameter does not affect the return value. If page
1317
- streaming is performed per-page, this determines the maximum number of resources in a page.
1318
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1319
- retried.
1320
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1321
- ``retry`` is specified, the timeout applies to each individual attempt.
1308
+ """List all regions/{region}/clusters in a project.
1309
+
1310
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1311
+ :param region: Cloud Dataproc region to handle the request.
1312
+ :param filter_: To constrain the clusters to. Case-sensitive.
1313
+ :param page_size: The maximum number of resources contained in the
1314
+ underlying API response. If page streaming is performed
1315
+ per-resource, this parameter does not affect the return value. If
1316
+ page streaming is performed per-page, this determines the maximum
1317
+ number of resources in a page.
1318
+ :param retry: A retry object used to retry requests. If *None*, requests
1319
+ will not be retried.
1320
+ :param timeout: The amount of time, in seconds, to wait for the request
1321
+ to complete. If *retry* is specified, the timeout applies to each
1322
+ individual attempt.
1322
1323
  :param metadata: Additional metadata that is provided to the method.
1323
1324
  """
1324
1325
  client = self.get_cluster_client(region=region)
@@ -1343,53 +1344,56 @@ class DataprocAsyncHook(GoogleBaseHook):
1343
1344
  retry: Retry | _MethodDefault = DEFAULT,
1344
1345
  timeout: float | None = None,
1345
1346
  metadata: Sequence[tuple[str, str]] = (),
1346
- ):
1347
- """
1348
- Updates a cluster in a project.
1349
-
1350
- :param project_id: Required. The ID of the Google Cloud project the cluster belongs to.
1351
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1352
- :param cluster_name: Required. The cluster name.
1353
- :param cluster: Required. The changes to the cluster.
1354
-
1355
- If a dict is provided, it must be of the same form as the protobuf message
1356
- :class:`~google.cloud.dataproc_v1.types.Cluster`
1357
- :param update_mask: Required. Specifies the path, relative to ``Cluster``, of the field to update. For
1358
- example, to change the number of workers in a cluster to 5, the ``update_mask`` parameter would be
1359
- specified as ``config.worker_config.num_instances``, and the ``PATCH`` request body would specify
1360
- the new value, as follows:
1361
-
1362
- ::
1363
-
1364
- { "config":{ "workerConfig":{ "numInstances":"5" } } }
1365
-
1366
- Similarly, to change the number of preemptible workers in a cluster to 5, the ``update_mask``
1367
- parameter would be ``config.secondary_worker_config.num_instances``, and the ``PATCH`` request
1368
- body would be set as follows:
1369
-
1370
- ::
1371
-
1372
- { "config":{ "secondaryWorkerConfig":{ "numInstances":"5" } } }
1373
-
1374
- If a dict is provided, it must be of the same form as the protobuf message
1375
- :class:`~google.cloud.dataproc_v1.types.FieldMask`
1376
- :param graceful_decommission_timeout: Optional. Timeout for graceful YARN decommissioning. Graceful
1377
- decommissioning allows removing nodes from the cluster without interrupting jobs in progress.
1378
- Timeout specifies how long to wait for jobs in progress to finish before forcefully removing nodes
1379
- (and potentially interrupting jobs). Default timeout is 0 (for forceful decommission), and the
1380
- maximum allowed timeout is 1 day.
1347
+ ) -> AsyncOperation:
1348
+ """Update a cluster in a project.
1349
+
1350
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1351
+ :param region: Cloud Dataproc region to handle the request.
1352
+ :param cluster_name: The cluster name.
1353
+ :param cluster: Changes to the cluster. If a dict is provided, it must
1354
+ be of the same form as the protobuf message
1355
+ :class:`~google.cloud.dataproc_v1.types.Cluster`.
1356
+ :param update_mask: Specifies the path, relative to ``Cluster``, of the
1357
+ field to update. For example, to change the number of workers in a
1358
+ cluster to 5, this would be specified as
1359
+ ``config.worker_config.num_instances``, and the ``PATCH`` request
1360
+ body would specify the new value:
1361
+
1362
+ .. code-block:: python
1363
+
1364
+ {"config": {"workerConfig": {"numInstances": "5"}}}
1365
+
1366
+ Similarly, to change the number of preemptible workers in a cluster
1367
+ to 5, this would be ``config.secondary_worker_config.num_instances``
1368
+ and the ``PATCH`` request body would be:
1369
+
1370
+ .. code-block:: python
1371
+
1372
+ {"config": {"secondaryWorkerConfig": {"numInstances": "5"}}}
1373
+
1374
+ If a dict is provided, it must be of the same form as the protobuf
1375
+ message :class:`~google.cloud.dataproc_v1.types.FieldMask`.
1376
+ :param graceful_decommission_timeout: Timeout for graceful YARN
1377
+ decommissioning. Graceful decommissioning allows removing nodes from
1378
+ the cluster without interrupting jobs in progress. Timeout specifies
1379
+ how long to wait for jobs in progress to finish before forcefully
1380
+ removing nodes (and potentially interrupting jobs). Default timeout
1381
+ is 0 (for forceful decommission), and the maximum allowed timeout is
1382
+ one day.
1381
1383
 
1382
1384
  Only supported on Dataproc image versions 1.2 and higher.
1383
1385
 
1384
- If a dict is provided, it must be of the same form as the protobuf message
1385
- :class:`~google.cloud.dataproc_v1.types.Duration`
1386
- :param request_id: Optional. A unique id used to identify the request. If the server receives two
1387
- ``UpdateClusterRequest`` requests with the same id, then the second request will be ignored and
1388
- the first ``google.longrunning.Operation`` created and stored in the backend is returned.
1389
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1390
- retried.
1391
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1392
- ``retry`` is specified, the timeout applies to each individual attempt.
1386
+ If a dict is provided, it must be of the same form as the protobuf
1387
+ message :class:`~google.cloud.dataproc_v1.types.Duration`.
1388
+ :param request_id: A unique id used to identify the request. If the
1389
+ server receives two *UpdateClusterRequest* requests with the same
1390
+ ID, the second request will be ignored, and an operation created
1391
+ for the first one and stored in the backend is returned.
1392
+ :param retry: A retry object used to retry requests. If *None*, requests
1393
+ will not be retried.
1394
+ :param timeout: The amount of time, in seconds, to wait for the request
1395
+ to complete. If *retry* is specified, the timeout applies to each
1396
+ individual attempt.
1393
1397
  :param metadata: Additional metadata that is provided to the method.
1394
1398
  """
1395
1399
  if region is None:
@@ -1421,17 +1425,18 @@ class DataprocAsyncHook(GoogleBaseHook):
1421
1425
  timeout: float | None = None,
1422
1426
  metadata: Sequence[tuple[str, str]] = (),
1423
1427
  ) -> WorkflowTemplate:
1424
- """
1425
- Creates new workflow template.
1426
-
1427
- :param project_id: Required. The ID of the Google Cloud project the cluster belongs to.
1428
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1429
- :param template: The Dataproc workflow template to create. If a dict is provided,
1430
- it must be of the same form as the protobuf message WorkflowTemplate.
1431
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1432
- retried.
1433
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1434
- ``retry`` is specified, the timeout applies to each individual attempt.
1428
+ """Create a new workflow template.
1429
+
1430
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1431
+ :param region: Cloud Dataproc region to handle the request.
1432
+ :param template: The Dataproc workflow template to create. If a dict is
1433
+ provided, it must be of the same form as the protobuf message
1434
+ WorkflowTemplate.
1435
+ :param retry: A retry object used to retry requests. If *None*, requests
1436
+ will not be retried.
1437
+ :param timeout: The amount of time, in seconds, to wait for the request
1438
+ to complete. If *retry* is specified, the timeout applies to each
1439
+ individual attempt.
1435
1440
  :param metadata: Additional metadata that is provided to the method.
1436
1441
  """
1437
1442
  if region is None:
@@ -1455,27 +1460,27 @@ class DataprocAsyncHook(GoogleBaseHook):
1455
1460
  retry: Retry | _MethodDefault = DEFAULT,
1456
1461
  timeout: float | None = None,
1457
1462
  metadata: Sequence[tuple[str, str]] = (),
1458
- ):
1459
- """
1460
- Instantiates a template and begins execution.
1463
+ ) -> AsyncOperation:
1464
+ """Instantiate a template and begin execution.
1461
1465
 
1462
1466
  :param template_name: Name of template to instantiate.
1463
- :param project_id: Required. The ID of the Google Cloud project the cluster belongs to.
1464
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1465
- :param version: Optional. The version of workflow template to instantiate. If specified,
1466
- the workflow will be instantiated only if the current version of
1467
- the workflow template has the supplied version.
1468
- This option cannot be used to instantiate a previous version of
1469
- workflow template.
1470
- :param request_id: Optional. A tag that prevents multiple concurrent workflow instances
1471
- with the same tag from running. This mitigates risk of concurrent
1472
- instances started due to retries.
1473
- :param parameters: Optional. Map from parameter names to values that should be used for those
1474
- parameters. Values may not exceed 100 characters.
1475
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1476
- retried.
1477
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1478
- ``retry`` is specified, the timeout applies to each individual attempt.
1467
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1468
+ :param region: Cloud Dataproc region to handle the request.
1469
+ :param version: Version of workflow template to instantiate. If
1470
+ specified, the workflow will be instantiated only if the current
1471
+ version of the workflow template has the supplied version. This
1472
+ option cannot be used to instantiate a previous version of workflow
1473
+ template.
1474
+ :param request_id: A tag that prevents multiple concurrent workflow
1475
+ instances with the same tag from running. This mitigates risk of
1476
+ concurrent instances started due to retries.
1477
+ :param parameters: Map from parameter names to values that should be
1478
+ used for those parameters. Values may not exceed 100 characters.
1479
+ :param retry: A retry object used to retry requests. If *None*, requests
1480
+ will not be retried.
1481
+ :param timeout: The amount of time, in seconds, to wait for the request
1482
+ to complete. If *retry* is specified, the timeout applies to each
1483
+ individual attempt.
1479
1484
  :param metadata: Additional metadata that is provided to the method.
1480
1485
  """
1481
1486
  if region is None:
@@ -1501,21 +1506,22 @@ class DataprocAsyncHook(GoogleBaseHook):
1501
1506
  retry: Retry | _MethodDefault = DEFAULT,
1502
1507
  timeout: float | None = None,
1503
1508
  metadata: Sequence[tuple[str, str]] = (),
1504
- ):
1505
- """
1506
- Instantiates a template and begins execution.
1507
-
1508
- :param template: The workflow template to instantiate. If a dict is provided,
1509
- it must be of the same form as the protobuf message WorkflowTemplate
1510
- :param project_id: Required. The ID of the Google Cloud project the cluster belongs to.
1511
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1512
- :param request_id: Optional. A tag that prevents multiple concurrent workflow instances
1513
- with the same tag from running. This mitigates risk of concurrent
1514
- instances started due to retries.
1515
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1516
- retried.
1517
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1518
- ``retry`` is specified, the timeout applies to each individual attempt.
1509
+ ) -> AsyncOperation:
1510
+ """Instantiate a template and begin execution.
1511
+
1512
+ :param template: The workflow template to instantiate. If a dict is
1513
+ provided, it must be of the same form as the protobuf message
1514
+ WorkflowTemplate.
1515
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1516
+ :param region: Cloud Dataproc region to handle the request.
1517
+ :param request_id: A tag that prevents multiple concurrent workflow
1518
+ instances with the same tag from running. This mitigates risk of
1519
+ concurrent instances started due to retries.
1520
+ :param retry: A retry object used to retry requests. If *None*, requests
1521
+ will not be retried.
1522
+ :param timeout: The amount of time, in seconds, to wait for the request
1523
+ to complete. If *retry* is specified, the timeout applies to each
1524
+ individual attempt.
1519
1525
  :param metadata: Additional metadata that is provided to the method.
1520
1526
  """
1521
1527
  if region is None:
@@ -1544,16 +1550,16 @@ class DataprocAsyncHook(GoogleBaseHook):
1544
1550
  timeout: float | None = None,
1545
1551
  metadata: Sequence[tuple[str, str]] = (),
1546
1552
  ) -> Job:
1547
- """
1548
- Gets the resource representation for a job in a project.
1549
-
1550
- :param job_id: Id of the Dataproc job
1551
- :param project_id: Required. The ID of the Google Cloud project the cluster belongs to.
1552
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1553
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1554
- retried.
1555
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1556
- ``retry`` is specified, the timeout applies to each individual attempt.
1553
+ """Get the resource representation for a job in a project.
1554
+
1555
+ :param job_id: Dataproc job ID.
1556
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1557
+ :param region: Cloud Dataproc region to handle the request.
1558
+ :param retry: A retry object used to retry requests. If *None*, requests
1559
+ will not be retried.
1560
+ :param timeout: The amount of time, in seconds, to wait for the request
1561
+ to complete. If *retry* is specified, the timeout applies to each
1562
+ individual attempt.
1557
1563
  :param metadata: Additional metadata that is provided to the method.
1558
1564
  """
1559
1565
  if region is None:
@@ -1578,20 +1584,20 @@ class DataprocAsyncHook(GoogleBaseHook):
1578
1584
  timeout: float | None = None,
1579
1585
  metadata: Sequence[tuple[str, str]] = (),
1580
1586
  ) -> Job:
1581
- """
1582
- Submits a job to a cluster.
1583
-
1584
- :param job: The job resource. If a dict is provided,
1585
- it must be of the same form as the protobuf message Job
1586
- :param project_id: Required. The ID of the Google Cloud project the cluster belongs to.
1587
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1588
- :param request_id: Optional. A tag that prevents multiple concurrent workflow instances
1589
- with the same tag from running. This mitigates risk of concurrent
1590
- instances started due to retries.
1591
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1592
- retried.
1593
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1594
- ``retry`` is specified, the timeout applies to each individual attempt.
1587
+ """Submit a job to a cluster.
1588
+
1589
+ :param job: The job resource. If a dict is provided, it must be of the
1590
+ same form as the protobuf message Job.
1591
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1592
+ :param region: Cloud Dataproc region to handle the request.
1593
+ :param request_id: A tag that prevents multiple concurrent workflow
1594
+ instances with the same tag from running. This mitigates risk of
1595
+ concurrent instances started due to retries.
1596
+ :param retry: A retry object used to retry requests. If *None*, requests
1597
+ will not be retried.
1598
+ :param timeout: The amount of time, in seconds, to wait for the request
1599
+ to complete. If *retry* is specified, the timeout applies to each
1600
+ individual attempt.
1595
1601
  :param metadata: Additional metadata that is provided to the method.
1596
1602
  """
1597
1603
  if region is None:
@@ -1614,16 +1620,16 @@ class DataprocAsyncHook(GoogleBaseHook):
1614
1620
  timeout: float | None = None,
1615
1621
  metadata: Sequence[tuple[str, str]] = (),
1616
1622
  ) -> Job:
1617
- """
1618
- Starts a job cancellation request.
1619
-
1620
- :param project_id: Required. The ID of the Google Cloud project that the job belongs to.
1621
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1622
- :param job_id: Required. The job ID.
1623
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1624
- retried.
1625
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1626
- ``retry`` is specified, the timeout applies to each individual attempt.
1623
+ """Start a job cancellation request.
1624
+
1625
+ :param project_id: Google Cloud project ID that the job belongs to.
1626
+ :param region: Cloud Dataproc region to handle the request.
1627
+ :param job_id: The job ID.
1628
+ :param retry: A retry object used to retry requests. If *None*, requests
1629
+ will not be retried.
1630
+ :param timeout: The amount of time, in seconds, to wait for the request
1631
+ to complete. If *retry* is specified, the timeout applies to each
1632
+ individual attempt.
1627
1633
  :param metadata: Additional metadata that is provided to the method.
1628
1634
  """
1629
1635
  client = self.get_job_client(region=region)
@@ -1648,22 +1654,23 @@ class DataprocAsyncHook(GoogleBaseHook):
1648
1654
  timeout: float | None = None,
1649
1655
  metadata: Sequence[tuple[str, str]] = (),
1650
1656
  ) -> AsyncOperation:
1651
- """
1652
- Creates a batch workload.
1653
-
1654
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
1655
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1656
- :param batch: Required. The batch to create.
1657
- :param batch_id: Optional. The ID to use for the batch, which will become the final component
1658
- of the batch's resource name.
1659
- This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/.
1660
- :param request_id: Optional. A unique id used to identify the request. If the server receives two
1661
- ``CreateBatchRequest`` requests with the same id, then the second request will be ignored and
1662
- the first ``google.longrunning.Operation`` created and stored in the backend is returned.
1663
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1664
- retried.
1665
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1666
- ``retry`` is specified, the timeout applies to each individual attempt.
1657
+ """Create a batch workload.
1658
+
1659
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1660
+ :param region: Cloud Dataproc region to handle the request.
1661
+ :param batch: The batch to create.
1662
+ :param batch_id: The ID to use for the batch, which will become the
1663
+ final component of the batch's resource name. This value must be
1664
+ 4-63 characters. Valid characters are ``[a-z][0-9]-``.
1665
+ :param request_id: A unique id used to identify the request. If the
1666
+ server receives two *CreateBatchRequest* requests with the same
1667
+ ID, the second request will be ignored, and an operation created
1668
+ for the first one and stored in the backend is returned.
1669
+ :param retry: A retry object used to retry requests. If *None*, requests
1670
+ will not be retried.
1671
+ :param timeout: The amount of time, in seconds, to wait for the request
1672
+ to complete. If *retry* is specified, the timeout applies to each
1673
+ individual attempt.
1667
1674
  :param metadata: Additional metadata that is provided to the method.
1668
1675
  """
1669
1676
  client = self.get_batch_client(region)
@@ -1692,22 +1699,20 @@ class DataprocAsyncHook(GoogleBaseHook):
1692
1699
  timeout: float | None = None,
1693
1700
  metadata: Sequence[tuple[str, str]] = (),
1694
1701
  ) -> None:
1695
- """
1696
- Deletes the batch workload resource.
1697
-
1698
- :param batch_id: Required. The ID to use for the batch, which will become the final component
1699
- of the batch's resource name.
1700
- This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/.
1701
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
1702
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1703
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1704
- retried.
1705
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1706
- ``retry`` is specified, the timeout applies to each individual attempt.
1702
+ """Delete the batch workload resource.
1703
+
1704
+ :param batch_id: The batch ID.
1705
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1706
+ :param region: Cloud Dataproc region to handle the request.
1707
+ :param retry: A retry object used to retry requests. If *None*, requests
1708
+ will not be retried.
1709
+ :param timeout: The amount of time, in seconds, to wait for the request
1710
+ to complete. If *retry* is specified, the timeout applies to each
1711
+ individual attempt.
1707
1712
  :param metadata: Additional metadata that is provided to the method.
1708
1713
  """
1709
1714
  client = self.get_batch_client(region)
1710
- name = f"projects/{project_id}/regions/{region}/batches/{batch_id}"
1715
+ name = f"projects/{project_id}/locations/{region}/batches/{batch_id}"
1711
1716
 
1712
1717
  await client.delete_batch(
1713
1718
  request={
@@ -1728,22 +1733,20 @@ class DataprocAsyncHook(GoogleBaseHook):
1728
1733
  timeout: float | None = None,
1729
1734
  metadata: Sequence[tuple[str, str]] = (),
1730
1735
  ) -> Batch:
1731
- """
1732
- Gets the batch workload resource representation.
1733
-
1734
- :param batch_id: Required. The ID to use for the batch, which will become the final component
1735
- of the batch's resource name.
1736
- This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/.
1737
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
1738
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1739
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1740
- retried.
1741
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1742
- ``retry`` is specified, the timeout applies to each individual attempt.
1736
+ """Get the batch workload resource representation.
1737
+
1738
+ :param batch_id: The batch ID.
1739
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1740
+ :param region: Cloud Dataproc region to handle the request.
1741
+ :param retry: A retry object used to retry requests. If *None*, requests
1742
+ will not be retried.
1743
+ :param timeout: The amount of time, in seconds, to wait for the request
1744
+ to complete. If *retry* is specified, the timeout applies to each
1745
+ individual attempt.
1743
1746
  :param metadata: Additional metadata that is provided to the method.
1744
1747
  """
1745
1748
  client = self.get_batch_client(region)
1746
- name = f"projects/{project_id}/regions/{region}/batches/{batch_id}"
1749
+ name = f"projects/{project_id}/locations/{region}/batches/{batch_id}"
1747
1750
 
1748
1751
  result = await client.get_batch(
1749
1752
  request={
@@ -1766,19 +1769,20 @@ class DataprocAsyncHook(GoogleBaseHook):
1766
1769
  timeout: float | None = None,
1767
1770
  metadata: Sequence[tuple[str, str]] = (),
1768
1771
  ):
1769
- """
1770
- Lists batch workloads.
1771
-
1772
- :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
1773
- :param region: Required. The Cloud Dataproc region in which to handle the request.
1774
- :param page_size: Optional. The maximum number of batches to return in each response. The service may
1775
- return fewer than this value. The default page size is 20; the maximum page size is 1000.
1776
- :param page_token: Optional. A page token received from a previous ``ListBatches`` call.
1777
- Provide this token to retrieve the subsequent page.
1778
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
1779
- retried.
1780
- :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
1781
- ``retry`` is specified, the timeout applies to each individual attempt.
1772
+ """List batch workloads.
1773
+
1774
+ :param project_id: Google Cloud project ID that the cluster belongs to.
1775
+ :param region: Cloud Dataproc region to handle the request.
1776
+ :param page_size: The maximum number of batches to return in each
1777
+ response. The service may return fewer than this value. The default
1778
+ page size is 20; the maximum page size is 1000.
1779
+ :param page_token: A page token received from a previous ``ListBatches``
1780
+ call. Provide this token to retrieve the subsequent page.
1781
+ :param retry: A retry object used to retry requests. If *None*, requests
1782
+ will not be retried.
1783
+ :param timeout: The amount of time, in seconds, to wait for the request
1784
+ to complete. If *retry* is specified, the timeout applies to each
1785
+ individual attempt.
1782
1786
  :param metadata: Additional metadata that is provided to the method.
1783
1787
  """
1784
1788
  client = self.get_batch_client(region)