apache-airflow-providers-google 10.2.0rc1__py3-none-any.whl → 10.3.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +38 -39
- airflow/providers/google/ads/transfers/ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +6 -9
- airflow/providers/google/cloud/hooks/bigquery.py +328 -318
- airflow/providers/google/cloud/hooks/cloud_sql.py +66 -22
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +46 -70
- airflow/providers/google/cloud/hooks/dataflow.py +11 -15
- airflow/providers/google/cloud/hooks/dataform.py +3 -3
- airflow/providers/google/cloud/hooks/dataproc.py +577 -573
- airflow/providers/google/cloud/hooks/functions.py +60 -76
- airflow/providers/google/cloud/hooks/gcs.py +108 -18
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +69 -90
- airflow/providers/google/cloud/links/datafusion.py +4 -3
- airflow/providers/google/cloud/operators/bigquery.py +201 -191
- airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/operators/cloud_build.py +2 -1
- airflow/providers/google/cloud/operators/cloud_composer.py +4 -3
- airflow/providers/google/cloud/operators/cloud_sql.py +62 -28
- airflow/providers/google/cloud/operators/dataflow.py +6 -4
- airflow/providers/google/cloud/operators/dataform.py +3 -2
- airflow/providers/google/cloud/operators/dataproc.py +127 -123
- airflow/providers/google/cloud/operators/dataproc_metastore.py +18 -26
- airflow/providers/google/cloud/operators/gcs.py +35 -13
- airflow/providers/google/cloud/operators/kubernetes_engine.py +92 -42
- airflow/providers/google/cloud/operators/mlengine.py +2 -6
- airflow/providers/google/cloud/operators/vision.py +47 -56
- airflow/providers/google/cloud/sensors/bigquery.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +5 -7
- airflow/providers/google/cloud/sensors/pubsub.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +3 -2
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +6 -5
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +46 -7
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +5 -2
- airflow/providers/google/cloud/triggers/cloud_sql.py +102 -0
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +28 -6
- airflow/providers/google/cloud/utils/bigquery.py +17 -0
- airflow/providers/google/get_provider_info.py +7 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +4 -0
- airflow/providers/google/suite/transfers/local_to_drive.py +28 -26
- apache_airflow_providers_google-10.3.0rc1.dist-info/METADATA +289 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/RECORD +49 -48
- apache_airflow_providers_google-10.2.0rc1.dist-info/METADATA +0 -1824
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/LICENSE +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/NOTICE +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_google-10.2.0rc1.dist-info → apache_airflow_providers_google-10.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -78,8 +78,7 @@ class DataProcJobBuilder:
             self.job["job"][job_type]["properties"] = properties

     def add_labels(self, labels: dict | None = None) -> None:
-        """
-        Set labels for Dataproc job.
+        """Set labels for Dataproc job.

         :param labels: Labels for the job query.
         """
@@ -87,8 +86,7 @@ class DataProcJobBuilder:
             self.job["job"]["labels"].update(labels)

     def add_variables(self, variables: dict | None = None) -> None:
-        """
-        Set variables for Dataproc job.
+        """Set variables for Dataproc job.

         :param variables: Variables for the job query.
         """
@@ -96,8 +94,7 @@ class DataProcJobBuilder:
             self.job["job"][self.job_type]["script_variables"] = variables

     def add_args(self, args: list[str] | None = None) -> None:
-        """
-        Set args for Dataproc job.
+        """Set args for Dataproc job.

         :param args: Args for the job query.
         """
@@ -105,24 +102,21 @@ class DataProcJobBuilder:
             self.job["job"][self.job_type]["args"] = args

     def add_query(self, query: str) -> None:
-        """
-        Set query for Dataproc job.
+        """Set query for Dataproc job.

         :param query: query for the job.
         """
         self.job["job"][self.job_type]["query_list"] = {"queries": [query]}

     def add_query_uri(self, query_uri: str) -> None:
-        """
-        Set query uri for Dataproc job.
+        """Set query uri for Dataproc job.

         :param query_uri: URI for the job query.
         """
         self.job["job"][self.job_type]["query_file_uri"] = query_uri

     def add_jar_file_uris(self, jars: list[str] | None = None) -> None:
-        """
-        Set jars uris for Dataproc job.
+        """Set jars uris for Dataproc job.

         :param jars: List of jars URIs
         """
@@ -130,8 +124,7 @@ class DataProcJobBuilder:
             self.job["job"][self.job_type]["jar_file_uris"] = jars

     def add_archive_uris(self, archives: list[str] | None = None) -> None:
-        """
-        Set archives uris for Dataproc job.
+        """Set archives uris for Dataproc job.

         :param archives: List of archives URIs
         """
@@ -139,8 +132,7 @@ class DataProcJobBuilder:
             self.job["job"][self.job_type]["archive_uris"] = archives

     def add_file_uris(self, files: list[str] | None = None) -> None:
-        """
-        Set file uris for Dataproc job.
+        """Set file uris for Dataproc job.

         :param files: List of files URIs
         """
@@ -148,8 +140,7 @@ class DataProcJobBuilder:
             self.job["job"][self.job_type]["file_uris"] = files

     def add_python_file_uris(self, pyfiles: list[str] | None = None) -> None:
-        """
-        Set python file uris for Dataproc job.
+        """Set python file uris for Dataproc job.

         :param pyfiles: List of python files URIs
         """
@@ -157,8 +148,7 @@ class DataProcJobBuilder:
             self.job["job"][self.job_type]["python_file_uris"] = pyfiles

     def set_main(self, main_jar: str | None = None, main_class: str | None = None) -> None:
-        """
-        Set Dataproc main class.
+        """Set Dataproc main class.

         :param main_jar: URI for the main file.
         :param main_class: Name of the main class.
@@ -172,16 +162,16 @@ class DataProcJobBuilder:
             self.job["job"][self.job_type]["main_class"] = main_class

     def set_python_main(self, main: str) -> None:
-        """
-        Set Dataproc main python file uri.
+        """Set Dataproc main python file uri.

         :param main: URI for the python main file.
         """
         self.job["job"][self.job_type]["main_python_file_uri"] = main

     def set_job_name(self, name: str) -> None:
-        """
-        Set Dataproc job name. Job name is sanitized, replacing dots by underscores.
+        """Set Dataproc job name.
+
+        Job name is sanitized, replacing dots by underscores.

         :param name: Job name.
         """
@@ -189,8 +179,7 @@ class DataProcJobBuilder:
         self.job["job"]["reference"]["job_id"] = sanitized_name

     def build(self) -> dict:
-        """
-        Returns Dataproc job.
+        """Return Dataproc job.

         :return: Dataproc job
         """
@@ -198,8 +187,7 @@ class DataProcJobBuilder:


 class DataprocHook(GoogleBaseHook):
-    """
-    Hook for Google Cloud Dataproc APIs.
+    """Google Cloud Dataproc APIs.

     All the methods in the hook where project_id is used must be called with
     keyword arguments rather than positional.
@@ -219,7 +207,7 @@ class DataprocHook(GoogleBaseHook):
         super().__init__(gcp_conn_id=gcp_conn_id, impersonation_chain=impersonation_chain)

     def get_cluster_client(self, region: str | None = None) -> ClusterControllerClient:
-        """
+        """Create a ClusterControllerClient."""
         client_options = None
         if region and region != "global":
             client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -229,7 +217,7 @@ class DataprocHook(GoogleBaseHook):
         )

     def get_template_client(self, region: str | None = None) -> WorkflowTemplateServiceClient:
-        """
+        """Create a WorkflowTemplateServiceClient."""
         client_options = None
         if region and region != "global":
             client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -239,7 +227,7 @@ class DataprocHook(GoogleBaseHook):
         )

     def get_job_client(self, region: str | None = None) -> JobControllerClient:
-        """
+        """Create a JobControllerClient."""
         client_options = None
         if region and region != "global":
             client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -249,7 +237,7 @@ class DataprocHook(GoogleBaseHook):
         )

     def get_batch_client(self, region: str | None = None) -> BatchControllerClient:
-        """
+        """Create a BatchControllerClient."""
         client_options = None
         if region and region != "global":
             client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -258,8 +246,8 @@ class DataprocHook(GoogleBaseHook):
             credentials=self.get_credentials(), client_info=CLIENT_INFO, client_options=client_options
         )

-    def get_operations_client(self, region):
-        """
+    def get_operations_client(self, region: str | None):
+        """Create a OperationsClient."""
         return self.get_batch_client(region=region).transport.operations_client

     def wait_for_operation(
@@ -267,8 +255,8 @@ class DataprocHook(GoogleBaseHook):
         operation: Operation,
         timeout: float | None = None,
         result_retry: Retry | _MethodDefault = DEFAULT,
-    ):
-        """
+    ) -> Any:
+        """Wait for a long-lasting operation to complete."""
         try:
             return operation.result(timeout=timeout, retry=result_retry)
         except Exception:
@@ -288,28 +276,30 @@ class DataprocHook(GoogleBaseHook):
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
-    ):
-        """
-
-
-        :param
-        :param
-        :param
-        :param
-
-
-
-
-
-
-            :class:`~google.cloud.dataproc_v1.types.VirtualClusterConfig
-        :param request_id:
-
-            the
-
-
-
-
+    ) -> Operation:
+        """Create a cluster in a specified project.
+
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region in which to handle the request.
+        :param cluster_name: Name of the cluster to create.
+        :param labels: Labels that will be assigned to created cluster.
+        :param cluster_config: The cluster config to create. If a dict is
+            provided, it must be of the same form as the protobuf message
+            :class:`~google.cloud.dataproc_v1.types.ClusterConfig`.
+        :param virtual_cluster_config: The virtual cluster config, used when
+            creating a Dataproc cluster that does not directly control the
+            underlying compute resources, for example, when creating a
+            Dataproc-on-GKE cluster with
+            :class:`~google.cloud.dataproc_v1.types.VirtualClusterConfig`.
+        :param request_id: A unique id used to identify the request. If the
+            server receives two *CreateClusterRequest* requests with the same
+            ID, the second request will be ignored, and an operation created
+            for the first one and stored in the backend is returned.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         # Dataproc labels must conform to the following regex:
@@ -353,22 +343,23 @@ class DataprocHook(GoogleBaseHook):
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
-    ):
-        """
-
-
-        :param
-        :param
-        :param
-
-
-
-
-            the first
-        :param retry: A retry object used to retry requests. If
-            retried.
-        :param timeout: The amount of time, in seconds, to wait for the request
-
+    ) -> Operation:
+        """Delete a cluster in a project.
+
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region in which to handle the request.
+        :param cluster_name: Name of the cluster to delete.
+        :param cluster_uuid: If specified, the RPC should fail if cluster with
+            the UUID does not exist.
+        :param request_id: A unique id used to identify the request. If the
+            server receives two *DeleteClusterRequest* requests with the same
+            ID, the second request will be ignored, and an operation created
+            for the first one and stored in the backend is returned.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         client = self.get_cluster_client(region=region)
@@ -395,18 +386,19 @@ class DataprocHook(GoogleBaseHook):
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
-    ):
-        """
-
-            diagnose is returned.
-
-        :param project_id:
-        :param region:
-        :param cluster_name:
-        :param retry: A retry object used to retry requests. If
-            retried.
-        :param timeout: The amount of time, in seconds, to wait for the request
-
+    ) -> str:
+        """Get cluster diagnostic information.
+
+        After the operation completes, the GCS URI to diagnose is returned.
+
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region in which to handle the request.
+        :param cluster_name: Name of the cluster.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         client = self.get_cluster_client(region=region)
@@ -429,17 +421,17 @@ class DataprocHook(GoogleBaseHook):
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
-    ):
-        """
-
-
-        :param
-        :param
-        :param
-
-
-
-
+    ) -> Cluster:
+        """Get the resource representation for a cluster in a project.
+
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param cluster_name: The cluster name.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         client = self.get_cluster_client(region=region)
@@ -462,19 +454,21 @@ class DataprocHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ):
-        """
-
-
-        :param
-        :param
-        :param
-
-
-            streaming is performed per-page, this determines the maximum
-
-
-
-
+        """List all regions/{region}/clusters in a project.
+
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param filter_: To constrain the clusters to. Case-sensitive.
+        :param page_size: The maximum number of resources contained in the
+            underlying API response. If page streaming is performed
+            per-resource, this parameter does not affect the return value. If
+            page streaming is performed per-page, this determines the maximum
+            number of resources in a page.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         client = self.get_cluster_client(region=region)
@@ -499,53 +493,56 @@ class DataprocHook(GoogleBaseHook):
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
-    ):
-        """
-
-
-        :param
-        :param
-        :param
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            (and potentially interrupting jobs). Default timeout
-            maximum allowed timeout is
+    ) -> Operation:
+        """Update a cluster in a project.
+
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param cluster_name: The cluster name.
+        :param cluster: Changes to the cluster. If a dict is provided, it must
+            be of the same form as the protobuf message
+            :class:`~google.cloud.dataproc_v1.types.Cluster`.
+        :param update_mask: Specifies the path, relative to ``Cluster``, of the
+            field to update. For example, to change the number of workers in a
+            cluster to 5, this would be specified as
+            ``config.worker_config.num_instances``, and the ``PATCH`` request
+            body would specify the new value:
+
+            .. code-block:: python
+
+                {"config": {"workerConfig": {"numInstances": "5"}}}
+
+            Similarly, to change the number of preemptible workers in a cluster
+            to 5, this would be ``config.secondary_worker_config.num_instances``
+            and the ``PATCH`` request body would be:
+
+            .. code-block:: python
+
+                {"config": {"secondaryWorkerConfig": {"numInstances": "5"}}}
+
+            If a dict is provided, it must be of the same form as the protobuf
+            message :class:`~google.cloud.dataproc_v1.types.FieldMask`.
+        :param graceful_decommission_timeout: Timeout for graceful YARN
+            decommissioning. Graceful decommissioning allows removing nodes from
+            the cluster without interrupting jobs in progress. Timeout specifies
+            how long to wait for jobs in progress to finish before forcefully
+            removing nodes (and potentially interrupting jobs). Default timeout
+            is 0 (for forceful decommission), and the maximum allowed timeout is
+            one day.

             Only supported on Dataproc image versions 1.2 and higher.

-            If a dict is provided, it must be of the same form as the protobuf
-            :class:`~google.cloud.dataproc_v1.types.Duration
-        :param request_id:
-
-            the
-
-
-
-
+            If a dict is provided, it must be of the same form as the protobuf
+            message :class:`~google.cloud.dataproc_v1.types.Duration`.
+        :param request_id: A unique id used to identify the request. If the
+            server receives two *UpdateClusterRequest* requests with the same
+            ID, the second request will be ignored, and an operation created
+            for the first one and stored in the backend is returned.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         if region is None:
@@ -577,17 +574,18 @@ class DataprocHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> WorkflowTemplate:
-        """
-
-
-        :param
-        :param
-
-
-        :param retry: A retry object used to retry requests. If
-            retried.
-        :param timeout: The amount of time, in seconds, to wait for the request
-
+        """Create a new workflow template.
+
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param template: The Dataproc workflow template to create. If a dict is
+            provided, it must be of the same form as the protobuf message
+            WorkflowTemplate.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         if region is None:
@@ -611,27 +609,27 @@ class DataprocHook(GoogleBaseHook):
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
-    ):
-        """
-        Instantiates a template and begins execution.
+    ) -> Operation:
+        """Instantiate a template and begins execution.

         :param template_name: Name of template to instantiate.
-        :param project_id:
-        :param region:
-        :param version:
-            the workflow will be instantiated only if the current
-            the workflow template has the supplied version.
-
-
-        :param request_id:
-            with the same tag from running. This mitigates risk of
-            instances started due to retries.
-        :param parameters:
-            parameters. Values may not exceed 100 characters.
-        :param retry: A retry object used to retry requests. If
-            retried.
-        :param timeout: The amount of time, in seconds, to wait for the request
-
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param version: Version of workflow template to instantiate. If
+            specified, the workflow will be instantiated only if the current
+            version of the workflow template has the supplied version. This
+            option cannot be used to instantiate a previous version of workflow
+            template.
+        :param request_id: A tag that prevents multiple concurrent workflow
+            instances with the same tag from running. This mitigates risk of
+            concurrent instances started due to retries.
+        :param parameters: Map from parameter names to values that should be
+            used for those parameters. Values may not exceed 100 characters.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         if region is None:
@@ -657,21 +655,22 @@ class DataprocHook(GoogleBaseHook):
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
-    ):
-        """
-
-
-
-
-        :param project_id:
-        :param region:
-        :param request_id:
-            with the same tag from running. This mitigates risk of
-            instances started due to retries.
-        :param retry: A retry object used to retry requests. If
-            retried.
-        :param timeout: The amount of time, in seconds, to wait for the request
-
+    ) -> Operation:
+        """Instantiate a template and begin execution.
+
+        :param template: The workflow template to instantiate. If a dict is
+            provided, it must be of the same form as the protobuf message
+            WorkflowTemplate.
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param request_id: A tag that prevents multiple concurrent workflow
+            instances with the same tag from running. This mitigates risk of
+            concurrent instances started due to retries.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         if region is None:
@@ -696,14 +695,13 @@ class DataprocHook(GoogleBaseHook):
         wait_time: int = 10,
         timeout: int | None = None,
     ) -> None:
-        """
-        Helper method which polls a job to check if it finishes.
+        """Poll a job to check if it has finished.

-        :param job_id:
-        :param project_id:
-        :param region:
-        :param wait_time: Number of seconds between checks
-        :param timeout: How many seconds wait for job to be ready.
+        :param job_id: Dataproc job ID.
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param wait_time: Number of seconds between checks.
+        :param timeout: How many seconds wait for job to be ready.
         """
         if region is None:
             raise TypeError("missing 1 required keyword argument: 'region'")
@@ -734,16 +732,16 @@ class DataprocHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Job:
-        """
-
-
-        :param
-        :param
-        :param
-
-
-
-
+        """Get the resource representation for a job in a project.
+
+        :param job_id: Dataproc job ID.
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         if region is None:
@@ -768,20 +766,20 @@ class DataprocHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Job:
-        """
-
-
-
-
-        :param
-        :param
-
-
-
-
-
-
-
+        """Submit a job to a cluster.
+
+        :param job: The job resource. If a dict is provided, it must be of the
+            same form as the protobuf message Job.
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param request_id: A tag that prevents multiple concurrent workflow
+            instances with the same tag from running. This mitigates risk of
+            concurrent instances started due to retries.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         if region is None:
@@ -804,16 +802,16 @@ class DataprocHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Job:
-        """
-
-
-        :param
-        :param
-        :param
-
-
-
-
+        """Start a job cancellation request.
+
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param job_id: The job ID.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         client = self.get_job_client(region=region)
@@ -838,22 +836,23 @@ class DataprocHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Operation:
-        """
-
-
-        :param
-        :param
-        :param
-
-
-
-
-
-            the first
-        :param retry: A retry object used to retry requests. If
-            retried.
-        :param timeout: The amount of time, in seconds, to wait for the request
-
+        """Create a batch workload.
+
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param batch: The batch to create.
+        :param batch_id: The ID to use for the batch, which will become the
+            final component of the batch's resource name. This value must be of
+            4-63 characters. Valid characters are ``[a-z][0-9]-``.
+        :param request_id: A unique id used to identify the request. If the
+            server receives two *CreateBatchRequest* requests with the same
+            ID, the second request will be ignored, and an operation created
+            for the first one and stored in the backend is returned.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         client = self.get_batch_client(region)
@@ -882,22 +881,20 @@ class DataprocHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> None:
-        """
-
-
-        :param
-
-
-
-        :param
-
-
-        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
-            ``retry`` is specified, the timeout applies to each individual attempt.
+        """Delete the batch workload resource.
+
+        :param batch_id: The batch ID.
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         client = self.get_batch_client(region)
-        name = f"projects/{project_id}/
+        name = f"projects/{project_id}/locations/{region}/batches/{batch_id}"

         client.delete_batch(
             request={
@@ -918,22 +915,20 @@ class DataprocHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Batch:
-        """
-
-
-        :param
-
-
-
-        :param
-
-
-        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
-            ``retry`` is specified, the timeout applies to each individual attempt.
+        """Get the batch workload resource representation.
+
+        :param batch_id: The batch ID.
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         client = self.get_batch_client(region)
-        name = f"projects/{project_id}/
+        name = f"projects/{project_id}/locations/{region}/batches/{batch_id}"

         result = client.get_batch(
             request={
@@ -956,19 +951,20 @@ class DataprocHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ):
-        """
-
-
-        :param
-        :param
-
-
-        :param page_token:
-            Provide this token to retrieve the subsequent page.
-        :param retry: A retry object used to retry requests. If
-            retried.
-        :param timeout: The amount of time, in seconds, to wait for the request
-
+        """List batch workloads.
+
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param page_size: The maximum number of batches to return in each
+            response. The service may return fewer than this value. The default
+            page size is 20; the maximum page size is 1000.
+        :param page_token: A page token received from a previous ``ListBatches``
+            call. Provide this token to retrieve the subsequent page.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         client = self.get_batch_client(region)
@@ -997,24 +993,24 @@ class DataprocHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Batch:
-        """
-
-
-
-
-
-            function for waiting on completion.
-
-        :param batch_id:
-
-
-        :param
-
-        :param
-
-
-
-
+        """Wait for a batch job to complete.
+
+        After submission of a batch job, the operator waits for the job to
+        complete. This hook is, however, useful in the case when Airflow is
+        restarted or the task pid is killed for any reason. In this case, the
+        creation would happen again, catching the raised AlreadyExists, and fail
+        to this function for waiting on completion.
+
+        :param batch_id: The batch ID.
+        :param region: Cloud Dataproc region to handle the request.
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param wait_check_interval: The amount of time to pause between checks
+            for job completion.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         state = None
@@ -1050,8 +1046,7 @@ class DataprocHook(GoogleBaseHook):


 class DataprocAsyncHook(GoogleBaseHook):
-    """
-    Asynchronous Hook for Google Cloud Dataproc APIs.
+    """Asynchronous interaction with Google Cloud Dataproc APIs.

     All the methods in the hook where project_id is used must be called with
     keyword arguments rather than positional.
@@ -1072,7 +1067,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         self._cached_client: JobControllerAsyncClient | None = None

     def get_cluster_client(self, region: str | None = None) -> ClusterControllerAsyncClient:
-        """
+        """Create a ClusterControllerAsyncClient."""
         client_options = None
         if region and region != "global":
             client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -1082,7 +1077,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         )

     def get_template_client(self, region: str | None = None) -> WorkflowTemplateServiceAsyncClient:
-        """
+        """Create a WorkflowTemplateServiceAsyncClient."""
         client_options = None
         if region and region != "global":
             client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -1092,7 +1087,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         )

     def get_job_client(self, region: str | None = None) -> JobControllerAsyncClient:
-        """
+        """Create a JobControllerAsyncClient."""
         if self._cached_client is None:
             client_options = None
             if region and region != "global":
@@ -1106,7 +1101,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         return self._cached_client

     def get_batch_client(self, region: str | None = None) -> BatchControllerAsyncClient:
-        """
+        """Create a BatchControllerAsyncClient."""
         client_options = None
         if region and region != "global":
             client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443")
@@ -1116,7 +1111,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         )

     def get_operations_client(self, region: str) -> OperationsClient:
-        """
+        """Create a OperationsClient."""
         return self.get_template_client(region=region).transport.operations_client

     @GoogleBaseHook.fallback_to_default_project_id
@@ -1132,28 +1127,30 @@ class DataprocAsyncHook(GoogleBaseHook):
|
|
1132
1127
|
retry: Retry | _MethodDefault = DEFAULT,
|
1133
1128
|
timeout: float | None = None,
|
1134
1129
|
metadata: Sequence[tuple[str, str]] = (),
|
1135
|
-
):
|
1136
|
-
"""
|
1137
|
-
|
1138
|
-
|
1139
|
-
:param
|
1140
|
-
:param
|
1141
|
-
:param
|
1142
|
-
:param
|
1143
|
-
|
1144
|
-
|
1145
|
-
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1149
|
-
:class:`~google.cloud.dataproc_v1.types.VirtualClusterConfig
|
1150
|
-
:param request_id:
|
1151
|
-
|
1152
|
-
the
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1130
|
+
) -> AsyncOperation:
|
1131
|
+
"""Create a cluster in a project.
|
1132
|
+
|
1133
|
+
:param project_id: Google Cloud project ID that the cluster belongs to.
|
1134
|
+
:param region: Cloud Dataproc region in which to handle the request.
|
1135
|
+
:param cluster_name: Name of the cluster to create.
|
1136
|
+
:param labels: Labels that will be assigned to created cluster.
|
1137
|
+
:param cluster_config: The cluster config to create. If a dict is
|
1138
|
+
provided, it must be of the same form as the protobuf message
|
1139
|
+
:class:`~google.cloud.dataproc_v1.types.ClusterConfig`.
|
1140
|
+
:param virtual_cluster_config: The virtual cluster config, used when
|
1141
|
+
creating a Dataproc cluster that does not directly control the
|
1142
|
+
underlying compute resources, for example, when creating a
|
1143
|
+
Dataproc-on-GKE cluster with
|
1144
|
+
:class:`~google.cloud.dataproc_v1.types.VirtualClusterConfig`.
|
1145
|
+
:param request_id: A unique id used to identify the request. If the
|
1146
|
+
server receives two *CreateClusterRequest* requests with the same
|
1147
|
+
ID, the second request will be ignored, and an operation created
|
1148
|
+
for the first one and stored in the backend is returned.
|
1149
|
+
:param retry: A retry object used to retry requests. If *None*, requests
|
1150
|
+
will not be retried.
|
1151
|
+
:param timeout: The amount of time, in seconds, to wait for the request
|
1152
|
+
to complete. If *retry* is specified, the timeout applies to each
|
1153
|
+
individual attempt.
|
1157
1154
|
:param metadata: Additional metadata that is provided to the method.
|
1158
1155
|
"""
|
1159
1156
|
# Dataproc labels must conform to the following regex:
|
@@ -1197,26 +1194,27 @@ class DataprocAsyncHook(GoogleBaseHook):
|
|
1197
1194
|
retry: Retry | _MethodDefault = DEFAULT,
|
1198
1195
|
timeout: float | None = None,
|
1199
1196
|
metadata: Sequence[tuple[str, str]] = (),
|
1200
|
-
):
|
1201
|
-
"""
|
1202
|
-
|
1203
|
-
|
1204
|
-
:param
|
1205
|
-
:param
|
1206
|
-
:param
|
1207
|
-
|
1208
|
-
|
1209
|
-
|
1210
|
-
|
1211
|
-
the first
|
1212
|
-
:param retry: A retry object used to retry requests. If
|
1213
|
-
retried.
|
1214
|
-
:param timeout: The amount of time, in seconds, to wait for the request
|
1215
|
-
|
1197
|
+
) -> AsyncOperation:
|
1198
|
+
"""Delete a cluster in a project.
|
1199
|
+
|
1200
|
+
:param project_id: Google Cloud project ID that the cluster belongs to.
|
1201
|
+
:param region: Cloud Dataproc region in which to handle the request.
|
1202
|
+
:param cluster_name: Name of the cluster to delete.
|
1203
|
+
:param cluster_uuid: If specified, the RPC should fail if cluster with
|
1204
|
+
the UUID does not exist.
|
1205
|
+
:param request_id: A unique id used to identify the request. If the
|
1206
|
+
server receives two *DeleteClusterRequest* requests with the same
|
1207
|
+
ID, the second request will be ignored, and an operation created
|
1208
|
+
for the first one and stored in the backend is returned.
|
1209
|
+
:param retry: A retry object used to retry requests. If *None*, requests
|
1210
|
+
will not be retried.
|
1211
|
+
:param timeout: The amount of time, in seconds, to wait for the request
|
1212
|
+
to complete. If *retry* is specified, the timeout applies to each
|
1213
|
+
individual attempt.
|
1216
1214
|
:param metadata: Additional metadata that is provided to the method.
|
1217
1215
|
"""
|
1218
1216
|
client = self.get_cluster_client(region=region)
|
1219
|
-
result = client.delete_cluster(
|
1217
|
+
result = await client.delete_cluster(
|
1220
1218
|
request={
|
1221
1219
|
"project_id": project_id,
|
1222
1220
|
"region": region,
|
@@ -1239,18 +1237,19 @@ class DataprocAsyncHook(GoogleBaseHook):
|
|
1239
1237
|
retry: Retry | _MethodDefault = DEFAULT,
|
1240
1238
|
timeout: float | None = None,
|
1241
1239
|
metadata: Sequence[tuple[str, str]] = (),
|
1242
|
-
):
|
1243
|
-
"""
|
1244
|
-
|
1245
|
-
diagnose is returned.
|
1246
|
-
|
1247
|
-
:param project_id:
|
1248
|
-
:param region:
|
1249
|
-
:param cluster_name:
|
1250
|
-
:param retry: A retry object used to retry requests. If
|
1251
|
-
retried.
|
1252
|
-
:param timeout: The amount of time, in seconds, to wait for the request
|
1253
|
-
|
1240
|
+
) -> str:
|
1241
|
+
"""Get cluster diagnostic information.
|
1242
|
+
|
1243
|
+
After the operation completes, the GCS URI to diagnose is returned.
|
1244
|
+
|
1245
|
+
:param project_id: Google Cloud project ID that the cluster belongs to.
|
1246
|
+
:param region: Cloud Dataproc region in which to handle the request.
|
1247
|
+
:param cluster_name: Name of the cluster.
|
1248
|
+
:param retry: A retry object used to retry requests. If *None*, requests
|
1249
|
+
will not be retried.
|
1250
|
+
:param timeout: The amount of time, in seconds, to wait for the request
|
1251
|
+
to complete. If *retry* is specified, the timeout applies to each
|
1252
|
+
individual attempt.
|
1254
1253
|
:param metadata: Additional metadata that is provided to the method.
|
1255
1254
|
"""
|
1256
1255
|
client = self.get_cluster_client(region=region)
|
@@ -1273,17 +1272,17 @@ class DataprocAsyncHook(GoogleBaseHook):
|
|
1273
1272
|
retry: Retry | _MethodDefault = DEFAULT,
|
1274
1273
|
timeout: float | None = None,
|
1275
1274
|
metadata: Sequence[tuple[str, str]] = (),
|
1276
|
-
):
|
1277
|
-
"""
|
1278
|
-
|
1279
|
-
|
1280
|
-
:param
|
1281
|
-
:param
|
1282
|
-
:param
|
1283
|
-
|
1284
|
-
|
1285
|
-
|
1286
|
-
|
1275
|
+
) -> Cluster:
|
1276
|
+
"""Get the resource representation for a cluster in a project.
|
1277
|
+
|
1278
|
+
:param project_id: Google Cloud project ID that the cluster belongs to.
|
1279
|
+
:param region: Cloud Dataproc region to handle the request.
|
1280
|
+
:param cluster_name: The cluster name.
|
1281
|
+
:param retry: A retry object used to retry requests. If *None*, requests
|
1282
|
+
will not be retried.
|
1283
|
+
:param timeout: The amount of time, in seconds, to wait for the request
|
1284
|
+
to complete. If *retry* is specified, the timeout applies to each
|
1285
|
+
individual attempt.
|
1287
1286
|
:param metadata: Additional metadata that is provided to the method.
|
1288
1287
|
"""
|
1289
1288
|
client = self.get_cluster_client(region=region)
|
@@ -1306,19 +1305,21 @@ class DataprocAsyncHook(GoogleBaseHook):
|
|
1306
1305
|
timeout: float | None = None,
|
1307
1306
|
metadata: Sequence[tuple[str, str]] = (),
|
1308
1307
|
):
|
1309
|
-
"""
|
1310
|
-
|
1311
|
-
|
1312
|
-
:param
|
1313
|
-
:param
|
1314
|
-
:param
|
1315
|
-
|
1316
|
-
|
1317
|
-
streaming is performed per-page, this determines the maximum
|
1318
|
-
|
1319
|
-
|
1320
|
-
|
1321
|
-
|
1308
|
+
"""List all regions/{region}/clusters in a project.
|
1309
|
+
|
1310
|
+
:param project_id: Google Cloud project ID that the cluster belongs to.
|
1311
|
+
:param region: Cloud Dataproc region to handle the request.
|
1312
|
+
:param filter_: To constrain the clusters to. Case-sensitive.
|
1313
|
+
:param page_size: The maximum number of resources contained in the
|
1314
|
+
underlying API response. If page streaming is performed
|
1315
|
+
per-resource, this parameter does not affect the return value. If
|
1316
|
+
page streaming is performed per-page, this determines the maximum
|
1317
|
+
number of resources in a page.
|
1318
|
+
:param retry: A retry object used to retry requests. If *None*, requests
|
1319
|
+
will not be retried.
|
1320
|
+
:param timeout: The amount of time, in seconds, to wait for the request
|
1321
|
+
to complete. If *retry* is specified, the timeout applies to each
|
1322
|
+
individual attempt.
|
1322
1323
|
:param metadata: Additional metadata that is provided to the method.
|
1323
1324
|
"""
|
1324
1325
|
client = self.get_cluster_client(region=region)
|
@@ -1343,53 +1344,56 @@ class DataprocAsyncHook(GoogleBaseHook):
|
|
1343
1344
|
retry: Retry | _MethodDefault = DEFAULT,
|
1344
1345
|
timeout: float | None = None,
|
1345
1346
|
metadata: Sequence[tuple[str, str]] = (),
|
1346
|
-
):
|
1347
|
-
"""
|
1348
|
-
|
1349
|
-
|
1350
|
-
:param
|
1351
|
-
:param
|
1352
|
-
:param
|
1353
|
-
|
1354
|
-
|
1355
|
-
|
1356
|
-
|
1357
|
-
|
1358
|
-
|
1359
|
-
|
1360
|
-
|
1361
|
-
|
1362
|
-
|
1363
|
-
|
1364
|
-
|
1365
|
-
|
1366
|
-
|
1367
|
-
|
1368
|
-
|
1369
|
-
|
1370
|
-
|
1371
|
-
|
1372
|
-
|
1373
|
-
|
1374
|
-
|
1375
|
-
|
1376
|
-
|
1377
|
-
|
1378
|
-
|
1379
|
-
(and potentially interrupting jobs). Default timeout
|
1380
|
-
maximum allowed timeout is
|
1347
|
+
) -> AsyncOperation:
|
1348
|
+
"""Update a cluster in a project.
|
1349
|
+
|
1350
|
+
:param project_id: Google Cloud project ID that the cluster belongs to.
|
1351
|
+
:param region: Cloud Dataproc region to handle the request.
|
1352
|
+
:param cluster_name: The cluster name.
|
1353
|
+
:param cluster: Changes to the cluster. If a dict is provided, it must
|
1354
|
+
be of the same form as the protobuf message
|
1355
|
+
:class:`~google.cloud.dataproc_v1.types.Cluster`.
|
1356
|
+
:param update_mask: Specifies the path, relative to ``Cluster``, of the
|
1357
|
+
field to update. For example, to change the number of workers in a
|
1358
|
+
cluster to 5, this would be specified as
|
1359
|
+
``config.worker_config.num_instances``, and the ``PATCH`` request
|
1360
|
+
body would specify the new value:
|
1361
|
+
|
1362
|
+
.. code-block:: python
|
1363
|
+
|
1364
|
+
{"config": {"workerConfig": {"numInstances": "5"}}}
|
1365
|
+
|
1366
|
+
Similarly, to change the number of preemptible workers in a cluster
|
1367
|
+
to 5, this would be ``config.secondary_worker_config.num_instances``
|
1368
|
+
and the ``PATCH`` request body would be:
|
1369
|
+
|
1370
|
+
.. code-block:: python
|
1371
|
+
|
1372
|
+
{"config": {"secondaryWorkerConfig": {"numInstances": "5"}}}
|
1373
|
+
|
1374
|
+
If a dict is provided, it must be of the same form as the protobuf
|
1375
|
+
message :class:`~google.cloud.dataproc_v1.types.FieldMask`.
|
1376
|
+
:param graceful_decommission_timeout: Timeout for graceful YARN
|
1377
|
+
decommissioning. Graceful decommissioning allows removing nodes from
|
1378
|
+
the cluster without interrupting jobs in progress. Timeout specifies
|
1379
|
+
how long to wait for jobs in progress to finish before forcefully
|
1380
|
+
removing nodes (and potentially interrupting jobs). Default timeout
|
1381
|
+
is 0 (for forceful decommission), and the maximum allowed timeout is
|
1382
|
+
one day.
|
1381
1383
|
|
1382
1384
|
Only supported on Dataproc image versions 1.2 and higher.
|
1383
1385
|
|
1384
|
-
If a dict is provided, it must be of the same form as the protobuf
|
1385
|
-
:class:`~google.cloud.dataproc_v1.types.Duration
|
1386
|
-
:param request_id:
|
1387
|
-
|
1388
|
-
the
|
1389
|
-
|
1390
|
-
|
1391
|
-
|
1392
|
-
|
1386
|
+
If a dict is provided, it must be of the same form as the protobuf
|
1387
|
+
message :class:`~google.cloud.dataproc_v1.types.Duration`.
|
1388
|
+
:param request_id: A unique id used to identify the request. If the
|
1389
|
+
server receives two *UpdateClusterRequest* requests with the same
|
1390
|
+
ID, the second request will be ignored, and an operation created
|
1391
|
+
for the first one and stored in the backend is returned.
|
1392
|
+
:param retry: A retry object used to retry requests. If *None*, requests
|
1393
|
+
will not be retried.
|
1394
|
+
:param timeout: The amount of time, in seconds, to wait for the request
|
1395
|
+
to complete. If *retry* is specified, the timeout applies to each
|
1396
|
+
individual attempt.
|
1393
1397
|
:param metadata: Additional metadata that is provided to the method.
|
1394
1398
|
"""
|
1395
1399
|
if region is None:
|
@@ -1421,17 +1425,18 @@ class DataprocAsyncHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> WorkflowTemplate:
-        """
-
-
-        :param
-        :param
-
-
-        :param retry: A retry object used to retry requests. If
-            retried.
-        :param timeout: The amount of time, in seconds, to wait for the request
-
+        """Create a new workflow template.
+
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param template: The Dataproc workflow template to create. If a dict is
+            provided, it must be of the same form as the protobuf message
+            WorkflowTemplate.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         if region is None:
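A sketch of the documented call, with a placeholder project and region and a deliberately minimal ``WorkflowTemplate`` dict (the template contents here are illustrative, not from the source):

```python
import asyncio

from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook

# WorkflowTemplate message as a dict; every value here is a placeholder.
TEMPLATE = {
    "id": "sleep-template",
    "placement": {
        "managed_cluster": {
            "cluster_name": "tmp-cluster",
            "config": {},  # accept Dataproc defaults for the ephemeral cluster
        }
    },
    "jobs": [
        {
            "step_id": "sleep",
            "pig_job": {"query_list": {"queries": ["sh sleep 5"]}},
        }
    ],
}


async def create_template():
    hook = DataprocAsyncHook(gcp_conn_id="google_cloud_default")
    template = await hook.create_workflow_template(
        project_id="my-project",  # placeholder
        region="us-central1",     # placeholder
        template=TEMPLATE,
    )
    # The server-assigned resource name incorporates the template id above.
    print(template.name)


asyncio.run(create_template())
```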
@@ -1455,27 +1460,27 @@ class DataprocAsyncHook(GoogleBaseHook):
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
-    ):
-        """
-        Instantiates a template and begins execution.
+    ) -> AsyncOperation:
+        """Instantiate a template and begins execution.
 
         :param template_name: Name of template to instantiate.
-        :param project_id:
-        :param region:
-        :param version:
-            the workflow will be instantiated only if the current
-            the workflow template has the supplied version.
-
-
-        :param request_id:
-            with the same tag from running. This mitigates risk of
-            instances started due to retries.
-        :param parameters:
-            parameters. Values may not exceed 100 characters.
-        :param retry: A retry object used to retry requests. If
-            retried.
-        :param timeout: The amount of time, in seconds, to wait for the request
-
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param version: Version of workflow template to instantiate. If
+            specified, the workflow will be instantiated only if the current
+            version of the workflow template has the supplied version. This
+            option cannot be used to instantiate a previous version of workflow
+            template.
+        :param request_id: A tag that prevents multiple concurrent workflow
+            instances with the same tag from running. This mitigates risk of
+            concurrent instances started due to retries.
+        :param parameters: Map from parameter names to values that should be
+            used for those parameters. Values may not exceed 100 characters.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         if region is None:
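The ``version``, ``request_id``, and ``parameters`` arguments documented above combine as below; all concrete values are placeholders:

```python
import asyncio

from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook


async def run_template():
    hook = DataprocAsyncHook(gcp_conn_id="google_cloud_default")
    operation = await hook.instantiate_workflow_template(
        template_name="sleep-template",  # placeholder
        project_id="my-project",         # placeholder
        region="us-central1",            # placeholder
        version=3,  # run only if the template's current version is still 3
        request_id="sleep-run-001",  # idempotency tag (placeholder)
        parameters={"CLUSTER_NAME": "tmp-cluster"},  # values <= 100 chars
    )
    print(operation)  # long-running operation handle for the workflow


asyncio.run(run_template())
```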
@@ -1501,21 +1506,22 @@ class DataprocAsyncHook(GoogleBaseHook):
         retry: Retry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
-    ):
-        """
-
-
-
-
-        :param project_id:
-        :param region:
-        :param request_id:
-            with the same tag from running. This mitigates risk of
-            instances started due to retries.
-        :param retry: A retry object used to retry requests. If
-            retried.
-        :param timeout: The amount of time, in seconds, to wait for the request
-
+    ) -> AsyncOperation:
+        """Instantiate a template and begin execution.
+
+        :param template: The workflow template to instantiate. If a dict is
+            provided, it must be of the same form as the protobuf message
+            WorkflowTemplate.
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param request_id: A tag that prevents multiple concurrent workflow
+            instances with the same tag from running. This mitigates risk of
+            concurrent instances started due to retries.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         if region is None:
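This variant takes the template body itself rather than a name; in this provider that is the inline-instantiation method, assumed here to be ``instantiate_inline_workflow_template``. All values are placeholders:

```python
import asyncio

from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook

# Inline WorkflowTemplate message as a dict; all values are placeholders.
INLINE_TEMPLATE = {
    "id": "adhoc-sleep",
    "placement": {
        "managed_cluster": {"cluster_name": "tmp-cluster", "config": {}}
    },
    "jobs": [
        {
            "step_id": "sleep",
            "pig_job": {"query_list": {"queries": ["sh sleep 5"]}},
        }
    ],
}


async def run_inline():
    hook = DataprocAsyncHook(gcp_conn_id="google_cloud_default")
    operation = await hook.instantiate_inline_workflow_template(
        template=INLINE_TEMPLATE,
        project_id="my-project",     # placeholder
        region="us-central1",        # placeholder
        request_id="adhoc-run-001",  # idempotency tag (placeholder)
    )
    print(operation)


asyncio.run(run_inline())
```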
@@ -1544,16 +1550,16 @@ class DataprocAsyncHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Job:
-        """
-
-
-        :param
-        :param
-        :param
-
-
-
-
+        """Get the resource representation for a job in a project.
+
+        :param job_id: Dataproc job ID.
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         if region is None:
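A sketch of the documented lookup; the job ID, project, and region are placeholders:

```python
import asyncio

from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook


async def job_state(job_id: str):
    hook = DataprocAsyncHook(gcp_conn_id="google_cloud_default")
    job = await hook.get_job(
        job_id=job_id,
        project_id="my-project",  # placeholder
        region="us-central1",     # placeholder
    )
    # The returned Job message carries the current lifecycle state.
    return job.status.state


print(asyncio.run(job_state("example-job-id")))
```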
@@ -1578,20 +1584,20 @@ class DataprocAsyncHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Job:
-        """
-
-
-
-
-        :param
-        :param
-
-
-
-
-
-
-
+        """Submit a job to a cluster.
+
+        :param job: The job resource. If a dict is provided, it must be of the
+            same form as the protobuf message Job.
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param request_id: A tag that prevents multiple concurrent workflow
+            instances with the same tag from running. This mitigates risk of
+            concurrent instances started due to retries.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         if region is None:
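A sketch pairing the documented parameters with a minimal PySpark ``Job`` dict; the cluster name and GCS path are placeholders:

```python
import asyncio

from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook

# Job message as a dict: placement names the target cluster, and exactly one
# job-type field (here a PySpark job) describes the work. Values are placeholders.
JOB = {
    "placement": {"cluster_name": "my-cluster"},
    "pyspark_job": {"main_python_file_uri": "gs://my-bucket/job.py"},
}


async def submit():
    hook = DataprocAsyncHook(gcp_conn_id="google_cloud_default")
    job = await hook.submit_job(
        job=JOB,
        project_id="my-project",  # placeholder
        region="us-central1",     # placeholder
    )
    # Server-assigned ID, useful for get_job/cancel_job later.
    return job.reference.job_id


print(asyncio.run(submit()))
```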
@@ -1614,16 +1620,16 @@ class DataprocAsyncHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Job:
-        """
-
-
-        :param
-        :param
-        :param
-
-
-
-
+        """Start a job cancellation request.
+
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param job_id: The job ID.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         client = self.get_job_client(region=region)
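A sketch of the documented cancellation call; as the summary line says, this only starts the cancellation request, so the returned ``Job`` reflects the request rather than a guarantee the job has stopped. Values are placeholders:

```python
import asyncio

from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook


async def cancel(job_id: str):
    hook = DataprocAsyncHook(gcp_conn_id="google_cloud_default")
    job = await hook.cancel_job(
        job_id=job_id,
        project_id="my-project",  # placeholder
        region="us-central1",     # placeholder
    )
    return job.status.state  # poll get_job to observe the final state


asyncio.run(cancel("example-job-id"))
```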
@@ -1648,22 +1654,23 @@ class DataprocAsyncHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
-        """
-
-
-        :param
-        :param
-        :param
-
-
-
-
-
-            the first
-        :param retry: A retry object used to retry requests. If
-            retried.
-        :param timeout: The amount of time, in seconds, to wait for the request
-
+        """Create a batch workload.
+
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param batch: The batch to create.
+        :param batch_id: The ID to use for the batch, which will become the
+            final component of the batch's resource name. This value must be of
+            4-63 characters. Valid characters are ``[a-z][0-9]-``.
+        :param request_id: A unique id used to identify the request. If the
+            server receives two *CreateBatchRequest* requests with the same
+            ID, the second request will be ignored, and an operation created
+            for the first one and stored in the backend is returned.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         client = self.get_batch_client(region)
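A sketch of the documented batch creation, with a placeholder ``pyspark_batch`` payload and a ``batch_id`` that respects the 4-63-character ``[a-z][0-9]-`` rule:

```python
import asyncio

from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook

# Batch message as a dict; the URI is a placeholder.
BATCH = {"pyspark_batch": {"main_python_file_uri": "gs://my-bucket/batch_job.py"}}


async def start_batch():
    hook = DataprocAsyncHook(gcp_conn_id="google_cloud_default")
    operation = await hook.create_batch(
        project_id="my-project",  # placeholder
        region="us-central1",     # placeholder
        batch=BATCH,
        # 4-63 chars drawn from [a-z][0-9]-; becomes the resource-name suffix.
        batch_id="nightly-batch-0001",
        # Dedupes retried CreateBatchRequests (placeholder value).
        request_id="nightly-batch-0001-try1",
    )
    print(operation)  # long-running operation handle


asyncio.run(start_batch())
```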
@@ -1692,22 +1699,20 @@ class DataprocAsyncHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> None:
-        """
-
-
-        :param
-
-
-
-        :param
-
-
-        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
-            ``retry`` is specified, the timeout applies to each individual attempt.
+        """Delete the batch workload resource.
+
+        :param batch_id: The batch ID.
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         client = self.get_batch_client(region)
-        name = f"projects/{project_id}/
+        name = f"projects/{project_id}/locations/{region}/batches/{batch_id}"
 
         await client.delete_batch(
             request={
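Besides the docstring, this hunk completes the resource name to the full ``projects/{project_id}/locations/{region}/batches/{batch_id}`` path. A sketch of the documented deletion; values are placeholders:

```python
import asyncio

from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook


async def drop_batch(batch_id: str) -> None:
    hook = DataprocAsyncHook(gcp_conn_id="google_cloud_default")
    # Resolves internally to the full batches/{batch_id} resource name
    # shown above and returns None.
    await hook.delete_batch(
        batch_id=batch_id,
        project_id="my-project",  # placeholder
        region="us-central1",     # placeholder
    )


asyncio.run(drop_batch("nightly-batch-0001"))
```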
@@ -1728,22 +1733,20 @@ class DataprocAsyncHook(GoogleBaseHook):
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Batch:
-        """
-
-
-        :param
-
-
-
-        :param
-
-
-        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
-            ``retry`` is specified, the timeout applies to each individual attempt.
+        """Get the batch workload resource representation.
+
+        :param batch_id: The batch ID.
+        :param project_id: Google Cloud project ID that the cluster belongs to.
+        :param region: Cloud Dataproc region to handle the request.
+        :param retry: A retry object used to retry requests. If *None*, requests
+            will not be retried.
+        :param timeout: The amount of time, in seconds, to wait for the request
+            to complete. If *retry* is specified, the timeout applies to each
+            individual attempt.
         :param metadata: Additional metadata that is provided to the method.
         """
         client = self.get_batch_client(region)
-        name = f"projects/{project_id}/
+        name = f"projects/{project_id}/locations/{region}/batches/{batch_id}"
 
         result = await client.get_batch(
             request={
|
|
1766
1769
|
timeout: float | None = None,
|
1767
1770
|
metadata: Sequence[tuple[str, str]] = (),
|
1768
1771
|
):
|
1769
|
-
"""
|
1770
|
-
|
1771
|
-
|
1772
|
-
:param
|
1773
|
-
:param
|
1774
|
-
|
1775
|
-
|
1776
|
-
:param page_token:
|
1777
|
-
Provide this token to retrieve the subsequent page.
|
1778
|
-
:param retry: A retry object used to retry requests. If
|
1779
|
-
retried.
|
1780
|
-
:param timeout: The amount of time, in seconds, to wait for the request
|
1781
|
-
|
1772
|
+
"""List batch workloads.
|
1773
|
+
|
1774
|
+
:param project_id: Google Cloud project ID that the cluster belongs to.
|
1775
|
+
:param region: Cloud Dataproc region to handle the request.
|
1776
|
+
:param page_size: The maximum number of batches to return in each
|
1777
|
+
response. The service may return fewer than this value. The default
|
1778
|
+
page size is 20; the maximum page size is 1000.
|
1779
|
+
:param page_token: A page token received from a previous ``ListBatches``
|
1780
|
+
call. Provide this token to retrieve the subsequent page.
|
1781
|
+
:param retry: A retry object used to retry requests. If *None*, requests
|
1782
|
+
will not be retried.
|
1783
|
+
:param timeout: The amount of time, in seconds, to wait for the request
|
1784
|
+
to complete. If *retry* is specified, the timeout applies to each
|
1785
|
+
individual attempt.
|
1782
1786
|
:param metadata: Additional metadata that is provided to the method.
|
1783
1787
|
"""
|
1784
1788
|
client = self.get_batch_client(region)
|
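The ``page_size`` and ``page_token`` parameters support the usual pagination loop. A sketch assuming the hook surfaces the underlying ``ListBatches`` response/pager, which exposes ``batches`` and ``next_page_token``; the project and region are placeholders:

```python
import asyncio

from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook


async def all_batches():
    hook = DataprocAsyncHook(gcp_conn_id="google_cloud_default")
    batches, page_token = [], None
    while True:
        # Each call returns one page (default 20 items, maximum 1000) plus a
        # token for the next page; an empty token means we are done.
        page = await hook.list_batches(
            project_id="my-project",  # placeholder
            region="us-central1",     # placeholder
            page_size=100,
            page_token=page_token,
        )
        batches.extend(page.batches)
        page_token = page.next_page_token
        if not page_token:
            return batches


print(len(asyncio.run(all_batches())))
```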