apache-airflow-providers-google 10.10.0rc1__py3-none-any.whl → 10.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_run.py +4 -2
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +131 -27
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +1 -9
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +121 -4
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +1 -11
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -10
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +220 -6
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +409 -0
- airflow/providers/google/cloud/links/vertex_ai.py +49 -0
- airflow/providers/google/cloud/operators/dataproc.py +32 -10
- airflow/providers/google/cloud/operators/gcs.py +1 -1
- airflow/providers/google/cloud/operators/mlengine.py +116 -0
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +45 -0
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +2 -8
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +287 -201
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +1 -9
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +2 -9
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +451 -12
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +464 -0
- airflow/providers/google/cloud/utils/mlengine_operator_utils.py +7 -1
- airflow/providers/google/get_provider_info.py +5 -0
- {apache_airflow_providers_google-10.10.0rc1.dist-info → apache_airflow_providers_google-10.10.1.dist-info}/METADATA +8 -8
- {apache_airflow_providers_google-10.10.0rc1.dist-info → apache_airflow_providers_google-10.10.1.dist-info}/RECORD +29 -27
- {apache_airflow_providers_google-10.10.0rc1.dist-info → apache_airflow_providers_google-10.10.1.dist-info}/LICENSE +0 -0
- {apache_airflow_providers_google-10.10.0rc1.dist-info → apache_airflow_providers_google-10.10.1.dist-info}/NOTICE +0 -0
- {apache_airflow_providers_google-10.10.0rc1.dist-info → apache_airflow_providers_google-10.10.1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.10.0rc1.dist-info → apache_airflow_providers_google-10.10.1.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_google-10.10.0rc1.dist-info → apache_airflow_providers_google-10.10.1.dist-info}/top_level.txt +0 -0
@@ -15,13 +15,9 @@
|
|
15
15
|
# KIND, either express or implied. See the License for the
|
16
16
|
# specific language governing permissions and limitations
|
17
17
|
# under the License.
|
18
|
-
"""This module contains a Google Cloud Vertex AI hook.
|
19
18
|
|
20
|
-
|
19
|
+
"""This module contains a Google Cloud Vertex AI hook."""
|
21
20
|
|
22
|
-
aiplatform
|
23
|
-
camelCase
|
24
|
-
"""
|
25
21
|
from __future__ import annotations
|
26
22
|
|
27
23
|
from typing import TYPE_CHECKING, Sequence
|
@@ -36,7 +32,10 @@ from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
|
|
36
32
|
if TYPE_CHECKING:
|
37
33
|
from google.api_core.operation import Operation
|
38
34
|
from google.api_core.retry import Retry
|
39
|
-
from google.cloud.aiplatform_v1.services.model_service.pagers import
|
35
|
+
from google.cloud.aiplatform_v1.services.model_service.pagers import (
|
36
|
+
ListModelsPager,
|
37
|
+
ListModelVersionsPager,
|
38
|
+
)
|
40
39
|
from google.cloud.aiplatform_v1.types import Model, model_service
|
41
40
|
|
42
41
|
|
@@ -236,3 +235,218 @@ class ModelServiceHook(GoogleBaseHook):
|
|
236
235
|
metadata=metadata,
|
237
236
|
)
|
238
237
|
return result
|
238
|
+
|
239
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
240
|
+
def list_model_versions(
|
241
|
+
self,
|
242
|
+
region: str,
|
243
|
+
project_id: str,
|
244
|
+
model_id: str,
|
245
|
+
retry: Retry | _MethodDefault = DEFAULT,
|
246
|
+
timeout: float | None = None,
|
247
|
+
metadata: Sequence[tuple[str, str]] = (),
|
248
|
+
) -> ListModelVersionsPager:
|
249
|
+
"""
|
250
|
+
Lists all versions of the existing Model.
|
251
|
+
|
252
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
253
|
+
:param region: Required. The ID of the Google Cloud region that the service belongs to.
|
254
|
+
:param model_id: Required. The ID of the Model to output versions for.
|
255
|
+
:param retry: Designation of what errors, if any, should be retried.
|
256
|
+
:param timeout: The timeout for this request.
|
257
|
+
:param metadata: Strings which should be sent along with the request as metadata.
|
258
|
+
"""
|
259
|
+
client = self.get_model_service_client(region)
|
260
|
+
name = client.model_path(project_id, region, model_id)
|
261
|
+
|
262
|
+
result = client.list_model_versions(
|
263
|
+
request={
|
264
|
+
"name": name,
|
265
|
+
},
|
266
|
+
retry=retry,
|
267
|
+
timeout=timeout,
|
268
|
+
metadata=metadata,
|
269
|
+
)
|
270
|
+
return result
|
271
|
+
|
272
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
273
|
+
def delete_model_version(
|
274
|
+
self,
|
275
|
+
region: str,
|
276
|
+
project_id: str,
|
277
|
+
model_id: str,
|
278
|
+
retry: Retry | _MethodDefault = DEFAULT,
|
279
|
+
timeout: float | None = None,
|
280
|
+
metadata: Sequence[tuple[str, str]] = (),
|
281
|
+
) -> Operation:
|
282
|
+
"""
|
283
|
+
Deletes version of the Model. The version could not be deleted if this version is default.
|
284
|
+
|
285
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
286
|
+
:param region: Required. The ID of the Google Cloud region that the service belongs to.
|
287
|
+
:param model_id: Required. The ID of the Model in which to delete version.
|
288
|
+
:param retry: Designation of what errors, if any, should be retried.
|
289
|
+
:param timeout: The timeout for this request.
|
290
|
+
:param metadata: Strings which should be sent along with the request as metadata.
|
291
|
+
"""
|
292
|
+
client = self.get_model_service_client(region)
|
293
|
+
name = client.model_path(project_id, region, model_id)
|
294
|
+
|
295
|
+
result = client.delete_model_version(
|
296
|
+
request={
|
297
|
+
"name": name,
|
298
|
+
},
|
299
|
+
retry=retry,
|
300
|
+
timeout=timeout,
|
301
|
+
metadata=metadata,
|
302
|
+
)
|
303
|
+
return result
|
304
|
+
|
305
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
306
|
+
def get_model(
|
307
|
+
self,
|
308
|
+
region: str,
|
309
|
+
project_id: str,
|
310
|
+
model_id: str,
|
311
|
+
retry: Retry | _MethodDefault = DEFAULT,
|
312
|
+
timeout: float | None = None,
|
313
|
+
metadata: Sequence[tuple[str, str]] = (),
|
314
|
+
) -> Model:
|
315
|
+
"""
|
316
|
+
Retrieves Model of specific name and version. If version is not specified, the default is retrieved.
|
317
|
+
|
318
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
319
|
+
:param region: Required. The ID of the Google Cloud region that the service belongs to.
|
320
|
+
:param model_id: Required. The ID of the Model to retrieve.
|
321
|
+
:param retry: Designation of what errors, if any, should be retried.
|
322
|
+
:param timeout: The timeout for this request.
|
323
|
+
:param metadata: Strings which should be sent along with the request as metadata.
|
324
|
+
"""
|
325
|
+
client = self.get_model_service_client(region)
|
326
|
+
name = client.model_path(project_id, region, model_id)
|
327
|
+
|
328
|
+
result = client.get_model(
|
329
|
+
request={
|
330
|
+
"name": name,
|
331
|
+
},
|
332
|
+
retry=retry,
|
333
|
+
timeout=timeout,
|
334
|
+
metadata=metadata,
|
335
|
+
)
|
336
|
+
return result
|
337
|
+
|
338
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
339
|
+
def set_version_as_default(
|
340
|
+
self,
|
341
|
+
region: str,
|
342
|
+
model_id: str,
|
343
|
+
project_id: str,
|
344
|
+
retry: Retry | _MethodDefault = DEFAULT,
|
345
|
+
timeout: float | None = None,
|
346
|
+
metadata: Sequence[tuple[str, str]] = (),
|
347
|
+
) -> Model:
|
348
|
+
"""
|
349
|
+
Set current version of the Model as default.
|
350
|
+
|
351
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
352
|
+
:param region: Required. The ID of the Google Cloud region that the service belongs to.
|
353
|
+
:param model_id: Required. The ID of the Model to set as default.
|
354
|
+
:param retry: Designation of what errors, if any, should be retried.
|
355
|
+
:param timeout: The timeout for this request.
|
356
|
+
:param metadata: Strings which should be sent along with the request as metadata.
|
357
|
+
"""
|
358
|
+
client = self.get_model_service_client(region)
|
359
|
+
name = client.model_path(project_id, region, model_id)
|
360
|
+
|
361
|
+
result = client.merge_version_aliases(
|
362
|
+
request={
|
363
|
+
"name": name,
|
364
|
+
"version_aliases": ["default"],
|
365
|
+
},
|
366
|
+
retry=retry,
|
367
|
+
timeout=timeout,
|
368
|
+
metadata=metadata,
|
369
|
+
)
|
370
|
+
return result
|
371
|
+
|
372
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
373
|
+
def add_version_aliases(
|
374
|
+
self,
|
375
|
+
region: str,
|
376
|
+
model_id: str,
|
377
|
+
project_id: str,
|
378
|
+
version_aliases: Sequence[str],
|
379
|
+
retry: Retry | _MethodDefault = DEFAULT,
|
380
|
+
timeout: float | None = None,
|
381
|
+
metadata: Sequence[tuple[str, str]] = (),
|
382
|
+
) -> Model:
|
383
|
+
"""
|
384
|
+
Add list of version aliases to specific version of Model.
|
385
|
+
|
386
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
387
|
+
:param region: Required. The ID of the Google Cloud region that the service belongs to.
|
388
|
+
:param model_id: Required. The ID of the Model to add aliases to.
|
389
|
+
:param version_aliases: Required. List of version aliases to be added for specific version.
|
390
|
+
:param retry: Designation of what errors, if any, should be retried.
|
391
|
+
:param timeout: The timeout for this request.
|
392
|
+
:param metadata: Strings which should be sent along with the request as metadata.
|
393
|
+
"""
|
394
|
+
client = self.get_model_service_client(region)
|
395
|
+
name = client.model_path(project_id, region, model_id)
|
396
|
+
|
397
|
+
for alias in version_aliases:
|
398
|
+
if alias.startswith("-"):
|
399
|
+
raise AirflowException("Name of the alias can't start with '-'")
|
400
|
+
|
401
|
+
result = client.merge_version_aliases(
|
402
|
+
request={
|
403
|
+
"name": name,
|
404
|
+
"version_aliases": version_aliases,
|
405
|
+
},
|
406
|
+
retry=retry,
|
407
|
+
timeout=timeout,
|
408
|
+
metadata=metadata,
|
409
|
+
)
|
410
|
+
return result
|
411
|
+
|
412
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
413
|
+
def delete_version_aliases(
|
414
|
+
self,
|
415
|
+
region: str,
|
416
|
+
model_id: str,
|
417
|
+
project_id: str,
|
418
|
+
version_aliases: Sequence[str],
|
419
|
+
retry: Retry | _MethodDefault = DEFAULT,
|
420
|
+
timeout: float | None = None,
|
421
|
+
metadata: Sequence[tuple[str, str]] = (),
|
422
|
+
) -> Model:
|
423
|
+
"""
|
424
|
+
Delete list of version aliases of specific version of Model.
|
425
|
+
|
426
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
427
|
+
:param region: Required. The ID of the Google Cloud region that the service belongs to.
|
428
|
+
:param model_id: Required. The ID of the Model to delete aliases from.
|
429
|
+
:param version_aliases: Required. List of version aliases to be deleted from specific version.
|
430
|
+
:param retry: Designation of what errors, if any, should be retried.
|
431
|
+
:param timeout: The timeout for this request.
|
432
|
+
:param metadata: Strings which should be sent along with the request as metadata.
|
433
|
+
"""
|
434
|
+
client = self.get_model_service_client(region)
|
435
|
+
name = client.model_path(project_id, region, model_id)
|
436
|
+
if "default" in version_aliases:
|
437
|
+
raise AirflowException(
|
438
|
+
"Default alias can't be deleted. "
|
439
|
+
"Make sure to assign this alias to another version before deletion"
|
440
|
+
)
|
441
|
+
aliases_for_delete = ["-" + alias for alias in version_aliases]
|
442
|
+
|
443
|
+
result = client.merge_version_aliases(
|
444
|
+
request={
|
445
|
+
"name": name,
|
446
|
+
"version_aliases": aliases_for_delete,
|
447
|
+
},
|
448
|
+
retry=retry,
|
449
|
+
timeout=timeout,
|
450
|
+
metadata=metadata,
|
451
|
+
)
|
452
|
+
return result
|
@@ -0,0 +1,409 @@
|
|
1
|
+
#
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
4
|
+
# distributed with this work for additional information
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
7
|
+
# "License"); you may not use this file except in compliance
|
8
|
+
# with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing,
|
13
|
+
# software distributed under the License is distributed on an
|
14
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
# KIND, either express or implied. See the License for the
|
16
|
+
# specific language governing permissions and limitations
|
17
|
+
# under the License.
|
18
|
+
"""This module contains a Google Cloud Vertex AI hook.
|
19
|
+
|
20
|
+
.. spelling:word-list::
|
21
|
+
|
22
|
+
aiplatform
|
23
|
+
"""
|
24
|
+
from __future__ import annotations
|
25
|
+
|
26
|
+
from typing import TYPE_CHECKING, Any, Sequence
|
27
|
+
|
28
|
+
from google.api_core.client_options import ClientOptions
|
29
|
+
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
30
|
+
from google.cloud.aiplatform import PipelineJob
|
31
|
+
from google.cloud.aiplatform_v1 import PipelineServiceClient
|
32
|
+
|
33
|
+
from airflow.exceptions import AirflowException
|
34
|
+
from airflow.providers.google.common.consts import CLIENT_INFO
|
35
|
+
from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
|
36
|
+
|
37
|
+
if TYPE_CHECKING:
|
38
|
+
from google.api_core.operation import Operation
|
39
|
+
from google.api_core.retry import Retry
|
40
|
+
from google.cloud.aiplatform.metadata import experiment_resources
|
41
|
+
from google.cloud.aiplatform_v1.services.pipeline_service.pagers import ListPipelineJobsPager
|
42
|
+
|
43
|
+
|
44
|
+
class PipelineJobHook(GoogleBaseHook):
|
45
|
+
"""Hook for Google Cloud Vertex AI Pipeline Job APIs."""
|
46
|
+
|
47
|
+
def __init__(
|
48
|
+
self,
|
49
|
+
gcp_conn_id: str = "google_cloud_default",
|
50
|
+
impersonation_chain: str | Sequence[str] | None = None,
|
51
|
+
**kwargs,
|
52
|
+
) -> None:
|
53
|
+
super().__init__(
|
54
|
+
gcp_conn_id=gcp_conn_id,
|
55
|
+
impersonation_chain=impersonation_chain,
|
56
|
+
)
|
57
|
+
self._pipeline_job: PipelineJob | None = None
|
58
|
+
|
59
|
+
def get_pipeline_service_client(
|
60
|
+
self,
|
61
|
+
region: str | None = None,
|
62
|
+
) -> PipelineServiceClient:
|
63
|
+
"""Returns PipelineServiceClient."""
|
64
|
+
if region and region != "global":
|
65
|
+
client_options = ClientOptions(api_endpoint=f"{region}-aiplatform.googleapis.com:443")
|
66
|
+
else:
|
67
|
+
client_options = ClientOptions()
|
68
|
+
return PipelineServiceClient(
|
69
|
+
credentials=self.get_credentials(), client_info=CLIENT_INFO, client_options=client_options
|
70
|
+
)
|
71
|
+
|
72
|
+
def get_pipeline_job_object(
|
73
|
+
self,
|
74
|
+
display_name: str,
|
75
|
+
template_path: str,
|
76
|
+
job_id: str | None = None,
|
77
|
+
pipeline_root: str | None = None,
|
78
|
+
parameter_values: dict[str, Any] | None = None,
|
79
|
+
input_artifacts: dict[str, str] | None = None,
|
80
|
+
enable_caching: bool | None = None,
|
81
|
+
encryption_spec_key_name: str | None = None,
|
82
|
+
labels: dict[str, str] | None = None,
|
83
|
+
project: str | None = None,
|
84
|
+
location: str | None = None,
|
85
|
+
failure_policy: str | None = None,
|
86
|
+
) -> PipelineJob:
|
87
|
+
"""Returns PipelineJob object."""
|
88
|
+
return PipelineJob(
|
89
|
+
display_name=display_name,
|
90
|
+
template_path=template_path,
|
91
|
+
job_id=job_id,
|
92
|
+
pipeline_root=pipeline_root,
|
93
|
+
parameter_values=parameter_values,
|
94
|
+
input_artifacts=input_artifacts,
|
95
|
+
enable_caching=enable_caching,
|
96
|
+
encryption_spec_key_name=encryption_spec_key_name,
|
97
|
+
labels=labels,
|
98
|
+
credentials=self.get_credentials(),
|
99
|
+
project=project,
|
100
|
+
location=location,
|
101
|
+
failure_policy=failure_policy,
|
102
|
+
)
|
103
|
+
|
104
|
+
@staticmethod
|
105
|
+
def extract_pipeline_job_id(obj: dict) -> str:
|
106
|
+
"""Returns unique id of the pipeline_job."""
|
107
|
+
return obj["name"].rpartition("/")[-1]
|
108
|
+
|
109
|
+
def wait_for_operation(self, operation: Operation, timeout: float | None = None):
|
110
|
+
"""Waits for long-lasting operation to complete."""
|
111
|
+
try:
|
112
|
+
return operation.result(timeout=timeout)
|
113
|
+
except Exception:
|
114
|
+
error = operation.exception(timeout=timeout)
|
115
|
+
raise AirflowException(error)
|
116
|
+
|
117
|
+
def cancel_pipeline_job(self) -> None:
|
118
|
+
"""Cancel PipelineJob."""
|
119
|
+
if self._pipeline_job:
|
120
|
+
self._pipeline_job.cancel()
|
121
|
+
|
122
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
123
|
+
def create_pipeline_job(
|
124
|
+
self,
|
125
|
+
project_id: str,
|
126
|
+
region: str,
|
127
|
+
pipeline_job: PipelineJob,
|
128
|
+
pipeline_job_id: str,
|
129
|
+
retry: Retry | _MethodDefault = DEFAULT,
|
130
|
+
timeout: float | None = None,
|
131
|
+
metadata: Sequence[tuple[str, str]] = (),
|
132
|
+
) -> PipelineJob:
|
133
|
+
"""
|
134
|
+
Creates a PipelineJob. A PipelineJob will run immediately when created.
|
135
|
+
|
136
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
137
|
+
:param region: Required. The ID of the Google Cloud region that the service belongs to.
|
138
|
+
:param pipeline_job: Required. The PipelineJob to create.
|
139
|
+
:param pipeline_job_id: The ID to use for the PipelineJob, which will become the final component of
|
140
|
+
the PipelineJob name. If not provided, an ID will be automatically generated.
|
141
|
+
|
142
|
+
This value should be less than 128 characters, and valid characters are /[a-z][0-9]-/.
|
143
|
+
:param retry: Designation of what errors, if any, should be retried.
|
144
|
+
:param timeout: The timeout for this request.
|
145
|
+
:param metadata: Strings which should be sent along with the request as metadata.
|
146
|
+
"""
|
147
|
+
client = self.get_pipeline_service_client(region)
|
148
|
+
parent = client.common_location_path(project_id, region)
|
149
|
+
|
150
|
+
result = client.create_pipeline_job(
|
151
|
+
request={
|
152
|
+
"parent": parent,
|
153
|
+
"pipeline_job": pipeline_job,
|
154
|
+
"pipeline_job_id": pipeline_job_id,
|
155
|
+
},
|
156
|
+
retry=retry,
|
157
|
+
timeout=timeout,
|
158
|
+
metadata=metadata,
|
159
|
+
)
|
160
|
+
return result
|
161
|
+
|
162
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
163
|
+
def run_pipeline_job(
|
164
|
+
self,
|
165
|
+
project_id: str,
|
166
|
+
region: str,
|
167
|
+
display_name: str,
|
168
|
+
template_path: str,
|
169
|
+
job_id: str | None = None,
|
170
|
+
pipeline_root: str | None = None,
|
171
|
+
parameter_values: dict[str, Any] | None = None,
|
172
|
+
input_artifacts: dict[str, str] | None = None,
|
173
|
+
enable_caching: bool | None = None,
|
174
|
+
encryption_spec_key_name: str | None = None,
|
175
|
+
labels: dict[str, str] | None = None,
|
176
|
+
failure_policy: str | None = None,
|
177
|
+
# START: run param
|
178
|
+
service_account: str | None = None,
|
179
|
+
network: str | None = None,
|
180
|
+
create_request_timeout: float | None = None,
|
181
|
+
experiment: str | experiment_resources.Experiment | None = None,
|
182
|
+
# END: run param
|
183
|
+
) -> PipelineJob:
|
184
|
+
"""
|
185
|
+
Run PipelineJob and monitor the job until completion.
|
186
|
+
|
187
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
188
|
+
:param region: Required. The ID of the Google Cloud region that the service belongs to.
|
189
|
+
:param display_name: Required. The user-defined name of this Pipeline.
|
190
|
+
:param template_path: Required. The path of PipelineJob or PipelineSpec JSON or YAML file. It can be
|
191
|
+
a local path, a Google Cloud Storage URI (e.g. "gs://project.name"), an Artifact Registry URI
|
192
|
+
(e.g. "https://us-central1-kfp.pkg.dev/proj/repo/pack/latest"), or an HTTPS URI.
|
193
|
+
:param job_id: Optional. The unique ID of the job run. If not specified, pipeline name + timestamp
|
194
|
+
will be used.
|
195
|
+
:param pipeline_root: Optional. The root of the pipeline outputs. If not set, the staging bucket set
|
196
|
+
in aiplatform.init will be used. If that's not set a pipeline-specific artifacts bucket will be
|
197
|
+
used.
|
198
|
+
:param parameter_values: Optional. The mapping from runtime parameter names to its values that
|
199
|
+
control the pipeline run.
|
200
|
+
:param input_artifacts: Optional. The mapping from the runtime parameter name for this artifact to
|
201
|
+
its resource id. For example: "vertex_model":"456". Note: full resource name
|
202
|
+
("projects/123/locations/us-central1/metadataStores/default/artifacts/456") cannot be used.
|
203
|
+
:param enable_caching: Optional. Whether to turn on caching for the run.
|
204
|
+
If this is not set, defaults to the compile time settings, which are True for all tasks by
|
205
|
+
default, while users may specify different caching options for individual tasks.
|
206
|
+
If this is set, the setting applies to all tasks in the pipeline. Overrides the compile time
|
207
|
+
settings.
|
208
|
+
:param encryption_spec_key_name: Optional. The Cloud KMS resource identifier of the customer managed
|
209
|
+
encryption key used to protect the job. Has the form:
|
210
|
+
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
|
211
|
+
The key needs to be in the same region as where the compute resource is created. If this is set,
|
212
|
+
then all resources created by the PipelineJob will be encrypted with the provided encryption key.
|
213
|
+
Overrides encryption_spec_key_name set in aiplatform.init.
|
214
|
+
:param labels: Optional. The user defined metadata to organize PipelineJob.
|
215
|
+
:param failure_policy: Optional. The failure policy - "slow" or "fast". Currently, the default of a
|
216
|
+
pipeline is that the pipeline will continue to run until no more tasks can be executed, also
|
217
|
+
known as PIPELINE_FAILURE_POLICY_FAIL_SLOW (corresponds to "slow"). However, if a pipeline is set
|
218
|
+
to PIPELINE_FAILURE_POLICY_FAIL_FAST (corresponds to "fast"), it will stop scheduling any new
|
219
|
+
tasks when a task has failed. Any scheduled tasks will continue to completion.
|
220
|
+
:param service_account: Optional. Specifies the service account for workload run-as account. Users
|
221
|
+
submitting jobs must have act-as permission on this run-as account.
|
222
|
+
:param network: Optional. The full name of the Compute Engine network to which the job should be
|
223
|
+
peered. For example, projects/12345/global/networks/myVPC.
|
224
|
+
Private services access must already be configured for the network. If left unspecified, the
|
225
|
+
network set in aiplatform.init will be used. Otherwise, the job is not peered with any network.
|
226
|
+
:param create_request_timeout: Optional. The timeout for the create request in seconds.
|
227
|
+
:param experiment: Optional. The Vertex AI experiment name or instance to associate to this
|
228
|
+
PipelineJob. Metrics produced by the PipelineJob as system.Metric Artifacts will be associated as
|
229
|
+
metrics to the current Experiment Run. Pipeline parameters will be associated as parameters to
|
230
|
+
the current Experiment Run.
|
231
|
+
"""
|
232
|
+
self._pipeline_job = self.get_pipeline_job_object(
|
233
|
+
display_name=display_name,
|
234
|
+
template_path=template_path,
|
235
|
+
job_id=job_id,
|
236
|
+
pipeline_root=pipeline_root,
|
237
|
+
parameter_values=parameter_values,
|
238
|
+
input_artifacts=input_artifacts,
|
239
|
+
enable_caching=enable_caching,
|
240
|
+
encryption_spec_key_name=encryption_spec_key_name,
|
241
|
+
labels=labels,
|
242
|
+
project=project_id,
|
243
|
+
location=region,
|
244
|
+
failure_policy=failure_policy,
|
245
|
+
)
|
246
|
+
|
247
|
+
self._pipeline_job.submit(
|
248
|
+
service_account=service_account,
|
249
|
+
network=network,
|
250
|
+
create_request_timeout=create_request_timeout,
|
251
|
+
experiment=experiment,
|
252
|
+
)
|
253
|
+
|
254
|
+
self._pipeline_job.wait()
|
255
|
+
return self._pipeline_job
|
256
|
+
|
257
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
258
|
+
def get_pipeline_job(
|
259
|
+
self,
|
260
|
+
project_id: str,
|
261
|
+
region: str,
|
262
|
+
pipeline_job_id: str,
|
263
|
+
retry: Retry | _MethodDefault = DEFAULT,
|
264
|
+
timeout: float | None = None,
|
265
|
+
metadata: Sequence[tuple[str, str]] = (),
|
266
|
+
) -> PipelineJob:
|
267
|
+
"""
|
268
|
+
Gets a PipelineJob.
|
269
|
+
|
270
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
271
|
+
:param region: Required. The ID of the Google Cloud region that the service belongs to.
|
272
|
+
:param pipeline_job_id: Required. The ID of the PipelineJob resource.
|
273
|
+
:param retry: Designation of what errors, if any, should be retried.
|
274
|
+
:param timeout: The timeout for this request.
|
275
|
+
:param metadata: Strings which should be sent along with the request as metadata.
|
276
|
+
"""
|
277
|
+
client = self.get_pipeline_service_client(region)
|
278
|
+
name = client.pipeline_job_path(project_id, region, pipeline_job_id)
|
279
|
+
|
280
|
+
result = client.get_pipeline_job(
|
281
|
+
request={
|
282
|
+
"name": name,
|
283
|
+
},
|
284
|
+
retry=retry,
|
285
|
+
timeout=timeout,
|
286
|
+
metadata=metadata,
|
287
|
+
)
|
288
|
+
return result
|
289
|
+
|
290
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
291
|
+
def list_pipeline_jobs(
|
292
|
+
self,
|
293
|
+
project_id: str,
|
294
|
+
region: str,
|
295
|
+
page_size: int | None = None,
|
296
|
+
page_token: str | None = None,
|
297
|
+
filter: str | None = None,
|
298
|
+
order_by: str | None = None,
|
299
|
+
retry: Retry | _MethodDefault = DEFAULT,
|
300
|
+
timeout: float | None = None,
|
301
|
+
metadata: Sequence[tuple[str, str]] = (),
|
302
|
+
) -> ListPipelineJobsPager:
|
303
|
+
"""
|
304
|
+
Lists PipelineJobs in a Location.
|
305
|
+
|
306
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
307
|
+
:param region: Required. The ID of the Google Cloud region that the service belongs to.
|
308
|
+
:param filter: Optional. Lists the PipelineJobs that match the filter expression. The
|
309
|
+
following fields are supported:
|
310
|
+
|
311
|
+
- ``pipeline_name``: Supports ``=`` and ``!=`` comparisons.
|
312
|
+
- ``display_name``: Supports ``=``, ``!=`` comparisons, and
|
313
|
+
``:`` wildcard.
|
314
|
+
- ``pipeline_job_user_id``: Supports ``=``, ``!=``
|
315
|
+
comparisons, and ``:`` wildcard. for example, can check
|
316
|
+
if pipeline's display_name contains *step* by doing
|
317
|
+
display_name:"*step*"
|
318
|
+
- ``create_time``: Supports ``=``, ``!=``, ``<``, ``>``,
|
319
|
+
``<=``, and ``>=`` comparisons. Values must be in RFC
|
320
|
+
3339 format.
|
321
|
+
- ``update_time``: Supports ``=``, ``!=``, ``<``, ``>``,
|
322
|
+
``<=``, and ``>=`` comparisons. Values must be in RFC
|
323
|
+
3339 format.
|
324
|
+
- ``end_time``: Supports ``=``, ``!=``, ``<``, ``>``,
|
325
|
+
``<=``, and ``>=`` comparisons. Values must be in RFC
|
326
|
+
3339 format.
|
327
|
+
- ``labels``: Supports key-value equality and key presence.
|
328
|
+
|
329
|
+
Filter expressions can be combined together using logical
|
330
|
+
operators (``AND`` & ``OR``). For example:
|
331
|
+
``pipeline_name="test" AND create_time>"2020-05-18T13:30:00Z"``.
|
332
|
+
|
333
|
+
The syntax to define filter expression is based on
|
334
|
+
https://google.aip.dev/160.
|
335
|
+
:param page_size: Optional. The standard list page size.
|
336
|
+
:param page_token: Optional. The standard list page token. Typically obtained via
|
337
|
+
[ListPipelineJobsResponse.next_page_token][google.cloud.aiplatform.v1.ListPipelineJobsResponse.next_page_token]
|
338
|
+
of the previous
|
339
|
+
[PipelineService.ListPipelineJobs][google.cloud.aiplatform.v1.PipelineService.ListPipelineJobs]
|
340
|
+
call.
|
341
|
+
:param order_by: Optional. A comma-separated list of fields to order by. The default
|
342
|
+
sort order is in ascending order. Use "desc" after a field
|
343
|
+
name for descending. You can have multiple order_by fields
|
344
|
+
provided e.g. "create_time desc, end_time", "end_time,
|
345
|
+
start_time, update_time" For example, using "create_time
|
346
|
+
desc, end_time" will order results by create time in
|
347
|
+
descending order, and if there are multiple jobs having the
|
348
|
+
same create time, order them by the end time in ascending
|
349
|
+
order. if order_by is not specified, it will order by
|
350
|
+
default order is create time in descending order. Supported
|
351
|
+
fields:
|
352
|
+
|
353
|
+
- ``create_time``
|
354
|
+
- ``update_time``
|
355
|
+
- ``end_time``
|
356
|
+
- ``start_time``
|
357
|
+
:param retry: Designation of what errors, if any, should be retried.
|
358
|
+
:param timeout: The timeout for this request.
|
359
|
+
:param metadata: Strings which should be sent along with the request as metadata.
|
360
|
+
"""
|
361
|
+
client = self.get_pipeline_service_client(region)
|
362
|
+
parent = client.common_location_path(project_id, region)
|
363
|
+
|
364
|
+
result = client.list_pipeline_jobs(
|
365
|
+
request={
|
366
|
+
"parent": parent,
|
367
|
+
"page_size": page_size,
|
368
|
+
"page_token": page_token,
|
369
|
+
"filter": filter,
|
370
|
+
"order_by": order_by,
|
371
|
+
},
|
372
|
+
retry=retry,
|
373
|
+
timeout=timeout,
|
374
|
+
metadata=metadata,
|
375
|
+
)
|
376
|
+
return result
|
377
|
+
|
378
|
+
@GoogleBaseHook.fallback_to_default_project_id
|
379
|
+
def delete_pipeline_job(
|
380
|
+
self,
|
381
|
+
project_id: str,
|
382
|
+
region: str,
|
383
|
+
pipeline_job_id: str,
|
384
|
+
retry: Retry | _MethodDefault = DEFAULT,
|
385
|
+
timeout: float | None = None,
|
386
|
+
metadata: Sequence[tuple[str, str]] = (),
|
387
|
+
) -> Operation:
|
388
|
+
"""
|
389
|
+
Deletes a PipelineJob.
|
390
|
+
|
391
|
+
:param project_id: Required. The ID of the Google Cloud project that the service belongs to.
|
392
|
+
:param region: Required. The ID of the Google Cloud region that the service belongs to.
|
393
|
+
:param pipeline_job_id: Required. The ID of the PipelineJob resource to be deleted.
|
394
|
+
:param retry: Designation of what errors, if any, should be retried.
|
395
|
+
:param timeout: The timeout for this request.
|
396
|
+
:param metadata: Strings which should be sent along with the request as metadata.
|
397
|
+
"""
|
398
|
+
client = self.get_pipeline_service_client(region)
|
399
|
+
name = client.pipeline_job_path(project_id, region, pipeline_job_id)
|
400
|
+
|
401
|
+
result = client.delete_pipeline_job(
|
402
|
+
request={
|
403
|
+
"name": name,
|
404
|
+
},
|
405
|
+
retry=retry,
|
406
|
+
timeout=timeout,
|
407
|
+
metadata=metadata,
|
408
|
+
)
|
409
|
+
return result
|