google-cloud-pipeline-components 2.13.1-py3-none-any.whl → 2.14.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of google-cloud-pipeline-components might be problematic.
- google_cloud_pipeline_components/__init__.py +5 -6
- google_cloud_pipeline_components/_implementation/llm/deployment_graph.py +4 -10
- google_cloud_pipeline_components/_implementation/llm/env.py +1 -1
- google_cloud_pipeline_components/_implementation/llm/function_based.py +14 -48
- google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py +1 -1
- google_cloud_pipeline_components/_implementation/llm/reinforcement_learning_graph.py +27 -36
- google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py +26 -41
- google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py +60 -0
- google_cloud_pipeline_components/_implementation/llm/validate_pipeline.py +11 -0
- google_cloud_pipeline_components/_placeholders.py +30 -1
- google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py +1 -1
- google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py +2 -2
- google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py +2 -2
- google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +34 -34
- google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml +34 -34
- google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml +34 -34
- google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml +34 -34
- google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py +1 -1
- google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml +39 -39
- google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml +41 -41
- google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml +4 -4
- google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py +3 -3
- google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml +17 -17
- google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml +15 -15
- google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml +16 -16
- google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py +2 -2
- google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml +15 -15
- google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml +14 -14
- google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml +13 -13
- google_cloud_pipeline_components/preview/automl/vision/data_converter.py +3 -1
- google_cloud_pipeline_components/preview/custom_job/component.py +2 -2
- google_cloud_pipeline_components/preview/custom_job/utils.py +3 -2
- google_cloud_pipeline_components/preview/llm/rlhf/component.py +60 -8
- google_cloud_pipeline_components/preview/model_evaluation/__init__.py +1 -1
- google_cloud_pipeline_components/proto/template_metadata_pb2.py +22 -15
- google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml +10 -10
- google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml +31 -31
- google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +3 -3
- google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +14 -14
- google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml +37 -37
- google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py +2 -2
- google_cloud_pipeline_components/v1/automl/tabular/ensemble.py +2 -2
- google_cloud_pipeline_components/v1/automl/tabular/finalizer.py +1 -1
- google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py +1 -1
- google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py +1 -1
- google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py +2 -2
- google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py +2 -2
- google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py +1 -1
- google_cloud_pipeline_components/v1/automl/tabular/transform.py +2 -2
- google_cloud_pipeline_components/v1/model_evaluation/__init__.py +3 -1
- google_cloud_pipeline_components/v1/model_evaluation/classification_component.py +2 -2
- google_cloud_pipeline_components/{preview → v1}/model_evaluation/model_based_llm_evaluation/__init__.py +2 -2
- google_cloud_pipeline_components/version.py +1 -1
- {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.0.dist-info}/METADATA +18 -19
- {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.0.dist-info}/RECORD +65 -66
- {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.0.dist-info}/WHEEL +1 -1
- google_cloud_pipeline_components/proto/preflight_validations_pb2.py +0 -47
- /google_cloud_pipeline_components/{preview → v1}/model_evaluation/model_based_llm_evaluation/autosxs/__init__.py +0 -0
- /google_cloud_pipeline_components/{preview → v1}/model_evaluation/model_based_llm_evaluation/autosxs/autosxs_pipeline.py +0 -0
- {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.0.dist-info}/LICENSE +0 -0
- {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.0.dist-info}/top_level.txt +0 -0
@@ -2844,7 +2844,7 @@ deploymentSpec:
 \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
 "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
 {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"",
-"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:
+"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20240419_0625", "\",
 \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}",
 "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=",
 "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}'

@@ -2875,7 +2875,7 @@ deploymentSpec:
 \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\
 \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\
 \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-feature-transform-engine:
   container:
     args:

@@ -2960,8 +2960,8 @@ deploymentSpec:
 "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}'
 - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}'
 - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}'
-- --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:
-- --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:
+- --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240419_0625
+- --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240419_0625
 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}'
 - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}'
 - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}'

@@ -2978,7 +2978,7 @@ deploymentSpec:
 - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat":
   ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}'
 - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}'
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240419_0625
 resources:
   cpuLimit: 8.0
   memoryLimit: 30.0

@@ -3098,10 +3098,10 @@ deploymentSpec:
 \ worker pool specs.\n \"\"\"\n import copy\n import collections\n import\
 \ os\n import re\n\n def get_gcs_path(path):\n return re.sub(r'/gcs/',\
 \ 'gs://', path)\n\n formatted_job_dir = get_gcs_path(job_dir)\n prediction_docker_uri\
-\ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:
+\ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20240419_0625'\n\
 \ )\n master_worker_pool_spec = {\n 'replica_count': 1,\n 'machine_spec':\
 \ {\n 'machine_type': machine_type,\n },\n 'container_spec':\
-\ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:
+\ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20240419_0625',\n\
 \ 'args': [\n f'--job_dir={formatted_job_dir}',\n\
 \ f'--target_column={target_column}',\n f'--objective={objective}',\n\
 \ f'--training_data_path={get_gcs_path(materialized_train_split)}',\n\

@@ -3159,7 +3159,7 @@ deploymentSpec:
 \ 'predictionSchemaUri': os.path.join(model_dir, 'prediction_schema.yaml'),\n\
 \ }\n unmanaged_container_model.uri = model_dir\n\n return collections.namedtuple('Outputs',\
 \ ['worker_pool_specs'])(\n worker_pool_specs_lst\n )\n\n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-get-model-display-name:
   container:
     args:

@@ -3186,7 +3186,7 @@ deploymentSpec:
 \n return collections.namedtuple(\n 'Outputs',\n [\n \
 \ 'model_display_name',\n ],\n )(\n model_display_name,\n )\n\
 \n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-get-prediction-type-for-xgboost:
   container:
     args:

@@ -3215,7 +3215,7 @@ deploymentSpec:
 \ Must be one of'\n ' [reg:squarederror, reg:squaredlogerror, reg:logistic,\
 \ reg:gamma,'\n ' reg:tweedie, reg:pseudohubererror, binary:logistic,'\n\
 \ ' multi:softprob].'\n )\n\n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-model-batch-predict:
   container:
     args:

@@ -3407,7 +3407,7 @@ deploymentSpec:
 \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\
 \ ],\n )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\
 \ )\n\n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-split-materialized-data:
   container:
     args:

@@ -3453,7 +3453,7 @@ deploymentSpec:
 \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\
 \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\
 \ 'w') as f:\n f.write(file_patterns[2])\n\n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240419_0625
 exec-training-configurator-and-validator:
   container:
     args:

@@ -3498,7 +3498,7 @@ deploymentSpec:
 ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}'
 - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat":
   ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}'
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240419_0625
 exec-xgboost-trainer:
   container:
     args:
@@ -22,6 +22,7 @@ from google_cloud_pipeline_components.preview.automl.vision.json_utils import Co
 from kfp import dsl


+# pylint: disable=singleton-comparison
 # pylint: disable=g-doc-args
 @dsl.container_component
 def data_converter(

@@ -31,6 +32,7 @@ def data_converter(
     objective: str,
     output_dir: dsl.Output[dsl.Artifact],
     gcp_resources: dsl.OutputPath(str),
+    enable_input_validation: bool = True,
     location: str = 'us-central1',
     timeout: str = '604800s',
     service_account: Optional[str] = None,

@@ -75,7 +77,7 @@ def data_converter(
     'image_uri': 'us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/data-converter',
     'args': [
         '--enable_input_validation',
-
+        str(enable_input_validation),
         '--input_file_path',
         input_file_path,
         '--input_file_type',
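The preview vision data_converter component gains an enable_input_validation knob (default True); its string form is forwarded to the container right after the --enable_input_validation flag. A minimal sketch of toggling it off, assuming the component's other required inputs are the ones visible in this diff (the pipeline name, file path, type, and objective values are illustrative, not from the diff):

from google_cloud_pipeline_components.preview.automl.vision import data_converter
from kfp import dsl


@dsl.pipeline(name='data-converter-demo')  # hypothetical pipeline name
def demo_pipeline():
    data_converter.data_converter(
        input_file_path='gs://my-bucket/annotations.jsonl',  # illustrative URI
        input_file_type='jsonl',                             # illustrative type
        objective='icn',                                     # illustrative objective
        enable_input_validation=False,  # new in 2.14.0; defaults to True
    )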
@@ -37,7 +37,7 @@ def custom_training_job(
     base_output_directory: str = '',
     labels: Dict[str, str] = {},
     encryption_spec_key_name: str = '',
-    persistent_resource_id: str =
+    persistent_resource_id: str = _placeholders.PERSISTENT_RESOURCE_ID_PLACEHOLDER,
     project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
 ):
   # fmt: off

@@ -57,7 +57,7 @@ def custom_training_job(
     base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination).
     labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf).
     encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key.
-    persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run.
+    persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. The default value is a placeholder that will be resolved to the PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig)'s persistent resource id at runtime. However, if the PipelineJob doesn't set Persistent Resource as the job level runtime, the placedholder will be resolved to an empty string and the custom job will be run on demand. If the value is set explicitly, the custom job will runs in the specified persistent resource, in this case, please note the network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
     project: Project to create the custom training job in. Defaults to the project in which the PipelineJob is run.
   Returns:
     gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the CustomJob.
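Per the expanded docstring, persistent_resource_id now defaults to a placeholder that resolves at runtime to the PipelineJob RuntimeConfig's persistent resource ID, falling back to an empty string (on-demand execution) when none is set. A sketch of pinning a job to a specific resource instead, assuming the package's usual CustomTrainingJobOp alias for this component; the resource ID, image, and machine type are illustrative:

from google_cloud_pipeline_components.preview.custom_job import CustomTrainingJobOp

training_task = CustomTrainingJobOp(
    display_name='demo-custom-job',
    worker_pool_specs=[{
        'machine_spec': {'machine_type': 'n1-standard-4'},
        'replica_count': 1,
        'container_spec': {'image_uri': 'gcr.io/my-project/trainer:latest'},
    }],
    # Explicit override of the new placeholder default. Per the docstring,
    # network and CMEK settings on the job must be consistent with the
    # PersistentResource, or the job is rejected (Preview feature).
    persistent_resource_id='my-persistent-resource',
)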
@@ -18,6 +18,7 @@ import textwrap
 from typing import Callable, Dict, List, Optional
 import warnings

+from google_cloud_pipeline_components import _placeholders
 from google_cloud_pipeline_components.preview.custom_job import component
 from kfp import components
 import yaml

@@ -68,7 +69,7 @@ def create_custom_training_job_from_component(
     nfs_mounts: Optional[List[Dict[str, str]]] = None,
     base_output_directory: str = '',
     labels: Optional[Dict[str, str]] = None,
-    persistent_resource_id: str =
+    persistent_resource_id: str = _placeholders.PERSISTENT_RESOURCE_ID_PLACEHOLDER,
     env: Optional[List[Dict[str, str]]] = None,
 ) -> Callable:
   # fmt: off

@@ -96,7 +97,7 @@ def create_custom_training_job_from_component(
     nfs_mounts: A list of [NfsMount](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training](https://cloud.google.com/vertex-ai/docs/training/train-nfs-share).
     base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination).
     labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf).
-    persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run.
+    persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. The default value is a placeholder that will be resolved to the PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig)'s persistent resource id at runtime. However, if the PipelineJob doesn't set Persistent Resource as the job level runtime, the placedholder will be resolved to an empty string and the custom job will be run on demand. If the value is set explicitly, the custom job will runs in the specified persistent resource, in this case, please note the network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
     env: Environment variables to be passed to the container. Takes the form `[{'name': '...', 'value': '...'}]`. Maximum limit is 100.

     Returns:
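The wrapper utility picks up the same placeholder default, so components converted with it inherit the runtime-resolved behavior unless the caller overrides it. A sketch under that assumption (the component body and machine type are illustrative):

from google_cloud_pipeline_components.preview.custom_job import utils
from kfp import dsl


@dsl.component
def train_step(message: str):
    # Illustrative training step.
    print(message)


# persistent_resource_id defaults to
# _placeholders.PERSISTENT_RESOURCE_ID_PLACEHOLDER unless passed explicitly.
custom_train_step = utils.create_custom_training_job_from_component(
    train_step,
    machine_type='n1-standard-4',
)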
@@ -21,6 +21,8 @@ from google_cloud_pipeline_components._implementation.llm import env
 from google_cloud_pipeline_components._implementation.llm import function_based
 from google_cloud_pipeline_components._implementation.llm import reinforcement_learning_graph
 from google_cloud_pipeline_components._implementation.llm import reward_model_graph
+from google_cloud_pipeline_components._implementation.llm import rlhf_preprocessor
+from google_cloud_pipeline_components._implementation.llm import utils
 from google_cloud_pipeline_components._implementation.llm import validate_pipeline
 from google_cloud_pipeline_components.preview.llm.infer import component
 import kfp

@@ -94,11 +96,43 @@ def rlhf_pipeline(
       eval_dataset=eval_dataset,
   ).set_display_name('Validate Inputs')

+  preprocess_metadata = rlhf_preprocessor.rlhf_preprocessor(
+      large_model_reference=large_model_reference,
+      accelerator_type=accelerator_type,
+      use_test_spec=env.get_use_test_machine_spec(),
+      project=env.PRIVATE_ARTIFACT_REGISTRY_PROJECT,
+      location=env.PRIVATE_ARTIFACT_REGISTRY_LOCATION,
+      artifact_registry=env.PRIVATE_ARTIFACT_REGISTRY,
+      tag=env.get_private_image_tag(),
+      evaluation_dataset=eval_dataset,
+      tensorboard_resource_id=tensorboard_resource_id,
+  ).set_display_name('Preprocess Inputs')
+  num_microbatches = preprocess_metadata.outputs['metadata_num_microbatches']
+
   reward_model_pipeline = (
       (
           reward_model_graph.pipeline(
               preference_dataset=preference_dataset,
               large_model_reference=large_model_reference,
+              reward_model_reference=preprocess_metadata.outputs[
+                  'metadata_reward_model_reference'
+              ],
+              reward_model_path=preprocess_metadata.outputs[
+                  'metadata_reward_model_path'
+              ],
+              machine_type=preprocess_metadata.outputs['metadata_machine_type'],
+              tuning_location=preprocess_metadata.outputs[
+                  'metadata_tuning_location'
+              ],
+              accelerator_type=preprocess_metadata.outputs[
+                  'metadata_accelerator_type'
+              ],
+              accelerator_count=preprocess_metadata.outputs[
+                  'metadata_accelerator_count'
+              ],
+              reward_model_image_uri=preprocess_metadata.outputs[
+                  'metadata_refined_image_uri'
+              ],
               prompt_sequence_length=prompt_sequence_length,
               target_sequence_length=target_sequence_length,
               eval_dataset=validate_pipeline_task.outputs[

@@ -110,9 +144,9 @@ def rlhf_pipeline(
               lora_dim=reward_lora_dim,
               project=project,
               location=location,
-              accelerator_type=accelerator_type,
               tensorboard_resource_id=tensorboard_resource_id,
               encryption_spec_key_name=encryption_spec_key_name,
+              num_microbatches=num_microbatches,
           )
       )
       .set_display_name('Train Reward Model')

@@ -120,8 +154,8 @@ def rlhf_pipeline(
   )
   rl_model_pipeline = reinforcement_learning_graph.pipeline(
       prompt_dataset=prompt_dataset,
-      input_reward_model_path=
-      '
+      input_reward_model_path=preprocess_metadata.outputs[
+          'metadata_reward_model_path'
       ],
       input_reward_adapter_path=reward_model_pipeline.outputs[
           'reward_model_adapter_path'

@@ -130,6 +164,22 @@ def rlhf_pipeline(
           'reward_dataset_path'
       ],
       large_model_reference=large_model_reference,
+      reward_model_reference=preprocess_metadata.outputs[
+          'metadata_reward_model_reference'
+      ],
+      policy_model_reference=preprocess_metadata.outputs[
+          'metadata_large_model_reference'
+      ],
+      policy_model_path=preprocess_metadata.outputs[
+          'metadata_reference_model_path'
+      ],
+      machine_type=preprocess_metadata.outputs['metadata_machine_type'],
+      tuning_location=preprocess_metadata.outputs['metadata_tuning_location'],
+      accelerator_type=preprocess_metadata.outputs['metadata_accelerator_type'],
+      accelerator_count=preprocess_metadata.outputs[
+          'metadata_accelerator_count'
+      ],
+      rl_image_uri=preprocess_metadata.outputs['metadata_refined_image_uri'],
       prompt_sequence_length=prompt_sequence_length,
       target_sequence_length=target_sequence_length,
       reinforcement_learning_rate_multiplier=reinforcement_learning_rate_multiplier,

@@ -138,17 +188,16 @@ def rlhf_pipeline(
       instruction=instruction,
       reward_lora_dim=reward_lora_dim,
       project=project,
-      accelerator_type=accelerator_type,
       location=location,
       tensorboard_resource_id=tensorboard_resource_id,
       encryption_spec_key_name=encryption_spec_key_name,
+      num_microbatches=num_microbatches,
   ).set_display_name('Reinforcement Learning')

-  has_inference_dataset =
-
-  ).set_display_name('Resolve Inference Dataset')
+  has_inference_dataset = preprocess_metadata.outputs['has_inference_dataset']
+
   with kfp.dsl.Condition(
-      has_inference_dataset
+      has_inference_dataset == True,  # pylint: disable=singleton-comparison
       name='Perform Inference',
   ):
     has_model_checkpoint = function_based.value_exists(

@@ -174,6 +223,9 @@ def rlhf_pipeline(
   llm_model_handler = deployment_graph.pipeline(
       output_adapter_path=rl_model_pipeline.outputs['output_adapter_path'],
       large_model_reference=large_model_reference,
+      policy_model_reference=preprocess_metadata.outputs[
+          'metadata_large_model_reference'
+      ],
       model_display_name=model_display_name,
       deploy_model=deploy_model,
       encryption_spec_key_name=encryption_spec_key_name,
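Taken together, these hunks move machine-spec, image, and microbatch resolution out of individual function_based resolvers and into the single rlhf_preprocessor component, whose metadata_* outputs now feed the reward-model, reinforcement-learning, and deployment subgraphs. The public pipeline surface appears unchanged, so compiling it should still look like this sketch (import path per the package's preview docs is an assumption; the output filename is arbitrary):

from google_cloud_pipeline_components.preview.llm import rlhf_pipeline
from kfp import compiler

# Compile the preview RLHF pipeline to a reusable YAML package; the
# preprocessor rewiring shown above happens inside rlhf_pipeline itself.
compiler.Compiler().compile(rlhf_pipeline, package_path='rlhf_pipeline.yaml')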
@@ -16,10 +16,10 @@
 from google_cloud_pipeline_components.preview.model_evaluation.data_bias_component import detect_data_bias as DetectDataBiasOp
 from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp
 from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_graph_component import feature_attribution_graph_component as FeatureAttributionGraphComponentOp
-from google_cloud_pipeline_components.preview.model_evaluation.model_based_llm_evaluation.autosxs.autosxs_pipeline import autosxs_pipeline
 from google_cloud_pipeline_components.preview.model_evaluation.model_bias_component import detect_model_bias as DetectModelBiasOp
 from google_cloud_pipeline_components.v1.model_evaluation.evaluation_llm_classification_pipeline import evaluation_llm_classification_pipeline
 from google_cloud_pipeline_components.v1.model_evaluation.evaluation_llm_text_generation_pipeline import evaluation_llm_text_generation_pipeline
+from google_cloud_pipeline_components.v1.model_evaluation.model_based_llm_evaluation.autosxs.autosxs_pipeline import autosxs_pipeline

 __all__ = [
     'autosxs_pipeline',
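This is the import-path half of the AutoSxS promotion from preview to v1 (the module move itself appears in the file list above): the preview __init__ now re-exports autosxs_pipeline from its new v1 home, so existing preview imports keep working while the v1 path becomes canonical. Both forms below follow directly from the diff:

# New canonical location in 2.14.0:
from google_cloud_pipeline_components.v1.model_evaluation.model_based_llm_evaluation.autosxs.autosxs_pipeline import autosxs_pipeline

# Preview alias, still exported via the updated __init__ above:
from google_cloud_pipeline_components.preview.model_evaluation import autosxs_pipeline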
@@ -67,10 +67,15 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
     b' \x01(\x0b\x32\x1a.template_metadata.OptionsH\x00\x42\x08\n\x06values"U\n\x0bMachineType\x12\r\n\x03\x61ny\x18\x01'
     b' \x01(\x08H\x00\x12-\n\x07options\x18\x02'
     b' \x01(\x0b\x32\x1a.template_metadata.OptionsH\x00\x42\x08\n\x06values"1\n\x07Options\x12&\n\x06values\x18\x01'
-    b' \x03(\x0b\x32\x16.google.protobuf.Value"\
+    b' \x03(\x0b\x32\x16.google.protobuf.Value"\xcc\x02\n\x0fValidationItems\x12N\n\x0esa_validations\x18\x01'
     b' \x03(\x0b\x32\x36.template_metadata.GoogleCloudServiceAccountValidation\x12O\n\x11quota_validations\x18\x02'
     b' \x03(\x0b\x32\x34.template_metadata.GoogleCloudProjectQuotaValidation\x12N\n\x0f\x61pi_validations\x18\x03'
-    b' \x03(\x0b\x32\x35.template_metadata.GoogleCloudApiEnablementValidation
+    b' \x03(\x0b\x32\x35.template_metadata.GoogleCloudApiEnablementValidation\x12H\n\x0fgcs_validations\x18\x04'
+    b' \x03(\x0b\x32/.template_metadata.GoogleCloudStorageValidation"\x80\x01\n\x1cGoogleCloudStorageValidation\x12\x0f\n\x07gcs_uri\x18\x01'
+    b' \x01(\t\x12\x10\n\x08is_input\x18\x02'
+    b' \x01(\x08\x12\x1f\n\x17\x64\x65\x66\x61ult_service_account\x18\x03'
+    b' \x01(\t\x12\x1c\n\x14override_placeholder\x18\x04'
+    b' \x01(\t"p\n!GoogleCloudProjectQuotaValidation\x12\x13\n\x0bmetric_name\x18\x01'
     b' \x01(\t\x12\x15\n\x0bint64_value\x18\x02'
     b' \x01(\x03H\x00\x12\x16\n\x0c\x64ouble_value\x18\x03'
     b' \x01(\x01H\x00\x42\x07\n\x05value"\x8d\x01\n#GoogleCloudServiceAccountValidation\x12\x1f\n\x17\x64\x65\x66\x61ult_principal_email\x18\x01'

@@ -91,12 +96,12 @@ _builder.BuildTopDescriptorsAndMessages(
 if not _descriptor._USE_C_DESCRIPTORS:
   _globals['DESCRIPTOR']._loaded_options = None
   _globals['DESCRIPTOR']._serialized_options = b'P\001'
-  _globals['_SIZE']._serialized_start =
-  _globals['_SIZE']._serialized_end =
-  _globals['_CONTENTTYPE']._serialized_start =
-  _globals['_CONTENTTYPE']._serialized_end =
-  _globals['_URITYPE']._serialized_start =
-  _globals['_URITYPE']._serialized_end =
+  _globals['_SIZE']._serialized_start = 3092
+  _globals['_SIZE']._serialized_end = 3163
+  _globals['_CONTENTTYPE']._serialized_start = 3166
+  _globals['_CONTENTTYPE']._serialized_end = 3296
+  _globals['_URITYPE']._serialized_start = 3298
+  _globals['_URITYPE']._serialized_end = 3395
   _globals['_TEMPLATEMETADATA']._serialized_start = 164
   _globals['_TEMPLATEMETADATA']._serialized_end = 301
   _globals['_IOMETADATA']._serialized_start = 303

@@ -136,11 +141,13 @@ if not _descriptor._USE_C_DESCRIPTORS:
   _globals['_OPTIONS']._serialized_start = 2256
   _globals['_OPTIONS']._serialized_end = 2305
   _globals['_VALIDATIONITEMS']._serialized_start = 2308
-  _globals['_VALIDATIONITEMS']._serialized_end =
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
+  _globals['_VALIDATIONITEMS']._serialized_end = 2640
+  _globals['_GOOGLECLOUDSTORAGEVALIDATION']._serialized_start = 2643
+  _globals['_GOOGLECLOUDSTORAGEVALIDATION']._serialized_end = 2771
+  _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_start = 2773
+  _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_end = 2885
+  _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_start = 2888
+  _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_end = 3029
+  _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_start = 3031
+  _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_end = 3090
   # @@protoc_insertion_point(module_scope)
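Decoding the serialized descriptor above: ValidationItems gains a repeated gcs_validations field (number 4), and a new GoogleCloudStorageValidation message is defined with gcs_uri (string), is_input (bool), default_service_account (string), and override_placeholder (string). A sketch of constructing the new message via the generated module, with placeholder values:

from google_cloud_pipeline_components.proto import template_metadata_pb2

# Field names and types are read off the serialized descriptor in the diff;
# the URI and service account below are illustrative placeholders.
gcs_check = template_metadata_pb2.GoogleCloudStorageValidation(
    gcs_uri='gs://my-bucket/input.csv',
    is_input=True,
    default_service_account='sa@my-project.iam.gserviceaccount.com',
)
validations = template_metadata_pb2.ValidationItems(gcs_validations=[gcs_check])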
@@ -658,7 +658,7 @@ deploymentSpec:
 \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\
 \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \
 \ ref.project, ref.dataset_id)\n\n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-bigquery-create-dataset-2:
   container:
     args:

@@ -693,7 +693,7 @@ deploymentSpec:
 \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\
 \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \
 \ ref.project, ref.dataset_id)\n\n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-bigquery-delete-dataset-with-prefix:
   container:
     args:

@@ -727,7 +727,7 @@ deploymentSpec:
 \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\
 \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\
 \n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-bigquery-query-job:
   container:
     args:

@@ -788,7 +788,7 @@ deploymentSpec:
 \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\
 \ if write_disposition:\n config['write_disposition'] = write_disposition\n\
 \ return config\n\n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-get-first-valid:
   container:
     args:

@@ -812,7 +812,7 @@ deploymentSpec:
 \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
 \n for value in json.loads(values):\n if value:\n return value\n\
 \ raise ValueError('No valid values.')\n\n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-get-model-metadata:
   container:
     args:

@@ -851,7 +851,7 @@ deploymentSpec:
 \ 'forecast_horizon',\n ],\n )(\n options.time_series_timestamp_column,\n\
 \ options.time_series_id_column,\n options.time_series_data_column,\n\
 \ options.horizon,\n )\n\n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-get-table-location:
   container:
     args:

@@ -887,7 +887,7 @@ deploymentSpec:
 \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\
 \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\
 \ return client.get_table(table).location\n\n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-load-table-from-uri:
   container:
     args:

@@ -928,7 +928,7 @@ deploymentSpec:
 \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\
 \ destination=destination,\n project=project,\n location=location,\n\
 \ job_config=job_config).result()\n return destination\n\n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-maybe-replace-with-default:
   container:
     args:

@@ -950,7 +950,7 @@ deploymentSpec:
 \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\
 \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\
 \n return default if not value else value\n\n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 exec-validate-inputs:
   container:
     args:

@@ -1046,7 +1046,7 @@ deploymentSpec:
 \ raise ValueError(\n 'Granularity unit should be one of the\
 \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\
 \n"
-image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:
+image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
 pipelineInfo:
   description: Forecasts using a BQML ARIMA_PLUS model.
   name: automl-tabular-bqml-arima-prediction