google-cloud-pipeline-components 2.13.1-py3-none-any.whl → 2.14.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of google-cloud-pipeline-components might be problematic.

Files changed (66)
  1. google_cloud_pipeline_components/__init__.py +5 -6
  2. google_cloud_pipeline_components/_implementation/llm/deployment_graph.py +4 -10
  3. google_cloud_pipeline_components/_implementation/llm/env.py +1 -1
  4. google_cloud_pipeline_components/_implementation/llm/function_based.py +14 -48
  5. google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py +1 -1
  6. google_cloud_pipeline_components/_implementation/llm/reinforcement_learning_graph.py +27 -36
  7. google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py +26 -41
  8. google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py +60 -0
  9. google_cloud_pipeline_components/_implementation/llm/validate_pipeline.py +11 -0
  10. google_cloud_pipeline_components/_placeholders.py +30 -1
  11. google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py +1 -1
  12. google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py +2 -2
  13. google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py +2 -2
  14. google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +34 -34
  15. google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml +34 -34
  16. google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml +34 -34
  17. google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml +34 -34
  18. google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py +1 -1
  19. google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml +39 -39
  20. google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml +41 -41
  21. google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py +2 -2
  22. google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py +2 -2
  23. google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml +4 -4
  24. google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py +3 -3
  25. google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py +2 -2
  26. google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml +17 -17
  27. google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py +2 -2
  28. google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml +15 -15
  29. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py +2 -2
  30. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml +16 -16
  31. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py +2 -2
  32. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml +15 -15
  33. google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml +14 -14
  34. google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml +13 -13
  35. google_cloud_pipeline_components/preview/automl/vision/data_converter.py +3 -1
  36. google_cloud_pipeline_components/preview/custom_job/component.py +2 -2
  37. google_cloud_pipeline_components/preview/custom_job/utils.py +3 -2
  38. google_cloud_pipeline_components/preview/llm/rlhf/component.py +60 -8
  39. google_cloud_pipeline_components/preview/model_evaluation/__init__.py +1 -1
  40. google_cloud_pipeline_components/proto/template_metadata_pb2.py +22 -15
  41. google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml +10 -10
  42. google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml +31 -31
  43. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +3 -3
  44. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +14 -14
  45. google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml +37 -37
  46. google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py +2 -2
  47. google_cloud_pipeline_components/v1/automl/tabular/ensemble.py +2 -2
  48. google_cloud_pipeline_components/v1/automl/tabular/finalizer.py +1 -1
  49. google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py +1 -1
  50. google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py +1 -1
  51. google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py +2 -2
  52. google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py +2 -2
  53. google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py +1 -1
  54. google_cloud_pipeline_components/v1/automl/tabular/transform.py +2 -2
  55. google_cloud_pipeline_components/v1/model_evaluation/__init__.py +3 -1
  56. google_cloud_pipeline_components/v1/model_evaluation/classification_component.py +2 -2
  57. google_cloud_pipeline_components/{preview → v1}/model_evaluation/model_based_llm_evaluation/__init__.py +2 -2
  58. google_cloud_pipeline_components/version.py +1 -1
  59. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.0.dist-info}/METADATA +18 -19
  60. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.0.dist-info}/RECORD +65 -66
  61. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.0.dist-info}/WHEEL +1 -1
  62. google_cloud_pipeline_components/proto/preflight_validations_pb2.py +0 -47
  63. /google_cloud_pipeline_components/{preview → v1}/model_evaluation/model_based_llm_evaluation/autosxs/__init__.py +0 -0
  64. /google_cloud_pipeline_components/{preview → v1}/model_evaluation/model_based_llm_evaluation/autosxs/autosxs_pipeline.py +0 -0
  65. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.0.dist-info}/LICENSE +0 -0
  66. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.0.dist-info}/top_level.txt +0 -0
google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml
@@ -2844,7 +2844,7 @@ deploymentSpec:
  \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
  "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
  {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"",
- "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20240214_1325", "\",
+ "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20240419_0625", "\",
  \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}",
  "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=",
  "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}'
@@ -2875,7 +2875,7 @@ deploymentSpec:
  \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\
  \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\
  \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-feature-transform-engine:
  container:
  args:
@@ -2960,8 +2960,8 @@ deploymentSpec:
  "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}'
  - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}'
  - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}'
- - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240214_1325
- - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325
+ - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240419_0625
+ - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240419_0625
  - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}'
  - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}'
  - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}'
@@ -2978,7 +2978,7 @@ deploymentSpec:
  - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat":
  ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}'
  - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}'
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240419_0625
  resources:
  cpuLimit: 8.0
  memoryLimit: 30.0
@@ -3098,10 +3098,10 @@ deploymentSpec:
  \ worker pool specs.\n \"\"\"\n import copy\n import collections\n import\
  \ os\n import re\n\n def get_gcs_path(path):\n return re.sub(r'/gcs/',\
  \ 'gs://', path)\n\n formatted_job_dir = get_gcs_path(job_dir)\n prediction_docker_uri\
- \ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20240214_1325'\n\
+ \ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20240419_0625'\n\
  \ )\n master_worker_pool_spec = {\n 'replica_count': 1,\n 'machine_spec':\
  \ {\n 'machine_type': machine_type,\n },\n 'container_spec':\
- \ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20240214_1325',\n\
+ \ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20240419_0625',\n\
  \ 'args': [\n f'--job_dir={formatted_job_dir}',\n\
  \ f'--target_column={target_column}',\n f'--objective={objective}',\n\
  \ f'--training_data_path={get_gcs_path(materialized_train_split)}',\n\
@@ -3159,7 +3159,7 @@ deploymentSpec:
  \ 'predictionSchemaUri': os.path.join(model_dir, 'prediction_schema.yaml'),\n\
  \ }\n unmanaged_container_model.uri = model_dir\n\n return collections.namedtuple('Outputs',\
  \ ['worker_pool_specs'])(\n worker_pool_specs_lst\n )\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-get-model-display-name:
  container:
  args:
@@ -3186,7 +3186,7 @@ deploymentSpec:
  \n return collections.namedtuple(\n 'Outputs',\n [\n \
  \ 'model_display_name',\n ],\n )(\n model_display_name,\n )\n\
  \n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-get-prediction-type-for-xgboost:
  container:
  args:
@@ -3215,7 +3215,7 @@ deploymentSpec:
  \ Must be one of'\n ' [reg:squarederror, reg:squaredlogerror, reg:logistic,\
  \ reg:gamma,'\n ' reg:tweedie, reg:pseudohubererror, binary:logistic,'\n\
  \ ' multi:softprob].'\n )\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-model-batch-predict:
  container:
  args:
@@ -3407,7 +3407,7 @@ deploymentSpec:
  \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\
  \ ],\n )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\
  \ )\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-split-materialized-data:
  container:
  args:
@@ -3453,7 +3453,7 @@ deploymentSpec:
  \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\
  \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\
  \ 'w') as f:\n f.write(file_patterns[2])\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240419_0625
  exec-training-configurator-and-validator:
  container:
  args:
@@ -3498,7 +3498,7 @@ deploymentSpec:
  ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}'
  - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat":
  ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}'
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240419_0625
  exec-xgboost-trainer:
  container:
  args:
google_cloud_pipeline_components/preview/automl/vision/data_converter.py
@@ -22,6 +22,7 @@ from google_cloud_pipeline_components.preview.automl.vision.json_utils import Co
  from kfp import dsl


+ # pylint: disable=singleton-comparison
  # pylint: disable=g-doc-args
  @dsl.container_component
  def data_converter(
@@ -31,6 +32,7 @@ def data_converter(
  objective: str,
  output_dir: dsl.Output[dsl.Artifact],
  gcp_resources: dsl.OutputPath(str),
+ enable_input_validation: bool = True,
  location: str = 'us-central1',
  timeout: str = '604800s',
  service_account: Optional[str] = None,
@@ -75,7 +77,7 @@ def data_converter(
  'image_uri': 'us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/data-converter',
  'args': [
  '--enable_input_validation',
- 'true',
+ str(enable_input_validation),
  '--input_file_path',
  input_file_path,
  '--input_file_type',
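The previously hard-coded 'true' becomes caller-controlled via the new `enable_input_validation` parameter. A minimal sketch of toggling it from a pipeline, assuming the module is imported directly from its path in the file list and limiting the call to the parameters visible in this diff (the component may require others):

```python
from google_cloud_pipeline_components.preview.automl.vision import data_converter
from kfp import dsl

@dsl.pipeline(name='data-converter-demo')
def demo(input_file_path: str, input_file_type: str, objective: str):
    # Default keeps the 2.13.1 behavior (validation enabled); passing
    # False is the escape hatch added in 2.14.0.
    data_converter.data_converter(
        input_file_path=input_file_path,
        input_file_type=input_file_type,
        objective=objective,
        enable_input_validation=False,
    )
```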
google_cloud_pipeline_components/preview/custom_job/component.py
@@ -37,7 +37,7 @@ def custom_training_job(
  base_output_directory: str = '',
  labels: Dict[str, str] = {},
  encryption_spec_key_name: str = '',
- persistent_resource_id: str = '',
+ persistent_resource_id: str = _placeholders.PERSISTENT_RESOURCE_ID_PLACEHOLDER,
  project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
  ):
  # fmt: off
@@ -57,7 +57,7 @@ def custom_training_job(
  base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination).
  labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf).
  encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key.
- persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. If this is specified, the job will be run on existing machines held by the PersistentResource instead of on-demand short-live machines. The network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
+ persistent_resource_id: The ID of the PersistentResource in the same Project and Location in which to run. The default value is a placeholder that is resolved at runtime to the persistent resource id in the PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig). If the PipelineJob does not set a Persistent Resource as the job-level runtime, the placeholder resolves to an empty string and the custom job runs on demand. If the value is set explicitly, the custom job runs on the specified persistent resource; in that case, the network and CMEK configs on the job must be consistent with those on the PersistentResource, otherwise the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
  project: Project to create the custom training job in. Defaults to the project in which the PipelineJob is run.
  Returns:
  gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the CustomJob.
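In practice the new default is invisible unless the PipelineJob itself configures a persistent resource. A minimal sketch of both modes, assuming `CustomTrainingJobOp` is the exported alias for this component and using a hypothetical trainer image:

```python
from google_cloud_pipeline_components.preview.custom_job import CustomTrainingJobOp
from kfp import dsl

WORKER_POOL_SPECS = [{
    'machine_spec': {'machine_type': 'n1-standard-4'},
    'replica_count': 1,
    'container_spec': {'image_uri': 'us-docker.pkg.dev/my-project/trainer:latest'},  # hypothetical image
}]

@dsl.pipeline(name='persistent-resource-demo')
def demo():
    # New default: the placeholder resolves at runtime to the PipelineJob's
    # persistent resource id, or to '' (on-demand machines) if none is set.
    CustomTrainingJobOp(
        display_name='train',
        worker_pool_specs=WORKER_POOL_SPECS,
    )
    # Explicit pin: network/CMEK configs must match the PersistentResource,
    # otherwise the job is rejected.
    CustomTrainingJobOp(
        display_name='train-pinned',
        worker_pool_specs=WORKER_POOL_SPECS,
        persistent_resource_id='my-persistent-resource',  # hypothetical id
    )
```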
google_cloud_pipeline_components/preview/custom_job/utils.py
@@ -18,6 +18,7 @@ import textwrap
  from typing import Callable, Dict, List, Optional
  import warnings

+ from google_cloud_pipeline_components import _placeholders
  from google_cloud_pipeline_components.preview.custom_job import component
  from kfp import components
  import yaml
@@ -68,7 +69,7 @@ def create_custom_training_job_from_component(
  nfs_mounts: Optional[List[Dict[str, str]]] = None,
  base_output_directory: str = '',
  labels: Optional[Dict[str, str]] = None,
- persistent_resource_id: str = '',
+ persistent_resource_id: str = _placeholders.PERSISTENT_RESOURCE_ID_PLACEHOLDER,
  env: Optional[List[Dict[str, str]]] = None,
  ) -> Callable:
  # fmt: off
@@ -96,7 +97,7 @@ def create_custom_training_job_from_component(
  nfs_mounts: A list of [NfsMount](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training](https://cloud.google.com/vertex-ai/docs/training/train-nfs-share).
  base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination).
  labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf).
- persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. If this is specified, the job will be run on existing machines held by the PersistentResource instead of on-demand short-live machines. The network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
+ persistent_resource_id: The ID of the PersistentResource in the same Project and Location in which to run. The default value is a placeholder that is resolved at runtime to the persistent resource id in the PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig). If the PipelineJob does not set a Persistent Resource as the job-level runtime, the placeholder resolves to an empty string and the custom job runs on demand. If the value is set explicitly, the custom job runs on the specified persistent resource; in that case, the network and CMEK configs on the job must be consistent with those on the PersistentResource, otherwise the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
  env: Environment variables to be passed to the container. Takes the form `[{'name': '...', 'value': '...'}]`. Maximum limit is 100.

  Returns:
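The wrapper in utils.py forwards the same default, so lightweight components lifted into CustomJobs pick up the PipelineJob's persistent resource automatically. A sketch under the same assumptions:

```python
from google_cloud_pipeline_components.preview.custom_job import utils
from kfp import dsl

@dsl.component
def train_step(data_path: str):
    # Hypothetical training body.
    print(f'training on {data_path}')

# The wrapped job now defaults persistent_resource_id to the runtime
# placeholder; pass persistent_resource_id='' to force on-demand machines.
train_job = utils.create_custom_training_job_from_component(
    train_step,
    machine_type='n1-standard-4',
)
```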
google_cloud_pipeline_components/preview/llm/rlhf/component.py
@@ -21,6 +21,8 @@ from google_cloud_pipeline_components._implementation.llm import env
  from google_cloud_pipeline_components._implementation.llm import function_based
  from google_cloud_pipeline_components._implementation.llm import reinforcement_learning_graph
  from google_cloud_pipeline_components._implementation.llm import reward_model_graph
+ from google_cloud_pipeline_components._implementation.llm import rlhf_preprocessor
+ from google_cloud_pipeline_components._implementation.llm import utils
  from google_cloud_pipeline_components._implementation.llm import validate_pipeline
  from google_cloud_pipeline_components.preview.llm.infer import component
  import kfp
@@ -94,11 +96,43 @@ def rlhf_pipeline(
  eval_dataset=eval_dataset,
  ).set_display_name('Validate Inputs')

+ preprocess_metadata = rlhf_preprocessor.rlhf_preprocessor(
+ large_model_reference=large_model_reference,
+ accelerator_type=accelerator_type,
+ use_test_spec=env.get_use_test_machine_spec(),
+ project=env.PRIVATE_ARTIFACT_REGISTRY_PROJECT,
+ location=env.PRIVATE_ARTIFACT_REGISTRY_LOCATION,
+ artifact_registry=env.PRIVATE_ARTIFACT_REGISTRY,
+ tag=env.get_private_image_tag(),
+ evaluation_dataset=eval_dataset,
+ tensorboard_resource_id=tensorboard_resource_id,
+ ).set_display_name('Preprocess Inputs')
+ num_microbatches = preprocess_metadata.outputs['metadata_num_microbatches']
+
  reward_model_pipeline = (
  (
  reward_model_graph.pipeline(
  preference_dataset=preference_dataset,
  large_model_reference=large_model_reference,
+ reward_model_reference=preprocess_metadata.outputs[
+ 'metadata_reward_model_reference'
+ ],
+ reward_model_path=preprocess_metadata.outputs[
+ 'metadata_reward_model_path'
+ ],
+ machine_type=preprocess_metadata.outputs['metadata_machine_type'],
+ tuning_location=preprocess_metadata.outputs[
+ 'metadata_tuning_location'
+ ],
+ accelerator_type=preprocess_metadata.outputs[
+ 'metadata_accelerator_type'
+ ],
+ accelerator_count=preprocess_metadata.outputs[
+ 'metadata_accelerator_count'
+ ],
+ reward_model_image_uri=preprocess_metadata.outputs[
+ 'metadata_refined_image_uri'
+ ],
  prompt_sequence_length=prompt_sequence_length,
  target_sequence_length=target_sequence_length,
  eval_dataset=validate_pipeline_task.outputs[
@@ -110,9 +144,9 @@ def rlhf_pipeline(
  lora_dim=reward_lora_dim,
  project=project,
  location=location,
- accelerator_type=accelerator_type,
  tensorboard_resource_id=tensorboard_resource_id,
  encryption_spec_key_name=encryption_spec_key_name,
+ num_microbatches=num_microbatches,
  )
  )
  .set_display_name('Train Reward Model')
@@ -120,8 +154,8 @@ def rlhf_pipeline(
  )
  rl_model_pipeline = reinforcement_learning_graph.pipeline(
  prompt_dataset=prompt_dataset,
- input_reward_model_path=reward_model_pipeline.outputs[
- 'reward_model_base_path'
+ input_reward_model_path=preprocess_metadata.outputs[
+ 'metadata_reward_model_path'
  ],
  input_reward_adapter_path=reward_model_pipeline.outputs[
  'reward_model_adapter_path'
@@ -130,6 +164,22 @@ def rlhf_pipeline(
  'reward_dataset_path'
  ],
  large_model_reference=large_model_reference,
+ reward_model_reference=preprocess_metadata.outputs[
+ 'metadata_reward_model_reference'
+ ],
+ policy_model_reference=preprocess_metadata.outputs[
+ 'metadata_large_model_reference'
+ ],
+ policy_model_path=preprocess_metadata.outputs[
+ 'metadata_reference_model_path'
+ ],
+ machine_type=preprocess_metadata.outputs['metadata_machine_type'],
+ tuning_location=preprocess_metadata.outputs['metadata_tuning_location'],
+ accelerator_type=preprocess_metadata.outputs['metadata_accelerator_type'],
+ accelerator_count=preprocess_metadata.outputs[
+ 'metadata_accelerator_count'
+ ],
+ rl_image_uri=preprocess_metadata.outputs['metadata_refined_image_uri'],
  prompt_sequence_length=prompt_sequence_length,
  target_sequence_length=target_sequence_length,
  reinforcement_learning_rate_multiplier=reinforcement_learning_rate_multiplier,
@@ -138,17 +188,16 @@ def rlhf_pipeline(
  instruction=instruction,
  reward_lora_dim=reward_lora_dim,
  project=project,
- accelerator_type=accelerator_type,
  location=location,
  tensorboard_resource_id=tensorboard_resource_id,
  encryption_spec_key_name=encryption_spec_key_name,
+ num_microbatches=num_microbatches,
  ).set_display_name('Reinforcement Learning')

- has_inference_dataset = function_based.value_exists(
- value=eval_dataset
- ).set_display_name('Resolve Inference Dataset')
+ has_inference_dataset = preprocess_metadata.outputs['has_inference_dataset']
+
  with kfp.dsl.Condition(
- has_inference_dataset.output == True, # pylint: disable=singleton-comparison
+ has_inference_dataset == True, # pylint: disable=singleton-comparison
  name='Perform Inference',
  ):
  has_model_checkpoint = function_based.value_exists(
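The pylint-suppressed `== True` survives the refactor because it is load-bearing: at pipeline-definition time a task output is a placeholder, not a Python bool, so KFP conditions must be written as explicit comparisons. A standalone sketch of the pattern with hypothetical components:

```python
from kfp import dsl

@dsl.component
def has_data(path: str) -> bool:
    # Hypothetical stand-in for the preprocessor's has_inference_dataset output.
    return bool(path)

@dsl.component
def run_inference():
    print('inference step')

@dsl.pipeline(name='condition-demo')
def demo(path: str = ''):
    check = has_data(path=path)
    # `if check.output:` would test the placeholder object, not the runtime
    # value, so the explicit comparison is required to compile the branch.
    with dsl.Condition(check.output == True, name='Perform Inference'):  # pylint: disable=singleton-comparison
        run_inference()
```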
@@ -174,6 +223,9 @@ def rlhf_pipeline(
  llm_model_handler = deployment_graph.pipeline(
  output_adapter_path=rl_model_pipeline.outputs['output_adapter_path'],
  large_model_reference=large_model_reference,
+ policy_model_reference=preprocess_metadata.outputs[
+ 'metadata_large_model_reference'
+ ],
  model_display_name=model_display_name,
  deploy_model=deploy_model,
  encryption_spec_key_name=encryption_spec_key_name,
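Taken together, these rlhf hunks move hardware and image resolution out of scattered `function_based` resolvers into a single `rlhf_preprocessor` task whose `metadata_*` outputs fan out to the reward-model, reinforcement-learning, and deployment subgraphs. A simplified sketch of that fan-out pattern (hypothetical component and field names, not the real preprocessor):

```python
from typing import NamedTuple

from kfp import dsl

@dsl.component
def preprocess(large_model_reference: str) -> NamedTuple(
    'Outputs', [('machine_type', str), ('accelerator_type', str)]
):
    # Toy stand-in: resolve every hardware decision once, up front.
    import collections
    outputs = collections.namedtuple(
        'Outputs', ['machine_type', 'accelerator_type']
    )
    return outputs('cloud-tpu', 'TPU_V3')

@dsl.component
def train(machine_type: str, accelerator_type: str):
    print(f'training on {machine_type}/{accelerator_type}')

@dsl.pipeline(name='fan-out-demo')
def demo(large_model_reference: str = 'llama-2-7b'):
    meta = preprocess(large_model_reference=large_model_reference)
    # Every downstream subgraph reads the same resolved values, giving the
    # pipeline a single source of truth for machine configuration.
    train(
        machine_type=meta.outputs['machine_type'],
        accelerator_type=meta.outputs['accelerator_type'],
    )
```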
google_cloud_pipeline_components/preview/model_evaluation/__init__.py
@@ -16,10 +16,10 @@
  from google_cloud_pipeline_components.preview.model_evaluation.data_bias_component import detect_data_bias as DetectDataBiasOp
  from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp
  from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_graph_component import feature_attribution_graph_component as FeatureAttributionGraphComponentOp
- from google_cloud_pipeline_components.preview.model_evaluation.model_based_llm_evaluation.autosxs.autosxs_pipeline import autosxs_pipeline
  from google_cloud_pipeline_components.preview.model_evaluation.model_bias_component import detect_model_bias as DetectModelBiasOp
  from google_cloud_pipeline_components.v1.model_evaluation.evaluation_llm_classification_pipeline import evaluation_llm_classification_pipeline
  from google_cloud_pipeline_components.v1.model_evaluation.evaluation_llm_text_generation_pipeline import evaluation_llm_text_generation_pipeline
+ from google_cloud_pipeline_components.v1.model_evaluation.model_based_llm_evaluation.autosxs.autosxs_pipeline import autosxs_pipeline

  __all__ = [
  'autosxs_pipeline',
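AutoSxS graduates from preview to v1 in this release. The preview `__init__` keeps re-exporting `autosxs_pipeline` (it stays in `__all__`), so existing imports keep working, but new code should target the v1 module path shown in the moved file:

```python
# Still works in 2.14.0 via the preview re-export:
from google_cloud_pipeline_components.preview.model_evaluation import autosxs_pipeline

# Canonical location after the preview -> v1 move:
from google_cloud_pipeline_components.v1.model_evaluation.model_based_llm_evaluation.autosxs.autosxs_pipeline import (
    autosxs_pipeline,
)
```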
google_cloud_pipeline_components/proto/template_metadata_pb2.py
@@ -67,10 +67,15 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
  b' \x01(\x0b\x32\x1a.template_metadata.OptionsH\x00\x42\x08\n\x06values"U\n\x0bMachineType\x12\r\n\x03\x61ny\x18\x01'
  b' \x01(\x08H\x00\x12-\n\x07options\x18\x02'
  b' \x01(\x0b\x32\x1a.template_metadata.OptionsH\x00\x42\x08\n\x06values"1\n\x07Options\x12&\n\x06values\x18\x01'
- b' \x03(\x0b\x32\x16.google.protobuf.Value"\x82\x02\n\x0fValidationItems\x12N\n\x0esa_validations\x18\x01'
+ b' \x03(\x0b\x32\x16.google.protobuf.Value"\xcc\x02\n\x0fValidationItems\x12N\n\x0esa_validations\x18\x01'
  b' \x03(\x0b\x32\x36.template_metadata.GoogleCloudServiceAccountValidation\x12O\n\x11quota_validations\x18\x02'
  b' \x03(\x0b\x32\x34.template_metadata.GoogleCloudProjectQuotaValidation\x12N\n\x0f\x61pi_validations\x18\x03'
- b' \x03(\x0b\x32\x35.template_metadata.GoogleCloudApiEnablementValidation"p\n!GoogleCloudProjectQuotaValidation\x12\x13\n\x0bmetric_name\x18\x01'
+ b' \x03(\x0b\x32\x35.template_metadata.GoogleCloudApiEnablementValidation\x12H\n\x0fgcs_validations\x18\x04'
+ b' \x03(\x0b\x32/.template_metadata.GoogleCloudStorageValidation"\x80\x01\n\x1cGoogleCloudStorageValidation\x12\x0f\n\x07gcs_uri\x18\x01'
+ b' \x01(\t\x12\x10\n\x08is_input\x18\x02'
+ b' \x01(\x08\x12\x1f\n\x17\x64\x65\x66\x61ult_service_account\x18\x03'
+ b' \x01(\t\x12\x1c\n\x14override_placeholder\x18\x04'
+ b' \x01(\t"p\n!GoogleCloudProjectQuotaValidation\x12\x13\n\x0bmetric_name\x18\x01'
  b' \x01(\t\x12\x15\n\x0bint64_value\x18\x02'
  b' \x01(\x03H\x00\x12\x16\n\x0c\x64ouble_value\x18\x03'
  b' \x01(\x01H\x00\x42\x07\n\x05value"\x8d\x01\n#GoogleCloudServiceAccountValidation\x12\x1f\n\x17\x64\x65\x66\x61ult_principal_email\x18\x01'
@@ -91,12 +96,12 @@ _builder.BuildTopDescriptorsAndMessages(
  if not _descriptor._USE_C_DESCRIPTORS:
  _globals['DESCRIPTOR']._loaded_options = None
  _globals['DESCRIPTOR']._serialized_options = b'P\001'
- _globals['_SIZE']._serialized_start = 2887
- _globals['_SIZE']._serialized_end = 2958
- _globals['_CONTENTTYPE']._serialized_start = 2961
- _globals['_CONTENTTYPE']._serialized_end = 3091
- _globals['_URITYPE']._serialized_start = 3093
- _globals['_URITYPE']._serialized_end = 3190
+ _globals['_SIZE']._serialized_start = 3092
+ _globals['_SIZE']._serialized_end = 3163
+ _globals['_CONTENTTYPE']._serialized_start = 3166
+ _globals['_CONTENTTYPE']._serialized_end = 3296
+ _globals['_URITYPE']._serialized_start = 3298
+ _globals['_URITYPE']._serialized_end = 3395
  _globals['_TEMPLATEMETADATA']._serialized_start = 164
  _globals['_TEMPLATEMETADATA']._serialized_end = 301
  _globals['_IOMETADATA']._serialized_start = 303
@@ -136,11 +141,13 @@ if not _descriptor._USE_C_DESCRIPTORS:
  _globals['_OPTIONS']._serialized_start = 2256
  _globals['_OPTIONS']._serialized_end = 2305
  _globals['_VALIDATIONITEMS']._serialized_start = 2308
- _globals['_VALIDATIONITEMS']._serialized_end = 2566
- _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_start = 2568
- _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_end = 2680
- _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_start = 2683
- _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_end = 2824
- _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_start = 2826
- _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_end = 2885
+ _globals['_VALIDATIONITEMS']._serialized_end = 2640
+ _globals['_GOOGLECLOUDSTORAGEVALIDATION']._serialized_start = 2643
+ _globals['_GOOGLECLOUDSTORAGEVALIDATION']._serialized_end = 2771
+ _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_start = 2773
+ _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_end = 2885
+ _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_start = 2888
+ _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_end = 3029
+ _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_start = 3031
+ _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_end = 3090
  # @@protoc_insertion_point(module_scope)
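The descriptor changes above decode to a new `GoogleCloudStorageValidation` message (fields `gcs_uri`, `is_input`, `default_service_account`, `override_placeholder`) plus a repeated `gcs_validations` field on `ValidationItems`; the shifted `_serialized_start`/`_serialized_end` values are just protoc re-indexing byte ranges in the enlarged descriptor. A construction sketch with field names read directly from the descriptor bytes; semantics beyond the names are assumptions:

```python
from google_cloud_pipeline_components.proto import template_metadata_pb2

# Field numbers/types per the descriptor: gcs_uri (1, string),
# is_input (2, bool), default_service_account (3, string),
# override_placeholder (4, string).
gcs_check = template_metadata_pb2.GoogleCloudStorageValidation(
    gcs_uri='gs://my-bucket/input/train.csv',  # hypothetical URI
    is_input=True,
)
items = template_metadata_pb2.ValidationItems(gcs_validations=[gcs_check])
print(items)
```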
google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml
@@ -658,7 +658,7 @@ deploymentSpec:
  \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\
  \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \
  \ ref.project, ref.dataset_id)\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-bigquery-create-dataset-2:
  container:
  args:
@@ -693,7 +693,7 @@ deploymentSpec:
  \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\
  \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \
  \ ref.project, ref.dataset_id)\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-bigquery-delete-dataset-with-prefix:
  container:
  args:
@@ -727,7 +727,7 @@ deploymentSpec:
  \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\
  \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\
  \n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-bigquery-query-job:
  container:
  args:
@@ -788,7 +788,7 @@ deploymentSpec:
  \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\
  \ if write_disposition:\n config['write_disposition'] = write_disposition\n\
  \ return config\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-get-first-valid:
  container:
  args:
@@ -812,7 +812,7 @@ deploymentSpec:
  \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
  \n for value in json.loads(values):\n if value:\n return value\n\
  \ raise ValueError('No valid values.')\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-get-model-metadata:
  container:
  args:
@@ -851,7 +851,7 @@ deploymentSpec:
  \ 'forecast_horizon',\n ],\n )(\n options.time_series_timestamp_column,\n\
  \ options.time_series_id_column,\n options.time_series_data_column,\n\
  \ options.horizon,\n )\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-get-table-location:
  container:
  args:
@@ -887,7 +887,7 @@ deploymentSpec:
  \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\
  \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\
  \ return client.get_table(table).location\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-load-table-from-uri:
  container:
  args:
@@ -928,7 +928,7 @@ deploymentSpec:
  \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\
  \ destination=destination,\n project=project,\n location=location,\n\
  \ job_config=job_config).result()\n return destination\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-maybe-replace-with-default:
  container:
  args:
@@ -950,7 +950,7 @@ deploymentSpec:
  \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\
  \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\
  \n return default if not value else value\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-validate-inputs:
  container:
  args:
@@ -1046,7 +1046,7 @@ deploymentSpec:
  \ raise ValueError(\n 'Granularity unit should be one of the\
  \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\
  \n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  pipelineInfo:
  description: Forecasts using a BQML ARIMA_PLUS model.
  name: automl-tabular-bqml-arima-prediction