google-cloud-pipeline-components 2.14.0__py3-none-any.whl → 2.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of google-cloud-pipeline-components has been flagged as potentially problematic.

Files changed (64)
  1. google_cloud_pipeline_components/_implementation/llm/deployment_graph.py +10 -26
  2. google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py +1 -1
  3. google_cloud_pipeline_components/_implementation/llm/infer_preprocessor.py +109 -0
  4. google_cloud_pipeline_components/_implementation/llm/online_evaluation_pairwise.py +8 -0
  5. google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py +5 -6
  6. google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py +24 -0
  7. google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +0 -12
  8. google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py +2 -1
  9. google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation_preprocessor/component.py +14 -0
  10. google_cloud_pipeline_components/_implementation/starry_net/__init__.py +41 -0
  11. google_cloud_pipeline_components/_implementation/{model_evaluation/import_evaluation → starry_net/dataprep}/__init__.py +1 -2
  12. google_cloud_pipeline_components/_implementation/starry_net/dataprep/component.py +159 -0
  13. google_cloud_pipeline_components/_implementation/starry_net/evaluation/__init__.py +13 -0
  14. google_cloud_pipeline_components/_implementation/starry_net/evaluation/component.py +23 -0
  15. google_cloud_pipeline_components/_implementation/starry_net/evaluation/evaluation.yaml +197 -0
  16. google_cloud_pipeline_components/_implementation/starry_net/get_training_artifacts/__init__.py +13 -0
  17. google_cloud_pipeline_components/_implementation/starry_net/get_training_artifacts/component.py +62 -0
  18. google_cloud_pipeline_components/_implementation/starry_net/maybe_set_tfrecord_args/__init__.py +13 -0
  19. google_cloud_pipeline_components/_implementation/starry_net/maybe_set_tfrecord_args/component.py +77 -0
  20. google_cloud_pipeline_components/_implementation/starry_net/set_dataprep_args/__init__.py +13 -0
  21. google_cloud_pipeline_components/_implementation/starry_net/set_dataprep_args/component.py +97 -0
  22. google_cloud_pipeline_components/_implementation/starry_net/set_eval_args/__init__.py +13 -0
  23. google_cloud_pipeline_components/_implementation/starry_net/set_eval_args/component.py +76 -0
  24. google_cloud_pipeline_components/_implementation/starry_net/set_test_set/__init__.py +13 -0
  25. google_cloud_pipeline_components/_implementation/starry_net/set_test_set/component.py +48 -0
  26. google_cloud_pipeline_components/_implementation/starry_net/set_tfrecord_args/__init__.py +13 -0
  27. google_cloud_pipeline_components/_implementation/starry_net/set_tfrecord_args/component.py +70 -0
  28. google_cloud_pipeline_components/_implementation/starry_net/set_train_args/__init__.py +13 -0
  29. google_cloud_pipeline_components/_implementation/starry_net/set_train_args/component.py +90 -0
  30. google_cloud_pipeline_components/_implementation/starry_net/train/__init__.py +13 -0
  31. google_cloud_pipeline_components/_implementation/starry_net/train/component.py +209 -0
  32. google_cloud_pipeline_components/_implementation/starry_net/upload_decomposition_plots/__init__.py +13 -0
  33. google_cloud_pipeline_components/_implementation/starry_net/upload_decomposition_plots/component.py +59 -0
  34. google_cloud_pipeline_components/_implementation/starry_net/upload_model/__init__.py +13 -0
  35. google_cloud_pipeline_components/_implementation/starry_net/upload_model/component.py +23 -0
  36. google_cloud_pipeline_components/_implementation/starry_net/upload_model/upload_model.yaml +37 -0
  37. google_cloud_pipeline_components/_implementation/starry_net/version.py +18 -0
  38. google_cloud_pipeline_components/container/utils/error_surfacing.py +45 -0
  39. google_cloud_pipeline_components/container/v1/model/get_model/remote_runner.py +36 -7
  40. google_cloud_pipeline_components/preview/llm/infer/component.py +22 -25
  41. google_cloud_pipeline_components/preview/llm/rlhf/component.py +15 -8
  42. google_cloud_pipeline_components/preview/model_evaluation/__init__.py +4 -1
  43. google_cloud_pipeline_components/{_implementation/model_evaluation/import_evaluation/component.py → preview/model_evaluation/model_evaluation_import_component.py} +4 -3
  44. google_cloud_pipeline_components/preview/starry_net/__init__.py +19 -0
  45. google_cloud_pipeline_components/preview/starry_net/component.py +443 -0
  46. google_cloud_pipeline_components/proto/task_error_pb2.py +32 -0
  47. google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml +13 -13
  48. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +10 -0
  49. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +4 -1
  50. google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py +8 -10
  51. google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py +2 -2
  52. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py +2 -2
  53. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py +2 -2
  54. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py +2 -2
  55. google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py +2 -2
  56. google_cloud_pipeline_components/v1/model_evaluation/evaluation_llm_classification_pipeline.py +4 -2
  57. google_cloud_pipeline_components/v1/model_evaluation/evaluation_llm_text_generation_pipeline.py +8 -2
  58. google_cloud_pipeline_components/v1/model_evaluation/model_based_llm_evaluation/autosxs/autosxs_pipeline.py +1 -0
  59. google_cloud_pipeline_components/version.py +1 -1
  60. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/METADATA +17 -20
  61. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/RECORD +64 -32
  62. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/WHEEL +1 -1
  63. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/LICENSE +0 -0
  64. {google_cloud_pipeline_components-2.14.0.dist-info → google_cloud_pipeline_components-2.15.0.dist-info}/top_level.txt +0 -0
google_cloud_pipeline_components/_implementation/llm/deployment_graph.py:

@@ -37,8 +37,10 @@ def pipeline(
     policy_model_reference: str,
     model_display_name: Optional[str] = None,
     deploy_model: bool = True,
+    upload_model: bool = True,
     encryption_spec_key_name: str = '',
     upload_location: str = _placeholders.LOCATION_PLACEHOLDER,
+    regional_endpoint: str = '',
 ) -> PipelineOutput:
   # fmt: off
   """Uploads a tuned language model and (optionally) deploys it to an endpoint.
@@ -51,50 +53,32 @@ def pipeline(
     deploy_model: Whether to deploy the model to an endpoint in `us-central1`. Default is True.
     encryption_spec_key_name: Customer-managed encryption key. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. Note that this is not supported for TPU at the moment.
     upload_location: Region to upload and deploy the model to. Default is the location used to run the pipeline components.
+    regional_endpoint: Regional endpoint to upload the model.
 
   Returns:
     model_resource_name: Path to the model uploaded to the Model Registry. This will be an empty string if the model was not deployed.
     endpoint_resource_name: Path the Online Prediction Endpoint. This will be an empty string if the model was not deployed.
   """
   # fmt: on
-  regional_endpoint = function_based.resolve_regional_endpoint(
-      upload_location=upload_location
-  ).set_display_name('Resolve Regional Endpoint')
-
-  display_name = (
-      function_based.resolve_model_display_name(
-          large_model_reference=large_model_reference,
-          model_display_name=model_display_name,
-      )
-      .set_caching_options(False)
-      .set_display_name('Resolve Model Display Name')
-  )
-
-  upload_model = function_based.resolve_upload_model(
-      large_model_reference=policy_model_reference,
-  ).set_display_name('Resolve Upload Model')
   upload_task = upload_llm_model.refined_upload_llm_model(
       project=_placeholders.PROJECT_ID_PLACEHOLDER,
       location=upload_location,
-      regional_endpoint=regional_endpoint.output,
+      regional_endpoint=regional_endpoint,
       artifact_uri=output_adapter_path,
-      model_display_name=display_name.output,
+      model_display_name=model_display_name,
       model_reference_name=large_model_reference,
-      upload_model=upload_model.output,
+      upload_model=upload_model,
       encryption_spec_key_name=encryption_spec_key_name,
       tune_type='rlhf',
   ).set_display_name('Upload Model')
-  deploy_model = function_based.resolve_deploy_model(
-      deploy_model=deploy_model,
-      large_model_reference=policy_model_reference,
-  ).set_display_name('Resolve Deploy Model')
+
   deploy_task = deploy_llm_model.deploy_llm_model(
       project=_placeholders.PROJECT_ID_PLACEHOLDER,
       location=upload_location,
       model_resource_name=upload_task.outputs['model_resource_name'],
-      display_name=display_name.output,
-      regional_endpoint=regional_endpoint.output,
-      deploy_model=deploy_model.output,
+      display_name=model_display_name,
+      regional_endpoint=regional_endpoint,
+      deploy_model=deploy_model,
       encryption_spec_key_name=encryption_spec_key_name,
   ).set_display_name('Deploy Model')
   return PipelineOutput(
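Net effect: the 'Resolve Regional Endpoint', 'Resolve Model Display Name', 'Resolve Upload Model', and 'Resolve Deploy Model' function_based tasks are gone, and `upload_model`, `model_display_name`, `regional_endpoint`, and `deploy_model` arrive as plain pipeline parameters (in the RLHF pipeline, the new rlhf_preprocessor outputs shown further down supply them). A minimal caller sketch; every literal value is a placeholder, and the regional endpoint format is an assumption, not taken from the diff:

# Hypothetical invocation of the revised deployment graph.
deployment = pipeline(
    output_adapter_path='gs://my-bucket/adapter',  # placeholder URI
    large_model_reference='text-bison@001',        # placeholder reference
    policy_model_reference='text-bison@001',       # placeholder reference
    model_display_name='my-tuned-model',
    deploy_model=True,
    upload_model=True,
    upload_location='us-central1',
    regional_endpoint='us-central1-aiplatform.googleapis.com',  # assumed format
)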
google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py:

@@ -17,4 +17,4 @@
 DO NOT EDIT - This file is generated, manual changes will be overridden.
 """
 
-IMAGE_TAG = '20240425_1027_RC00'
+IMAGE_TAG = '20240623_1707'
google_cloud_pipeline_components/_implementation/llm/infer_preprocessor.py (new file):

@@ -0,0 +1,109 @@
+# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Component that preprocesses inputs for infer pipeline."""
+
+from google_cloud_pipeline_components import _placeholders
+from google_cloud_pipeline_components import utils as gcpc_utils
+from google_cloud_pipeline_components._implementation.llm import utils
+from kfp import dsl
+
+
+@dsl.container_component
+def infer_preprocessor(
+    large_model_reference: str,
+    accelerator_type: str,
+    use_test_spec: bool,
+    project: str,
+    location: str,
+    artifact_registry: str,
+    tag: str,
+    gcp_resources: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_large_model_reference: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_reference_model_path: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_reward_model_reference: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_reward_model_path: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_machine_type: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_tuning_location: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_accelerator_type: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_accelerator_count: dsl.OutputPath(int),  # pytype: disable=invalid-annotation
+    metadata_instruction: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_refined_image_uri: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    use_experimental_image: bool = False,
+    input_reference_model_path: str = '',
+    instruction: str = '',
+    image_uri: str = utils.get_default_image_uri('refined_cpu', ''),
+) -> dsl.ContainerSpec:  # pylint: disable=g-doc-args
+  # fmt: off
+  """Preprocess infer pipeline inputs.
+
+  Args:
+    large_model_reference: The model for fine tuning.
+    accelerator_type: Specific accelerator type for the job.
+    use_test_spec: Whether to use a lower resource machine for testing.
+    project: Project that contains the artifact registry.
+    location: Region that contains the artifact registry.
+    artifact_registry: Registry that contains Docker images.
+    tag: Image tag.
+    use_experimental_image: Whether to use refined experimental image.
+    input_reference_model_path: The model checkpoint path for the reference model
+    instruction: The instruction to let the model know what task it needs to perform.
+    image_uri: Docker image URI to use for the custom job.
+
+  Returns:
+    gcp_resources: GCP resources that can be used to track the custom job.
+    metadata_large_model_reference: The base model for fine tuning. The name should be in capitalized snake case format.
+    metadata_reference_model_path: The model checkpoint path for the reinforcer model
+    metadata_reward_model_reference: The base model for training reward model. The name should be in capitalized snake case format.
+    metadata_reward_model_path: The model checkpoint path for the reward model.
+    metadata_machine_type: The type of the machine to provision for the custom job.
+    metadata_tuning_location: The GCP region to run the custom job.
+    metadata_accelerator_type: Specific accelerator type for the custom job.
+    metadata_accelerator_count: The number of accelerator.
+    metadata_instruction: The instruction to let the model know what task it needs to perform.
+    metadata_refined_image_uri: Docker image URI to use for the custom job.
+  """
+  # fmt: on
+  return gcpc_utils.build_serverless_customjob_container_spec(
+      project=_placeholders.PROJECT_ID_PLACEHOLDER,
+      location=_placeholders.LOCATION_PLACEHOLDER,
+      custom_job_payload=utils.build_payload(
+          display_name='infer_preprocessor',
+          machine_type='n1-standard-4',
+          image_uri=image_uri,
+          args=[
+              '--app_name=infer_preprocessor',
+              f'--large_model_reference={large_model_reference}',
+              f'--input_reference_model_path={input_reference_model_path}',
+              f'--accelerator_type={accelerator_type}',
+              f'--use_test_spec={use_test_spec}',
+              f'--project={project}',
+              f'--location={location}',
+              f'--artifact_registry={artifact_registry}',
+              f'--tag={tag}',
+              f'--use_experimental_image={use_experimental_image}',
+              f'--instruction={instruction}',
+              f'--metadata_large_model_reference_path={metadata_large_model_reference}',
+              f'--metadata_reference_model_path_path={metadata_reference_model_path}',
+              f'--metadata_reward_model_reference_path={metadata_reward_model_reference}',
+              f'--metadata_reward_model_path_path={metadata_reward_model_path}',
+              f'--metadata_machine_type_path={metadata_machine_type}',
+              f'--metadata_tuning_location_path={metadata_tuning_location}',
+              f'--metadata_accelerator_type_path={metadata_accelerator_type}',
+              f'--metadata_accelerator_count_path={metadata_accelerator_count}',
+              f'--metadata_instruction_path={metadata_instruction}',
+              f'--metadata_refined_image_uri_path={metadata_refined_image_uri}',
+          ],
+      ),
+      gcp_resources=gcp_resources,
+  )
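The component is a thin container-component wrapper: it builds a serverless custom-job spec whose container writes each resolved `metadata_*` value to the file path KFP allocates for the matching `dsl.OutputPath`. A hedged usage sketch inside a KFP pipeline; all literal values are placeholders, only the parameter names come from the diff:

from kfp import dsl

@dsl.pipeline(name='infer-preprocess-demo')
def demo_pipeline():
  preprocess_task = infer_preprocessor(
      large_model_reference='text-bison@001',  # placeholder reference
      accelerator_type='TPU_V3',               # placeholder type
      use_test_spec=False,
      project='my-project',                    # placeholder project
      location='us-central1',
      artifact_registry='my-registry',         # placeholder registry
      tag='20240623_1707',                     # matches the IMAGE_TAG bump above
  )
  # Downstream tuning steps would consume outputs such as
  # preprocess_task.outputs['metadata_machine_type'].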
google_cloud_pipeline_components/_implementation/llm/online_evaluation_pairwise.py:

@@ -52,6 +52,7 @@ def online_evaluation_pairwise(
     project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
     location: str = _placeholders.LOCATION_PLACEHOLDER,
     encryption_spec_key_name: str = '',
+    autorater_prompt_parameters: Dict[str, Dict[str, str]] = {},
 ) -> dsl.ContainerSpec:  # pylint: disable=g-doc-args
   """Evaluate two models using an autorater.
 
@@ -73,6 +74,8 @@ def online_evaluation_pairwise(
     encryption_spec_key_name: Customer-managed encryption key options. If this
       is set, then all resources created by the component will be encrypted with
       the provided encryption key.
+    autorater_prompt_parameters: Map of autorater prompt template parameters to
+      columns or templates.
 
   Returns:
     judgments: Individual judgments used to calculate the win rates.
@@ -112,6 +115,11 @@ def online_evaluation_pairwise(
           '--executor_input={{$.json_escape[1]}}',
           f'--kms_key_name={encryption_spec_key_name}',
           f'--metadata_path={metadata}',
+          (
+              '--autorater_prompt_parameters='
+              "{{$.inputs.parameters['autorater_prompt_parameters']"
+              '.json_escape[0]}}'
+          ),
       ],
       encryption_spec_key_name=encryption_spec_key_name,
   ),
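The new flag serializes the parameter dict with KFP's `json_escape` placeholder, so the container receives it as escaped JSON. Each entry points an autorater prompt-template parameter at either a dataset column or a literal template; an illustrative value (key names are examples, not from the diff):

autorater_prompt_parameters = {
    'inference_context': {'column': 'my_prompt'},            # read from a column
    'inference_instruction': {'template': 'Summarize the following.'},  # literal template
}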
google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py:

@@ -21,6 +21,7 @@ from google_cloud_pipeline_components._implementation.llm import function_based
 from google_cloud_pipeline_components._implementation.llm import preprocess_chat_dataset
 from google_cloud_pipeline_components._implementation.llm import private_text_comparison_importer
 from google_cloud_pipeline_components._implementation.llm import reward_model_trainer
+from google_cloud_pipeline_components._implementation.llm import rlhf_preprocessor
 from google_cloud_pipeline_components._implementation.llm import upload_tensorboard_metrics
 import kfp
 
@@ -45,6 +46,7 @@ def pipeline(
     accelerator_type: str,
     accelerator_count: int,
     reward_model_image_uri: str,
+    comma_separated_candidates_field_names: str,
     prompt_sequence_length: int = 512,
     target_sequence_length: int = 64,
     batch_size: int = 64,
@@ -72,6 +74,7 @@ def pipeline(
     accelerator_type: Specific accelerator type for the custom job.
     accelerator_count: The number of accelerator.
     reward_model_image_uri: Docker image URI to use for the reward model training job.
+    comma_separated_candidates_field_names: Comma separated list of fields that contain candidate text, e.g. ``'field_1,field_2,field_3'``.
     prompt_sequence_length: Maximum tokenized sequence length for input text. Higher values increase memory overhead. This value should be at most 8192. Default value is 512.
     target_sequence_length: Maximum tokenized sequence length for target text. Higher values increase memory overhead. This value should be at most 1024. Default value is 64.
     batch_size: Number of examples in each finetuning step. Default is 64.
@@ -91,7 +94,6 @@ def pipeline(
   """
   # fmt: on
   prompt_column = 'input_text'
-  candidate_columns = ['candidate_0', 'candidate_1']
   choice_column = 'choice'
 
   processed_preference_dataset = (
@@ -103,9 +105,6 @@ def pipeline(
       ).set_display_name('Preprocess Prompt Dataset')
   )
 
-  comma_separated_candidates_field_names = (
-      function_based.convert_to_delimited_string(items=candidate_columns)
-  )
   preference_dataset_importer = (
       private_text_comparison_importer.private_text_comparison_importer(
           project=project,
@@ -114,7 +113,7 @@ def pipeline(
               'processed_dataset_uri'
           ],
           inputs_field_name=prompt_column,
-          comma_separated_candidates_field_names=comma_separated_candidates_field_names.output,
+          comma_separated_candidates_field_names=comma_separated_candidates_field_names,
           choice_field_name=choice_column,
           split=env.TRAIN_SPLIT,
           large_model_reference=reward_model_reference,
@@ -131,7 +130,7 @@ def pipeline(
           location=location,
           input_text=eval_dataset,
           inputs_field_name=prompt_column,
-          comma_separated_candidates_field_names=comma_separated_candidates_field_names.output,
+          comma_separated_candidates_field_names=comma_separated_candidates_field_names,
           choice_field_name=choice_column,
           split=env.TRAIN_SPLIT,
           large_model_reference=reward_model_reference,
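Net effect: the candidate columns are no longer hard-coded inside the graph; the caller (in 2.15.0, the rlhf_preprocessor's `metadata_candidate_columns_string` output) supplies the delimited string directly. The removed `convert_to_delimited_string` step reduces to a plain join:

# Equivalent of the removed in-pipeline step, shown as plain Python.
candidate_columns = ['candidate_0', 'candidate_1']  # the old hard-coded default
comma_separated_candidates_field_names = ','.join(candidate_columns)
assert comma_separated_candidates_field_names == 'candidate_0,candidate_1'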
google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py:

@@ -14,6 +14,7 @@
 """Component that preprocesses inputs for Reinforcement Learning from Human Feedback (RLHF)."""
 
 import os
+from typing import List
 
 from google_cloud_pipeline_components import _placeholders
 from google_cloud_pipeline_components import utils as gcpc_utils
@@ -33,6 +34,7 @@ def rlhf_preprocessor(
     gcp_resources: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
     has_tensorboard_id: dsl.OutputPath(bool),  # pytype: disable=invalid-annotation
     has_inference_dataset: dsl.OutputPath(bool),  # pytype: disable=invalid-annotation
+    metadata_candidate_columns_string: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
     metadata_large_model_reference: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
     metadata_reference_model_path: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
     metadata_reward_model_reference: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
@@ -43,11 +45,18 @@ def rlhf_preprocessor(
     metadata_accelerator_count: dsl.OutputPath(int),  # pytype: disable=invalid-annotation
     metadata_refined_image_uri: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
     metadata_num_microbatches: dsl.OutputPath(int),  # pytype: disable=invalid-annotation
+    metadata_upload_location: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_deploy_model: dsl.OutputPath(bool),  # pytype: disable=invalid-annotation
+    metadata_model_display_name: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_upload_model: dsl.OutputPath(bool),  # pytype: disable=invalid-annotation
     use_experimental_image: bool = False,
     evaluation_dataset: str = '',
     tensorboard_resource_id: str = '',
     input_reference_model_path: str = '',
     image_uri: str = utils.get_default_image_uri('refined_cpu', ''),
+    upload_location: str = '',
+    model_display_name: str = '',
+    deploy_model: bool = True,
 ) -> dsl.ContainerSpec:  # pylint: disable=g-doc-args
   # fmt: off
   """Preprocess RLHF pipeline inputs.
@@ -68,6 +77,9 @@ def rlhf_preprocessor(
     metadata_reward_model_reference: The base model for training reward model. The name should be in capitalized snake case format.
     metadata_reward_model_path: The model checkpoint path for the reward model.
     image_uri: Docker image URI to use for the custom job.
+    upload_location: Region where the model will be uploaded.
+    model_display_name: Display name of the model.
+    deploy_model: Whether to deploy the model.
 
   Returns:
     gcp_resources: GCP resources that can be used to track the custom job.
@@ -80,6 +92,10 @@ def rlhf_preprocessor(
     metadata_refined_image_uri: Docker image URI to use for the custom job.
     metadata_num_microbatches: Number of microbatches to break the total batch
       size into during training.
+    metadata_upload_location: Regional endpoint.
+    metadata_deploy_model: Whether to deploy the model.
+    metadata_model_display_name: Display name of the model.
+    metadata_upload_model: Whether to upload the model.
   """
   # fmt: on
   return gcpc_utils.build_serverless_customjob_container_spec(
@@ -102,8 +118,12 @@ def rlhf_preprocessor(
              f'--artifact_registry={artifact_registry}',
              f'--tag={tag}',
              f'--use_experimental_image={use_experimental_image}',
+             f'--upload_location={upload_location}',
+             f'--deploy_model={deploy_model}',
+             f'--model_display_name={model_display_name}',
              f'--has_tensorboard_id_path={has_tensorboard_id}',
              f'--has_inference_dataset_path={has_inference_dataset}',
+             f'--metadata_candidate_columns_string_path={metadata_candidate_columns_string}',
              f'--metadata_large_model_reference_path={metadata_large_model_reference}',
              f'--metadata_reference_model_path_path={metadata_reference_model_path}',
              f'--metadata_reward_model_reference_path={metadata_reward_model_reference}',
@@ -114,6 +134,10 @@ def rlhf_preprocessor(
              f'--metadata_accelerator_count_path={metadata_accelerator_count}',
              f'--metadata_refined_image_uri_path={metadata_refined_image_uri}',
              f'--metadata_num_microbatches_path={metadata_num_microbatches}',
+             f'--metadata_upload_location_path={metadata_upload_location}',
+             f'--metadata_deploy_model_path={metadata_deploy_model}',
+             f'--metadata_model_display_name_path={metadata_model_display_name}',
+             f'--metadata_upload_model_path={metadata_upload_model}',
          ],
      ),
      gcp_resources=gcp_resources,
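Each `--metadata_*_path` flag carries the file path KFP allocates for the matching `dsl.OutputPath` parameter; the custom job writes the resolved value there, which is how the new upload/deploy settings flow into the deployment graph without the removed function_based resolver tasks. A general KFP-style sketch of that contract (assumed standard behavior, not code from this package):

import json
import os

def write_parameter_output(path: str, value: object) -> None:
  """Writes one resolved output, e.g. the value behind --metadata_upload_model_path."""
  os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
  with open(path, 'w') as f:
    # json.dumps lowercases booleans ('true'/'false') so they parse back into
    # typed parameter values; strings are written raw.
    f.write(value if isinstance(value, str) else json.dumps(value))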
google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py:

@@ -25,27 +25,18 @@ from google_cloud_pipeline_components._implementation.model_evaluation.feature_a
 from google_cloud_pipeline_components._implementation.model_evaluation.feature_attribution.feature_attribution_graph_component import feature_attribution_graph_component as FeatureAttributionGraphComponentOp
 from google_cloud_pipeline_components._implementation.model_evaluation.feature_extractor.component import feature_extractor_error_analysis as FeatureExtractorOp
 from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluated_annotation.component import evaluated_annotation_import as ModelImportEvaluatedAnnotationOp
-from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_classification_postprocessor.component import llm_classification_predictions_postprocessor_graph_component as LLMEvaluationClassificationPredictionsPostprocessorOp
-from google_cloud_pipeline_components._implementation.model_evaluation.llm_embedding.evaluation_llm_embedding_pipeline import evaluation_llm_embedding_pipeline
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_embedding_retrieval.component import llm_embedding_retrieval as LLMEmbeddingRetrievalOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation.component import model_evaluation_text_generation as LLMEvaluationTextGenerationOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation_preprocessor.component import llm_evaluation_dataset_preprocessor_graph_component as LLMEvaluationPreprocessorOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_information_retrieval_preprocessor.component import llm_information_retrieval_preprocessor as LLMInformationRetrievalPreprocessorOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_retrieval_metrics.component import llm_retrieval_metrics as LLMRetrievalMetricsOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.component import llm_safety_bias_metrics as LLMSafetyBiasMetricsOp
-from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.evaluation_llm_safety_bias_pipeline import evaluation_llm_safety_bias_pipeline
-from google_cloud_pipeline_components._implementation.model_evaluation.model_inference.component import model_inference_and_evaluation_component
-from google_cloud_pipeline_components._implementation.model_evaluation.model_inference.component import model_inference_component
 from google_cloud_pipeline_components._implementation.model_evaluation.model_name_preprocessor.component import model_name_preprocessor as ModelNamePreprocessorOp
 from google_cloud_pipeline_components._implementation.model_evaluation.target_field_data_remover.component import target_field_data_remover as TargetFieldDataRemoverOp
-from google_cloud_pipeline_components._implementation.model_evaluation.text2sql.evaluation_llm_text2sql_pipeline import evaluation_llm_text2sql_pipeline
 
 
 __all__ = [
-    'evaluation_llm_safety_bias_pipeline',
-    'evaluation_llm_embedding_pipeline',
-    'evaluation_llm_text2sql_pipeline',
     'evaluation_llm_endpoint_batch_predict_pipeline_graph_component',
     'ChunkingOp',
     'EvaluationDataSamplerOp',
@@ -63,9 +54,6 @@ __all__ = [
     'LLMSafetyBiasMetricsOp',
     'ModelEvaluationFeatureAttributionOp',
    'ModelImportEvaluatedAnnotationOp',
-    'ModelImportEvaluationOp',
     'ModelNamePreprocessorOp',
     'TargetFieldDataRemoverOp',
-    'model_inference_component',
-    'model_inference_and_evaluation_component',
 ]
google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py:

@@ -14,11 +14,12 @@
 """LLM embedding evaluation pipeline based on information retrieval (IR) task."""
 
 from typing import Dict, Optional, Union
+
 from google_cloud_pipeline_components._implementation.model_evaluation.endpoint_batch_predict.component import evaluation_llm_endpoint_batch_predict_pipeline_graph_component as LLMEndpointBatchPredictOp
-from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_embedding_retrieval.component import llm_embedding_retrieval as LLMEmbeddingRetrievalOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_information_retrieval_preprocessor.component import llm_information_retrieval_preprocessor as LLMInformationRetrievalPreprocessorOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_retrieval_metrics.component import llm_retrieval_metrics as LLMRetrievalMetricsOp
+from google_cloud_pipeline_components.preview.model_evaluation.model_evaluation_import_component import model_evaluation_import as ModelImportEvaluationOp
 from google_cloud_pipeline_components.types.artifact_types import VertexModel
 from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
 import kfp
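This pairs with the rename in file 43 of the list above: `model_evaluation_import` now lives under the preview namespace. Callers that previously imported `ModelImportEvaluationOp` from `_implementation.model_evaluation` would switch to the import the pipeline itself now uses:

from google_cloud_pipeline_components.preview.model_evaluation.model_evaluation_import_component import (
    model_evaluation_import as ModelImportEvaluationOp,
)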
google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation_preprocessor/component.py:

@@ -38,6 +38,8 @@ def evaluation_dataset_preprocessor_internal(
     output_dirs: dsl.OutputPath(list),
     gcp_resources: dsl.OutputPath(str),
     input_field_name: str = 'input_text',
+    role_field_name: str = 'role',
+    model_name: str = 'publishers/google/model/text-bison@002',
     display_name: str = 'llm_evaluation_dataset_preprocessor_component',
     machine_type: str = 'e2-highmem-16',
     service_account: str = '',
@@ -56,6 +58,9 @@ def evaluation_dataset_preprocessor_internal(
     gcs_source_uris: A json escaped list of GCS URIs of the input eval dataset.
     input_field_name: The field name of the input eval dataset instances that
       contains the input prompts to the LLM.
+    role_field_name: The field name of the role for input eval dataset instances
+      that contains the input prompts to the LLM.
+    model_name: Name of the model being used to create model-specific schemas.
     machine_type: The machine type of this custom job. If not set, defaulted
       to `e2-highmem-16`. More details:
       https://cloud.google.com/compute/docs/machine-resource
@@ -92,6 +97,8 @@ def evaluation_dataset_preprocessor_internal(
           f'--eval_dataset_preprocessor={True}',
           f'--gcs_source_uris={gcs_source_uris}',
           f'--input_field_name={input_field_name}',
+          f'--role_field_name={role_field_name}',
+          f'--model_name={model_name}',
           f'--output_dirs={output_dirs}',
           '--executor_input={{$.json_escape[1]}}',
       ],
@@ -109,6 +116,8 @@ def llm_evaluation_dataset_preprocessor_graph_component(
     location: str,
     gcs_source_uris: List[str],
     input_field_name: str = 'input_text',
+    role_field_name: str = 'role',
+    model_name: str = 'publishers/google/model/text-bison@002',
     display_name: str = 'llm_evaluation_dataset_preprocessor_component',
     machine_type: str = 'e2-standard-4',
     service_account: str = '',
@@ -126,6 +135,9 @@ def llm_evaluation_dataset_preprocessor_graph_component(
     gcs_source_uris: A list of GCS URIs of the input eval dataset.
     input_field_name: The field name of the input eval dataset instances that
       contains the input prompts to the LLM.
+    role_field_name: The field name of the role for input eval dataset
+      instances that contains the input prompts to the LLM.
+    model_name: Name of the model being used to create model-specific schemas.
     display_name: The name of the Evaluation job.
     machine_type: The machine type of this custom job. If not set, defaulted
       to `e2-standard-4`. More details:
@@ -163,6 +175,8 @@ def llm_evaluation_dataset_preprocessor_graph_component(
           input_list=gcs_source_uris
       ).output,
       input_field_name=input_field_name,
+      role_field_name=role_field_name,
+      model_name=model_name,
      display_name=display_name,
      machine_type=machine_type,
      service_account=service_account,
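A hedged sketch of calling the extended graph component; project, bucket, and URI values are placeholders, while the parameter names and defaults come from the diff:

preprocessor_task = llm_evaluation_dataset_preprocessor_graph_component(
    project='my-project',                                   # placeholder
    location='us-central1',
    gcs_source_uris=['gs://my-bucket/eval_dataset.jsonl'],  # placeholder
    input_field_name='input_text',
    role_field_name='role',                                 # new in 2.15.0
    model_name='publishers/google/model/text-bison@002',    # new in 2.15.0
)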
google_cloud_pipeline_components/_implementation/starry_net/__init__.py (new file):

@@ -0,0 +1,41 @@
+# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from google_cloud_pipeline_components._implementation.starry_net.dataprep.component import dataprep as DataprepOp
+from google_cloud_pipeline_components._implementation.starry_net.evaluation.component import evaluation as EvaluationOp
+from google_cloud_pipeline_components._implementation.starry_net.get_training_artifacts.component import get_training_artifacts as GetTrainingArtifactsOp
+from google_cloud_pipeline_components._implementation.starry_net.maybe_set_tfrecord_args.component import maybe_set_tfrecord_args as MaybeSetTfrecordArgsOp
+from google_cloud_pipeline_components._implementation.starry_net.set_dataprep_args.component import set_dataprep_args as SetDataprepArgsOp
+from google_cloud_pipeline_components._implementation.starry_net.set_eval_args.component import set_eval_args as SetEvalArgsOp
+from google_cloud_pipeline_components._implementation.starry_net.set_test_set.component import set_test_set as SetTestSetOp
+from google_cloud_pipeline_components._implementation.starry_net.set_tfrecord_args.component import set_tfrecord_args as SetTfrecordArgsOp
+from google_cloud_pipeline_components._implementation.starry_net.set_train_args.component import set_train_args as SetTrainArgsOp
+from google_cloud_pipeline_components._implementation.starry_net.train.component import train as TrainOp
+from google_cloud_pipeline_components._implementation.starry_net.upload_decomposition_plots.component import upload_decomposition_plots as UploadDecompositionPlotsOp
+from google_cloud_pipeline_components._implementation.starry_net.upload_model.component import upload_model as UploadModelOp
+
+
+__all__ = [
+    'DataprepOp',
+    'EvaluationOp',
+    'GetTrainingArtifactsOp',
+    'MaybeSetTfrecordArgsOp',
+    'SetDataprepArgsOp',
+    'SetEvalArgsOp',
+    'SetTestSetOp',
+    'SetTfrecordArgsOp',
+    'SetTrainArgsOp',
+    'TrainOp',
+    'UploadDecompositionPlotsOp',
+    'UploadModelOp',
+]
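The aggregator gives the StarryNet steps a single private import surface; the public pipeline entry point is preview/starry_net/component.py (file 45 in the list above). A hedged import sketch; the aliases are defined in the __init__ shown here:

from google_cloud_pipeline_components._implementation import starry_net

dataprep_op = starry_net.DataprepOp
train_op = starry_net.TrainOp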
google_cloud_pipeline_components/_implementation/{model_evaluation/import_evaluation → starry_net/dataprep}/__init__.py (renamed):

@@ -1,4 +1,4 @@
-# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
+# Copyright 2024 The Kubeflow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,4 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Google Cloud Pipeline Evaluation Import Evaluation Component."""