google-cloud-pipeline-components 2.13.1__py3-none-any.whl → 2.14.1__py3-none-any.whl

This diff shows the content of publicly released package versions as published to their respective registries. It is provided for informational purposes only.

This version of google-cloud-pipeline-components has been flagged as potentially problematic.
Files changed (82)
  1. google_cloud_pipeline_components/__init__.py +5 -6
  2. google_cloud_pipeline_components/_implementation/llm/deployment_graph.py +12 -34
  3. google_cloud_pipeline_components/_implementation/llm/env.py +1 -1
  4. google_cloud_pipeline_components/_implementation/llm/function_based.py +14 -48
  5. google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py +1 -1
  6. google_cloud_pipeline_components/_implementation/llm/infer_preprocessor.py +109 -0
  7. google_cloud_pipeline_components/_implementation/llm/online_evaluation_pairwise.py +8 -0
  8. google_cloud_pipeline_components/_implementation/llm/reinforcement_learning_graph.py +27 -36
  9. google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py +31 -47
  10. google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py +84 -0
  11. google_cloud_pipeline_components/_implementation/llm/validate_pipeline.py +11 -0
  12. google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +0 -12
  13. google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py +2 -1
  14. google_cloud_pipeline_components/_placeholders.py +30 -1
  15. google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py +1 -1
  16. google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py +2 -2
  17. google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py +2 -2
  18. google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +34 -34
  19. google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml +34 -34
  20. google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml +34 -34
  21. google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml +34 -34
  22. google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py +1 -1
  23. google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml +39 -39
  24. google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml +41 -41
  25. google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py +2 -2
  26. google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py +2 -2
  27. google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml +4 -4
  28. google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py +3 -3
  29. google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py +2 -2
  30. google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml +17 -17
  31. google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py +2 -2
  32. google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml +15 -15
  33. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py +2 -2
  34. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml +16 -16
  35. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py +2 -2
  36. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml +15 -15
  37. google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml +14 -14
  38. google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml +13 -13
  39. google_cloud_pipeline_components/preview/automl/vision/data_converter.py +3 -1
  40. google_cloud_pipeline_components/preview/custom_job/component.py +2 -2
  41. google_cloud_pipeline_components/preview/custom_job/utils.py +3 -2
  42. google_cloud_pipeline_components/preview/llm/infer/component.py +22 -25
  43. google_cloud_pipeline_components/preview/llm/rlhf/component.py +72 -10
  44. google_cloud_pipeline_components/preview/model_evaluation/__init__.py +5 -2
  45. google_cloud_pipeline_components/preview/model_evaluation/model_evaluation_import_component.py +209 -0
  46. google_cloud_pipeline_components/proto/task_error_pb2.py +33 -0
  47. google_cloud_pipeline_components/proto/template_metadata_pb2.py +22 -15
  48. google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml +10 -10
  49. google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml +31 -31
  50. google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml +13 -13
  51. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +13 -3
  52. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +18 -15
  53. google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml +37 -37
  54. google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py +2 -2
  55. google_cloud_pipeline_components/v1/automl/tabular/ensemble.py +2 -2
  56. google_cloud_pipeline_components/v1/automl/tabular/finalizer.py +1 -1
  57. google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py +1 -1
  58. google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py +1 -1
  59. google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py +2 -2
  60. google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py +2 -2
  61. google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py +1 -1
  62. google_cloud_pipeline_components/v1/automl/tabular/transform.py +2 -2
  63. google_cloud_pipeline_components/v1/model_evaluation/__init__.py +3 -1
  64. google_cloud_pipeline_components/v1/model_evaluation/classification_component.py +2 -2
  65. google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py +8 -10
  66. google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py +2 -2
  67. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py +2 -2
  68. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py +2 -2
  69. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py +2 -2
  70. google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py +2 -2
  71. google_cloud_pipeline_components/v1/model_evaluation/evaluation_llm_classification_pipeline.py +4 -2
  72. google_cloud_pipeline_components/v1/model_evaluation/evaluation_llm_text_generation_pipeline.py +4 -2
  73. google_cloud_pipeline_components/{preview → v1}/model_evaluation/model_based_llm_evaluation/__init__.py +2 -2
  74. google_cloud_pipeline_components/{preview → v1}/model_evaluation/model_based_llm_evaluation/autosxs/autosxs_pipeline.py +1 -0
  75. google_cloud_pipeline_components/version.py +1 -1
  76. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.1.dist-info}/METADATA +18 -19
  77. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.1.dist-info}/RECORD +81 -79
  78. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.1.dist-info}/WHEEL +1 -1
  79. google_cloud_pipeline_components/proto/preflight_validations_pb2.py +0 -47
  80. /google_cloud_pipeline_components/{preview → v1}/model_evaluation/model_based_llm_evaluation/autosxs/__init__.py +0 -0
  81. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.1.dist-info}/LICENSE +0 -0
  82. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.1.dist-info}/top_level.txt +0 -0
--- a/google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py
+++ b/google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py
@@ -21,12 +21,12 @@ from google_cloud_pipeline_components._implementation.llm import function_based
 from google_cloud_pipeline_components._implementation.llm import preprocess_chat_dataset
 from google_cloud_pipeline_components._implementation.llm import private_text_comparison_importer
 from google_cloud_pipeline_components._implementation.llm import reward_model_trainer
+from google_cloud_pipeline_components._implementation.llm import rlhf_preprocessor
 from google_cloud_pipeline_components._implementation.llm import upload_tensorboard_metrics
 import kfp

 PipelineOutput = NamedTuple(
     'Outputs',
-    reward_model_base_path=str,
     reward_model_adapter_path=str,
     reward_dataset_path=str,
 )
@@ -39,6 +39,14 @@ PipelineOutput = NamedTuple(
 def pipeline(
     preference_dataset: str,
     large_model_reference: str,
+    reward_model_reference: str,
+    reward_model_path: str,
+    machine_type: str,
+    tuning_location: str,
+    accelerator_type: str,
+    accelerator_count: int,
+    reward_model_image_uri: str,
+    comma_separated_candidates_field_names: str,
     prompt_sequence_length: int = 512,
     target_sequence_length: int = 64,
     batch_size: int = 64,
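With this change the graph no longer resolves machine specs, model metadata, image URIs, or microbatch counts internally; callers supply them directly. A minimal sketch of compiling the updated graph standalone with explicit values (all literals below are hypothetical examples, not defaults shipped with the package):

    from kfp import compiler
    from google_cloud_pipeline_components._implementation.llm import reward_model_graph

    compiler.Compiler().compile(
        pipeline_func=reward_model_graph.pipeline,
        package_path='reward_model_graph.yaml',
        pipeline_parameters={
            'preference_dataset': 'gs://my-bucket/preference.jsonl',
            'large_model_reference': 't5-xl',
            'reward_model_reference': 'T5_XL',  # capitalized snake case, per the docstring
            'reward_model_path': 'gs://my-bucket/checkpoints/reward-model',
            'machine_type': 'a2-highgpu-1g',
            'tuning_location': 'us-central1',
            'accelerator_type': 'NVIDIA_TESLA_A100',
            'accelerator_count': 1,
            'reward_model_image_uri': 'us-docker.pkg.dev/my-project/my-repo/reward-trainer:latest',
            'comma_separated_candidates_field_names': 'candidate_0,candidate_1',
        },
    )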
@@ -48,10 +56,10 @@ def pipeline(
     eval_dataset: Optional[str] = None,
     instruction: Optional[str] = None,
     project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
-    accelerator_type: str = 'GPU',
     location: str = _placeholders.LOCATION_PLACEHOLDER,
     tensorboard_resource_id: str = '',
     encryption_spec_key_name: str = '',
+    num_microbatches: int = 0,
 ) -> PipelineOutput:
   # fmt: off
   """Trains a reward model.
@@ -59,6 +67,14 @@ def pipeline(
   Args:
     preference_dataset: Cloud storage path to a human preference JSONL dataset used to train a reward model. Each example in a preference dataset must contain `candidate_0` and `candidate_1` fields that contain candidate responses, `choice` that specifies the preferred candidate and either `input_text` (if tuning a text model) or `messages` (if tuning a chat model). Chat datasets must contain at least 1 message in a `messages` field. Each message must be valid JSON that contains `author` and `content` fields, where valid `author` values are `user` and `assistant` and `content` must be non-empty. Each row may contain multiple messages, but the first and last author must be the `user`. An optional `context` field may be provided for each example in a chat dataset. If provided, the `context` will preprended to the message `content`. The `instruction` serves as the default context. (Useful if most messages use the same system-level context.) Any context provided in the example will override the default value.
     large_model_reference: Name of the base model. Supported values are `text-bison@001`, `t5-small`, `t5-large`, `t5-xl` and `t5-xxl`. `text-bison@001` and `t5-small` are supported in `us-central1` and `europe-west4`. `t5-large`, `t5-xl` and `t5-xxl` are only supported in `europe-west4`.
+    reward_model_reference: Name of the base model. The name should be in capitalized snake case format.
+    reward_model_path: The model checkpoint path for the reward model.
+    machine_type: The type of the machine to provision for the custom job. Must be a valid GCE instance type and compatible with the accelerator type.
+    tuning_location: The GCP region to run the custom job.
+    accelerator_type: Specific accelerator type for the custom job.
+    accelerator_count: The number of accelerator.
+    reward_model_image_uri: Docker image URI to use for the reward model training job.
+    comma_separated_candidates_field_names: Comma separated list of fields that contain candidate text, e.g. ``'field_1,field_2,field_3'``.
     prompt_sequence_length: Maximum tokenized sequence length for input text. Higher values increase memory overhead. This value should be at most 8192. Default value is 512.
     target_sequence_length: Maximum tokenized sequence length for target text. Higher values increase memory overhead. This value should be at most 1024. Default value is 64.
     batch_size: Number of examples in each finetuning step. Default is 64.
@@ -67,28 +83,18 @@ def pipeline(
     reward_model_train_steps: Number of steps to use when training a reward model. Default value is 1000.
     instruction: This field lets the model know what task it needs to perform. Base models have been trained over a large set of varied instructions. You can give a simple and intuitive description of the task and the model will follow it, e.g. "Classify this movie review as positive or negative" or "Translate this sentence to Danish". Do not specify this if your dataset already prepends the instruction to the inputs field.
     project: Project used to run custom jobs. If not specified the project used to run the pipeline will be used.
-    accelerator_type: One of 'TPU' or 'GPU'. If 'TPU' is specified, tuning components run in europe-west4. Otherwise tuning components run in us-central1 on GPUs. Default is 'GPU'.
     location: Location used to run non-tuning components, i.e. components that do not require accelerators. If not specified the location used to run the pipeline will be used.
     tensorboard_resource_id: Optional tensorboard resource id in format `projects/{project_number}/locations/{location}/tensorboards/{tensorboard_id}`. If provided, tensorboard metrics will be uploaded to this location.
     encryption_spec_key_name: Customer-managed encryption key. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. Note that this is not supported for TPU at the moment.
+    num_microbatches: The number of microbatches to break the total batch size into during training.

   Returns:
-    reward_model_base_path: Path to the base model used by the reward model.
     reward_model_adapter_path: Path to the output LoRA adapter.
     reward_dataset_path: Preference dataset use for tuning the reward model.
   """
   # fmt: on
   prompt_column = 'input_text'
-  candidate_columns = ['candidate_0', 'candidate_1']
   choice_column = 'choice'
-  machine_spec = function_based.resolve_machine_spec(
-      accelerator_type=accelerator_type,
-      use_test_spec=env.get_use_test_machine_spec(),
-  ).set_display_name('Resolve Machine Spec')
-
-  reference_model_metadata = function_based.resolve_reference_model_metadata(
-      large_model_reference=large_model_reference,
-  ).set_display_name('Resolve Model Metadata')

   processed_preference_dataset = (
       preprocess_chat_dataset.preprocess_chat_dataset(
@@ -99,9 +105,6 @@ def pipeline(
       ).set_display_name('Preprocess Prompt Dataset')
   )

-  comma_separated_candidates_field_names = (
-      function_based.convert_to_delimited_string(items=candidate_columns)
-  )
   preference_dataset_importer = (
       private_text_comparison_importer.private_text_comparison_importer(
           project=project,
@@ -110,12 +113,10 @@ def pipeline(
               'processed_dataset_uri'
           ],
           inputs_field_name=prompt_column,
-          comma_separated_candidates_field_names=comma_separated_candidates_field_names.output,
+          comma_separated_candidates_field_names=comma_separated_candidates_field_names,
           choice_field_name=choice_column,
           split=env.TRAIN_SPLIT,
-          large_model_reference=reference_model_metadata.outputs[
-              'reward_model_reference'
-          ],
+          large_model_reference=reward_model_reference,
           instruction=instruction,
           encryption_spec_key_name=encryption_spec_key_name,
       )
@@ -129,12 +130,10 @@ def pipeline(
           location=location,
           input_text=eval_dataset,
           inputs_field_name=prompt_column,
-          comma_separated_candidates_field_names=comma_separated_candidates_field_names.output,
+          comma_separated_candidates_field_names=comma_separated_candidates_field_names,
           choice_field_name=choice_column,
           split=env.TRAIN_SPLIT,
-          large_model_reference=reference_model_metadata.outputs[
-              'reward_model_reference'
-          ],
+          large_model_reference=reward_model_reference,
           instruction=instruction,
           encryption_spec_key_name=encryption_spec_key_name,
       )
@@ -142,21 +141,11 @@ def pipeline(
       .set_caching_options(False)
   )

-  reward_model_image_uri = function_based.resolve_private_refined_image_uri(
-      accelerator_type=machine_spec.outputs['accelerator_type'],
-  ).set_display_name('Resolve Reward Model Image URI')
-  num_microbatches = function_based.resolve_num_microbatches(
-      large_model_reference=reference_model_metadata.outputs[
-          'reward_model_reference'
-      ]
-  ).set_display_name('Resolve Number of Microbatches')
   reward_model = (
       reward_model_trainer.reward_model_trainer(
           project=project,
-          location=machine_spec.outputs['tuning_location'],
-          input_model_path=reference_model_metadata.outputs[
-              'reward_model_path'
-          ],
+          location=tuning_location,
+          input_model_path=reward_model_path,
           input_dataset_path=preference_dataset_importer.outputs[
               'output_dataset_path'
           ],
@@ -164,19 +153,17 @@ def pipeline(
               'output_dataset_path'
           ],
           train_steps=reward_model_train_steps,
-          accelerator_type=machine_spec.outputs['accelerator_type'],
-          accelerator_count=machine_spec.outputs['accelerator_count'],
-          large_model_reference=reference_model_metadata.outputs[
-              'reward_model_reference'
-          ],
-          machine_type=machine_spec.outputs['machine_type'],
-          image_uri=reward_model_image_uri.output,
+          accelerator_type=accelerator_type,
+          accelerator_count=accelerator_count,
+          large_model_reference=reward_model_reference,
+          machine_type=machine_type,
+          image_uri=reward_model_image_uri,
           inputs_sequence_length=prompt_sequence_length,
           targets_sequence_length=target_sequence_length,
           batch_size=batch_size,
           learning_rate_multiplier=reward_model_learning_rate_multiplier,
           lora_dim=lora_dim,
-          num_microbatches=num_microbatches.output,
+          num_microbatches=num_microbatches,
           encryption_spec_key_name=encryption_spec_key_name,
           tensorboard_resource_id=tensorboard_resource_id,
       )
@@ -185,9 +172,6 @@ def pipeline(
   )

   return PipelineOutput(
-      reward_model_base_path=reference_model_metadata.outputs[
-          'reward_model_path'
-      ],
       reward_model_adapter_path=reward_model.outputs['output_adapter_path'],
       reward_dataset_path=preference_dataset_importer.outputs[
           'output_dataset_path'
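Taken together, this file drops its internal resolve_machine_spec / resolve_reference_model_metadata / resolve_private_refined_image_uri / resolve_num_microbatches steps; in 2.14.1 a single upstream rlhf_preprocessor task (whose diff follows) computes those values once and fans them out. A sketch of that wiring, assuming the preprocessor's metadata_* outputs are passed straight through (illustrative shape only, not the exact RLHF pipeline source; the env registry lookups are hypothetical):

    preprocess_metadata = rlhf_preprocessor.rlhf_preprocessor(
        large_model_reference=large_model_reference,
        accelerator_type=accelerator_type,
        use_test_spec=env.get_use_test_machine_spec(),
        # Registry coordinates below are hypothetical env lookups.
        project=env.PRIVATE_ARTIFACT_REGISTRY_PROJECT,
        location=env.PRIVATE_ARTIFACT_REGISTRY_LOCATION,
        artifact_registry=env.PRIVATE_ARTIFACT_REGISTRY,
        tag=env.PRIVATE_IMAGE_TAG,
    )

    reward_model = reward_model_graph.pipeline(
        preference_dataset=preference_dataset,
        large_model_reference=large_model_reference,
        reward_model_reference=preprocess_metadata.outputs['metadata_reward_model_reference'],
        reward_model_path=preprocess_metadata.outputs['metadata_reward_model_path'],
        machine_type=preprocess_metadata.outputs['metadata_machine_type'],
        tuning_location=preprocess_metadata.outputs['metadata_tuning_location'],
        accelerator_type=preprocess_metadata.outputs['metadata_accelerator_type'],
        accelerator_count=preprocess_metadata.outputs['metadata_accelerator_count'],
        reward_model_image_uri=preprocess_metadata.outputs['metadata_refined_image_uri'],
        comma_separated_candidates_field_names=preprocess_metadata.outputs['metadata_candidate_columns_string'],
        num_microbatches=preprocess_metadata.outputs['metadata_num_microbatches'],
    )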
--- a/google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py
+++ b/google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py
@@ -14,6 +14,7 @@
 """Component that preprocesses inputs for Reinforcement Learning from Human Feedback (RLHF)."""

 import os
+from typing import List

 from google_cloud_pipeline_components import _placeholders
 from google_cloud_pipeline_components import utils as gcpc_utils
@@ -23,24 +24,80 @@ from kfp import dsl

 @dsl.container_component
 def rlhf_preprocessor(
+    large_model_reference: str,
+    accelerator_type: str,
+    use_test_spec: bool,
+    project: str,
+    location: str,
+    artifact_registry: str,
+    tag: str,
     gcp_resources: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
     has_tensorboard_id: dsl.OutputPath(bool),  # pytype: disable=invalid-annotation
     has_inference_dataset: dsl.OutputPath(bool),  # pytype: disable=invalid-annotation
+    metadata_candidate_columns_string: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_large_model_reference: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_reference_model_path: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_reward_model_reference: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_reward_model_path: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_machine_type: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_tuning_location: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_accelerator_type: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_accelerator_count: dsl.OutputPath(int),  # pytype: disable=invalid-annotation
+    metadata_refined_image_uri: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_num_microbatches: dsl.OutputPath(int),  # pytype: disable=invalid-annotation
+    metadata_upload_location: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_deploy_model: dsl.OutputPath(bool),  # pytype: disable=invalid-annotation
+    metadata_model_display_name: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_upload_model: dsl.OutputPath(bool),  # pytype: disable=invalid-annotation
+    use_experimental_image: bool = False,
     evaluation_dataset: str = '',
     tensorboard_resource_id: str = '',
+    input_reference_model_path: str = '',
     image_uri: str = utils.get_default_image_uri('refined_cpu', ''),
+    upload_location: str = '',
+    model_display_name: str = '',
+    deploy_model: bool = True,
 ) -> dsl.ContainerSpec:  # pylint: disable=g-doc-args
+  # fmt: off
   """Preprocess RLHF pipeline inputs.

   Args:
+    large_model_reference: The model for fine tuning.
+    accelerator_type: Specific accelerator type for the job.
+    use_test_spec: Whether to use a lower resource machine for testing.
+    project: Project that contains the artifact registry.
+    location: Region that contains the artifact registry.
+    artifact_registry: Registry that contains Docker images.
+    tag: Image tag.
+    use_experimental_image: Whether to use refined experimental image.
     evaluation_dataset: Path to evaluation data.
     tensorboard_resource_id: TensorBoard resource id.
+    metadata_large_model_reference: The base model for fine tuning. The name should be in capitalized snake case format.
+    metadata_reference_model_path: The model checkpoint path for the reinforcer model
+    metadata_reward_model_reference: The base model for training reward model. The name should be in capitalized snake case format.
+    metadata_reward_model_path: The model checkpoint path for the reward model.
+    image_uri: Docker image URI to use for the custom job.
+    upload_location: Region where the model will be uploaded.
+    model_display_name: Display name of the model.
+    deploy_model: Whether to deploy the model.

   Returns:
     gcp_resources: GCP resources that can be used to track the custom job.
     has_tensorboard_id: Whether a tensorboard id is provided.
     has_inference_dataset: Whether inference data are provided.
+    metadata_machine_type: The type of the machine to provision for the custom job.
+    metadata_tuning_location: The GCP region to run the custom job.
+    metadata_accelerator_type: Specific accelerator type for the custom job.
+    metadata_accelerator_count: The number of accelerator.
+    metadata_refined_image_uri: Docker image URI to use for the custom job.
+    metadata_num_microbatches: Number of microbatches to break the total batch
+      size into during training.
+    metadata_upload_location: Regional endpoint.
+    metadata_deploy_model: Whether to deploy the model.
+    metadata_model_display_name: Display name of the model.
+    metadata_upload_model: Whether to upload the model.
   """
+  # fmt: on
   return gcpc_utils.build_serverless_customjob_container_spec(
       project=_placeholders.PROJECT_ID_PLACEHOLDER,
       location=_placeholders.LOCATION_PLACEHOLDER,
@@ -52,8 +109,35 @@ def rlhf_preprocessor(
           '--app_name=rlhf_preprocessor',
           f'--evaluation_dataset={evaluation_dataset}',
           f'--tensorboard_resource_id={tensorboard_resource_id}',
+          f'--large_model_reference={large_model_reference}',
+          f'--input_reference_model_path={input_reference_model_path}',
+          f'--accelerator_type={accelerator_type}',
+          f'--use_test_spec={use_test_spec}',
+          f'--project={project}',
+          f'--location={location}',
+          f'--artifact_registry={artifact_registry}',
+          f'--tag={tag}',
+          f'--use_experimental_image={use_experimental_image}',
+          f'--upload_location={upload_location}',
+          f'--deploy_model={deploy_model}',
+          f'--model_display_name={model_display_name}',
           f'--has_tensorboard_id_path={has_tensorboard_id}',
           f'--has_inference_dataset_path={has_inference_dataset}',
+          f'--metadata_candidate_columns_string_path={metadata_candidate_columns_string}',
+          f'--metadata_large_model_reference_path={metadata_large_model_reference}',
+          f'--metadata_reference_model_path_path={metadata_reference_model_path}',
+          f'--metadata_reward_model_reference_path={metadata_reward_model_reference}',
+          f'--metadata_reward_model_path_path={metadata_reward_model_path}',
+          f'--metadata_machine_type_path={metadata_machine_type}',
+          f'--metadata_tuning_location_path={metadata_tuning_location}',
+          f'--metadata_accelerator_type_path={metadata_accelerator_type}',
+          f'--metadata_accelerator_count_path={metadata_accelerator_count}',
+          f'--metadata_refined_image_uri_path={metadata_refined_image_uri}',
+          f'--metadata_num_microbatches_path={metadata_num_microbatches}',
+          f'--metadata_upload_location_path={metadata_upload_location}',
+          f'--metadata_deploy_model_path={metadata_deploy_model}',
+          f'--metadata_model_display_name_path={metadata_model_display_name}',
+          f'--metadata_upload_model_path={metadata_upload_model}',
       ],
   ),
   gcp_resources=gcp_resources,
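Each dsl.OutputPath parameter above is forwarded to the container as a --*_path flag, and the container is expected to write the resolved value to that file so KFP can surface it as a task output. A minimal sketch of the writer side (the flag parsing below is hypothetical; the actual rlhf_preprocessor application is not part of this wheel):

    import argparse

    def write_output(path: str, value: str) -> None:
      # KFP reads the file at the declared OutputPath to populate the output.
      with open(path, 'w') as f:
        f.write(value)

    parser = argparse.ArgumentParser()
    parser.add_argument('--metadata_machine_type_path')
    parser.add_argument('--metadata_accelerator_count_path')
    args, _ = parser.parse_known_args()

    # Values would be resolved from the large_model_reference and
    # accelerator_type inputs; these literals are examples only.
    write_output(args.metadata_machine_type_path, 'cloud-tpu')
    write_output(args.metadata_accelerator_count_path, '8')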
--- a/google_cloud_pipeline_components/_implementation/llm/validate_pipeline.py
+++ b/google_cloud_pipeline_components/_implementation/llm/validate_pipeline.py
@@ -79,8 +79,19 @@ def validate_pipeline(
   # ]
   # [ Check CMEK
   supported_pipeline_regions = {
+      'asia-northeast1',
+      'asia-northeast3',
+      'asia-southeast1',
+      'europe-west1',
+      'europe-west2',
+      'europe-west3',
       'europe-west4',
+      'europe-west9',
+      'northamerica-northeast1',
       'us-central1',
+      'us-east4',
+      'us-west1',
+      'us-west4',
   }
   if location not in supported_pipeline_regions:
     raise ValueError(
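The guard is a plain set-membership check; this release grows the CMEK allowlist from two regions to thirteen. A standalone sketch of the same pattern (names and error message are illustrative, not the package's exact wording):

    SUPPORTED_PIPELINE_REGIONS = {
        'asia-northeast1', 'asia-northeast3', 'asia-southeast1',
        'europe-west1', 'europe-west2', 'europe-west3', 'europe-west4',
        'europe-west9', 'northamerica-northeast1',
        'us-central1', 'us-east4', 'us-west1', 'us-west4',
    }

    def check_cmek_region(location: str) -> None:
      # Fail fast before launching any custom jobs in an unsupported region.
      if location not in SUPPORTED_PIPELINE_REGIONS:
        raise ValueError(
            f'CMEK is not supported in {location}. Supported regions: '
            f'{sorted(SUPPORTED_PIPELINE_REGIONS)}.'
        )

    check_cmek_region('us-east4')  # passes as of 2.14.1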
--- a/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py
+++ b/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py
@@ -25,27 +25,18 @@ from google_cloud_pipeline_components._implementation.model_evaluation.feature_a
 from google_cloud_pipeline_components._implementation.model_evaluation.feature_attribution.feature_attribution_graph_component import feature_attribution_graph_component as FeatureAttributionGraphComponentOp
 from google_cloud_pipeline_components._implementation.model_evaluation.feature_extractor.component import feature_extractor_error_analysis as FeatureExtractorOp
 from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluated_annotation.component import evaluated_annotation_import as ModelImportEvaluatedAnnotationOp
-from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_classification_postprocessor.component import llm_classification_predictions_postprocessor_graph_component as LLMEvaluationClassificationPredictionsPostprocessorOp
-from google_cloud_pipeline_components._implementation.model_evaluation.llm_embedding.evaluation_llm_embedding_pipeline import evaluation_llm_embedding_pipeline
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_embedding_retrieval.component import llm_embedding_retrieval as LLMEmbeddingRetrievalOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation.component import model_evaluation_text_generation as LLMEvaluationTextGenerationOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation_preprocessor.component import llm_evaluation_dataset_preprocessor_graph_component as LLMEvaluationPreprocessorOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_information_retrieval_preprocessor.component import llm_information_retrieval_preprocessor as LLMInformationRetrievalPreprocessorOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_retrieval_metrics.component import llm_retrieval_metrics as LLMRetrievalMetricsOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.component import llm_safety_bias_metrics as LLMSafetyBiasMetricsOp
-from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.evaluation_llm_safety_bias_pipeline import evaluation_llm_safety_bias_pipeline
-from google_cloud_pipeline_components._implementation.model_evaluation.model_inference.component import model_inference_and_evaluation_component
-from google_cloud_pipeline_components._implementation.model_evaluation.model_inference.component import model_inference_component
 from google_cloud_pipeline_components._implementation.model_evaluation.model_name_preprocessor.component import model_name_preprocessor as ModelNamePreprocessorOp
 from google_cloud_pipeline_components._implementation.model_evaluation.target_field_data_remover.component import target_field_data_remover as TargetFieldDataRemoverOp
-from google_cloud_pipeline_components._implementation.model_evaluation.text2sql.evaluation_llm_text2sql_pipeline import evaluation_llm_text2sql_pipeline


 __all__ = [
-    'evaluation_llm_safety_bias_pipeline',
-    'evaluation_llm_embedding_pipeline',
-    'evaluation_llm_text2sql_pipeline',
     'evaluation_llm_endpoint_batch_predict_pipeline_graph_component',
     'ChunkingOp',
     'EvaluationDataSamplerOp',
@@ -63,9 +54,6 @@ __all__ = [
     'LLMSafetyBiasMetricsOp',
     'ModelEvaluationFeatureAttributionOp',
     'ModelImportEvaluatedAnnotationOp',
-    'ModelImportEvaluationOp',
     'ModelNamePreprocessorOp',
     'TargetFieldDataRemoverOp',
-    'model_inference_component',
-    'model_inference_and_evaluation_component',
 ]
--- a/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py
+++ b/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py
@@ -14,11 +14,12 @@
 """LLM embedding evaluation pipeline based on information retrieval (IR) task."""

 from typing import Dict, Optional, Union
+
 from google_cloud_pipeline_components._implementation.model_evaluation.endpoint_batch_predict.component import evaluation_llm_endpoint_batch_predict_pipeline_graph_component as LLMEndpointBatchPredictOp
-from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_embedding_retrieval.component import llm_embedding_retrieval as LLMEmbeddingRetrievalOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_information_retrieval_preprocessor.component import llm_information_retrieval_preprocessor as LLMInformationRetrievalPreprocessorOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_retrieval_metrics.component import llm_retrieval_metrics as LLMRetrievalMetricsOp
+from google_cloud_pipeline_components.preview.model_evaluation.model_evaluation_import_component import model_evaluation_import as ModelImportEvaluationOp
 from google_cloud_pipeline_components.types.artifact_types import VertexModel
 from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
 import kfp
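Across these two files, model_evaluation_import moves from the private _implementation tree to a public preview module (a new model_evaluation_import_component.py appears under preview/model_evaluation in the file list above). For code that imported the old private path, the migration is a one-line change, assuming the public symbol keeps the same call signature:

    # Before (2.13.1): private path, no longer exported from _implementation.
    # from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import (
    #     model_evaluation_import as ModelImportEvaluationOp,
    # )

    # After (2.14.1): public preview location.
    from google_cloud_pipeline_components.preview.model_evaluation.model_evaluation_import_component import (
        model_evaluation_import as ModelImportEvaluationOp,
    )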
--- a/google_cloud_pipeline_components/_placeholders.py
+++ b/google_cloud_pipeline_components/_placeholders.py
@@ -13,9 +13,38 @@
 # limitations under the License.
 """Placeholders for use in component authoring."""

-# prefer not using PIPELINE_TASK_ prefix like KFP does for reduced verbosity
+# prefer not using PIPELINE_TASK_ or PIPELINE_ prefix like KFP does for reduced
+# verbosity
 PROJECT_ID_PLACEHOLDER = "{{$.pipeline_google_cloud_project_id}}"
+"""A placeholder used to obtain Google Cloud project id where the pipeline
+executes. The placeholder value is set at pipeline runtime.
+"""
 LOCATION_PLACEHOLDER = "{{$.pipeline_google_cloud_location}}"
+"""A placeholder used to obtain Google Cloud location where the pipeline
+executes. The placeholder value is set at pipeline runtime.
+"""
+SERVICE_ACCOUNT_PLACEHOLDER = "{{$.pipeline_service_account}}"
+"""A placeholder used to obtain service account that is defined in [PipelineJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs).
+If PipelineJob doesn't have a service account set, this placeholder will be resolved to default service account.
+The placeholder value is set at pipeline runtime.
+"""
+NETWORK_PLACEHOLDER = "{{$.pipeline_network}}"
+"""A placeholder used to obtain network that is defined in [PipelineJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs).
+If PipelineJob doesn't have a network set, this placeholder will be empty. The
+placeholder value is set at pipeline runtime.
+"""
+PERSISTENT_RESOURCE_ID_PLACEHOLDER = "{{$.pipeline_persistent_resource_id}}"
+"""A placeholder used to obtain persistent resource id that is defined in
+PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig).
+If PipelineJob doesn't have a persistent resource id, this placeholder will be
+empty. The placeholder value is set at pipeline runtime.
+"""
+ENCRYPTION_SPEC_KMS_KEY_NAME_PLACEHOLDER = "{{$.pipeline_encryption_key_name}}"
+"""A placeholder used to obtain kmsKeyName that is defined in
+PipelineJob's [EncryptionSpec](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/EncryptionSpec).
+If PipelineJob doesn't have a encryption key name, this placeholder will be
+empty. The placeholder value is set at pipeline runtime.
+"""


 # omit placeholder type annotation to avoid dependency on KFP SDK internals
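These placeholders are plain strings that Vertex AI Pipelines substitutes at task runtime, so component authors can embed them directly in container arguments. A minimal sketch using the newly documented placeholders (_placeholders is a private module and the component below is purely illustrative):

    from google_cloud_pipeline_components import _placeholders
    from kfp import dsl

    @dsl.container_component
    def report_runtime_context() -> dsl.ContainerSpec:
      # Each placeholder string is replaced with the live value when the
      # task runs; unset values (e.g. network) resolve to an empty string.
      return dsl.ContainerSpec(
          image='python:3.10',
          command=['echo'],
          args=[
              f'project={_placeholders.PROJECT_ID_PLACEHOLDER}',
              f'service_account={_placeholders.SERVICE_ACCOUNT_PLACEHOLDER}',
              f'network={_placeholders.NETWORK_PLACEHOLDER}',
              f'kms_key={_placeholders.ENCRYPTION_SPEC_KMS_KEY_NAME_PLACEHOLDER}',
          ],
      )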
--- a/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py
+++ b/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py
@@ -72,7 +72,7 @@ def automl_forecasting_ensemble(
   # fmt: on
   job_id = dsl.PIPELINE_JOB_ID_PLACEHOLDER
   task_id = dsl.PIPELINE_TASK_ID_PLACEHOLDER
-  image_uri = 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325'
+  image_uri = 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240419_0625'
   display_name = f'automl-forecasting-ensemble-{job_id}-{task_id}'

   error_file_path = f'{root_dir}/{job_id}/{task_id}/error.pb'
--- a/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py
+++ b/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py
@@ -99,14 +99,14 @@ def automl_forecasting_stage_1_tuner(
               ' 1, "machine_spec": {"machine_type": "n1-standard-8"},'
               ' "container_spec": {"image_uri":"'
           ),
-          'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325',
+          'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240419_0625',
           '", "args": ["forecasting_mp_l2l_stage_1_tuner',
           '", "--region=',
           location,
           '", "--transform_output_path=',
           transform_output.uri,
           '", "--training_docker_uri=',
-          'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325',
+          'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240419_0625',
           '", "--reduce_search_space_mode=',
           reduce_search_space_mode,
           f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}',
--- a/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py
+++ b/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py
@@ -97,14 +97,14 @@ def automl_forecasting_stage_2_tuner(
               ' 1, "machine_spec": {"machine_type": "n1-standard-8"},'
               ' "container_spec": {"image_uri":"'
           ),
-          'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325',
+          'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240419_0625',
           '", "args": ["forecasting_mp_l2l_stage_2_tuner',
           '", "--region=',
           location,
           '", "--transform_output_path=',
           transform_output.uri,
           '", "--training_docker_uri=',
-          'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240214_1325',
+          'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20240419_0625',
          f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}',
           '", "--training_base_dir=',
           root_dir,
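All three forecasting components bump the pinned trainer image from tag 20240214_1325 to 20240419_0625 in the same restricted registry. A quick way to confirm which tag a given install pins, assuming the component object exposes pipeline_spec as in recent KFP releases:

    import re
    from google_cloud_pipeline_components.preview.automl.forecasting import forecasting_ensemble

    # Search the component's compiled spec for pinned trainer image tags.
    spec_text = str(forecasting_ensemble.automl_forecasting_ensemble.pipeline_spec)
    print(sorted(set(re.findall(r'forecasting-training:(\d{8}_\d{4})', spec_text))))
    # Expected on 2.14.1: ['20240419_0625']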