google-cloud-pipeline-components 2.10.0__py3-none-any.whl → 2.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of google-cloud-pipeline-components might be problematic.
- google_cloud_pipeline_components/_implementation/llm/batch_prediction_pairwise.py +14 -4
- google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py +7 -0
- google_cloud_pipeline_components/_implementation/llm/deployment_graph.py +6 -1
- google_cloud_pipeline_components/_implementation/llm/function_based.py +74 -168
- google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py +1 -1
- google_cloud_pipeline_components/_implementation/llm/model_evaluation_text_generation_pairwise.py +45 -3
- google_cloud_pipeline_components/_implementation/llm/online_evaluation_pairwise.py +14 -2
- google_cloud_pipeline_components/_implementation/llm/private_text_comparison_importer.py +9 -2
- google_cloud_pipeline_components/_implementation/llm/private_text_importer.py +8 -1
- google_cloud_pipeline_components/_implementation/llm/reinforcement_learning_graph.py +14 -28
- google_cloud_pipeline_components/_implementation/llm/reinforcer.py +13 -0
- google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py +36 -27
- google_cloud_pipeline_components/_implementation/llm/reward_model_trainer.py +17 -0
- google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py +60 -0
- google_cloud_pipeline_components/_implementation/llm/supervised_fine_tuner.py +1 -0
- google_cloud_pipeline_components/_implementation/llm/utils.py +25 -2
- google_cloud_pipeline_components/_implementation/llm/validate_pipeline.py +113 -0
- google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +2 -0
- google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/component.py +1 -1
- google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py +2 -2
- google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation_preprocessor/component.py +2 -2
- google_cloud_pipeline_components/_implementation/model_evaluation/model_name_preprocessor/__init__.py +14 -0
- google_cloud_pipeline_components/_implementation/model_evaluation/model_name_preprocessor/component.py +74 -0
- google_cloud_pipeline_components/_implementation/model_evaluation/version.py +1 -1
- google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py +7 -7
- google_cloud_pipeline_components/preview/llm/infer/__init__.py +13 -0
- google_cloud_pipeline_components/preview/llm/infer/component.py +10 -10
- google_cloud_pipeline_components/preview/llm/rlaif/component.py +10 -3
- google_cloud_pipeline_components/preview/llm/rlhf/component.py +43 -22
- google_cloud_pipeline_components/preview/model_evaluation/__init__.py +2 -2
- google_cloud_pipeline_components/preview/model_evaluation/model_based_llm_evaluation/autosxs/autosxs_pipeline.py +45 -3
- google_cloud_pipeline_components/proto/preflight_validations_pb2.py +19 -30
- google_cloud_pipeline_components/v1/custom_job/utils.py +22 -22
- google_cloud_pipeline_components/v1/model/get_model/component.py +1 -1
- google_cloud_pipeline_components/v1/model_evaluation/__init__.py +4 -0
- google_cloud_pipeline_components/{preview → v1}/model_evaluation/evaluation_llm_classification_pipeline.py +14 -2
- google_cloud_pipeline_components/{preview → v1}/model_evaluation/evaluation_llm_text_generation_pipeline.py +29 -17
- google_cloud_pipeline_components/version.py +1 -1
- {google_cloud_pipeline_components-2.10.0.dist-info → google_cloud_pipeline_components-2.12.0.dist-info}/METADATA +1 -2
- {google_cloud_pipeline_components-2.10.0.dist-info → google_cloud_pipeline_components-2.12.0.dist-info}/RECORD +43 -39
- {google_cloud_pipeline_components-2.10.0.dist-info → google_cloud_pipeline_components-2.12.0.dist-info}/WHEEL +1 -1
- {google_cloud_pipeline_components-2.10.0.dist-info → google_cloud_pipeline_components-2.12.0.dist-info}/LICENSE +0 -0
- {google_cloud_pipeline_components-2.10.0.dist-info → google_cloud_pipeline_components-2.12.0.dist-info}/top_level.txt +0 -0
google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py
CHANGED
@@ -41,7 +41,7 @@ def model_evaluation_text_generation(
     ground_truth_gcs_source: str = '',
     enable_row_based_metrics: bool = False,
     display_name: str = 'model-evaluation-text-generation',
-    machine_type: str = 'e2-
+    machine_type: str = 'e2-standard-4',
     service_account: str = '',
     network: str = '',
     reserved_ip_ranges: List[str] = [],
@@ -78,7 +78,7 @@ def model_evaluation_text_generation(
       only ground truth files to be used for this evaluation.
     display_name: The name of the evaluation custom job.
     machine_type: The machine type of this custom job. If not set, defaulted to
-      `e2-
+      `e2-standard-4`. More details:
       https://cloud.google.com/compute/docs/machine-resource
     service_account: Sets the default service account for workload run-as
       account. The service account running the pipeline

google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation_preprocessor/component.py
CHANGED
@@ -110,7 +110,7 @@ def llm_evaluation_dataset_preprocessor_graph_component(
     gcs_source_uris: List[str],
     input_field_name: str = 'input_text',
     display_name: str = 'llm_evaluation_dataset_preprocessor_component',
-    machine_type: str = 'e2-
+    machine_type: str = 'e2-standard-4',
     service_account: str = '',
     network: str = '',
     encryption_spec_key_name: str = '',
@@ -128,7 +128,7 @@ def llm_evaluation_dataset_preprocessor_graph_component(
       contains the input prompts to the LLM.
     display_name: The name of the Evaluation job.
     machine_type: The machine type of this custom job. If not set, defaulted
-      to `e2-
+      to `e2-standard-4`. More details:
       https://cloud.google.com/compute/docs/machine-resource
     service_account: Sets the default service account for workload run-as
       account. The service account running the pipeline
google_cloud_pipeline_components/_implementation/model_evaluation/model_name_preprocessor/__init__.py
ADDED
@@ -0,0 +1,14 @@
+# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Model name preprocessor Component."""
google_cloud_pipeline_components/_implementation/model_evaluation/model_name_preprocessor/component.py
ADDED
@@ -0,0 +1,74 @@
+# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Model name preprocessor component used in KFP pipelines."""
+
+from google_cloud_pipeline_components._implementation.model_evaluation import version
+from kfp.dsl import container_component
+from kfp.dsl import ContainerSpec
+from kfp.dsl import OutputPath
+from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER
+
+
+@container_component
+def model_name_preprocessor(
+    gcp_resources: OutputPath(str),
+    processed_model_name: OutputPath(str),
+    project: str,
+    location: str,
+    model_name: str,
+    service_account: str = '',
+):
+  """Preprocess inputs for text2sql evaluation pipeline.
+
+  Args:
+    project: Required. The GCP project that runs the pipeline component.
+    location: Required. The GCP region that runs the pipeline component.
+    model_name: The Model name used to run evaluation. Must be a publisher
+      Model or a managed Model sharing the same ancestor location. Starting
+      this job has no impact on any existing deployments of the Model and
+      their resources.
+    service_account: Sets the default service account for workload run-as
+      account. The service account running the pipeline
+      (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account)
+
+  Returns:
+    gcp_resources (str):
+      Serialized gcp_resources proto tracking the custom job.
+    processed_model_name (str):
+      Preprocessed model name.
+  """
+
+  return ContainerSpec(
+      image=version.LLM_EVAL_IMAGE_TAG,
+      args=[
+          '--model_name_preprocessor',
+          'true',
+          '--project',
+          project,
+          '--location',
+          location,
+          '--root_dir',
+          f'{PIPELINE_ROOT_PLACEHOLDER}',
+          '--model_name',
+          model_name,
+          '--processed_model_name',
+          processed_model_name,
+          '--service_account',
+          service_account,
+          '--gcp_resources',
+          gcp_resources,
+          '--executor_input',
+          '{{$}}',
+      ],
+  )
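
For orientation, here is a minimal sketch of wiring the new component into a KFP pipeline. The wrapper pipeline and all argument values are hypothetical; only the component signature comes from the diff above.

from kfp import dsl
from google_cloud_pipeline_components._implementation.model_evaluation.model_name_preprocessor.component import model_name_preprocessor


@dsl.pipeline(name='model-name-preprocessor-demo')  # hypothetical name
def demo_pipeline(project: str, location: str, model_name: str):
  # Normalizes the model name before it reaches downstream evaluation steps;
  # the result is exposed as the 'processed_model_name' output.
  preprocess = model_name_preprocessor(
      project=project,
      location=location,
      model_name=model_name,
  )
  # Downstream components would consume:
  #   preprocess.outputs['processed_model_name']
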
google_cloud_pipeline_components/_implementation/model_evaluation/version.py
CHANGED
@@ -14,7 +14,7 @@
 """Version constants for model evaluation components."""
 
 _EVAL_VERSION = 'v0.9.4'
-_LLM_EVAL_VERSION = 'v0.
+_LLM_EVAL_VERSION = 'v0.6'
 
 _EVAL_IMAGE_NAME = 'gcr.io/ml-pipeline/model-evaluation'
 _LLM_EVAL_IMAGE_NAME = 'gcr.io/ml-pipeline/llm-model-evaluation'
google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py
CHANGED
@@ -338,13 +338,13 @@ def main(argv):
         and slice_spec['dimension'] == 'annotationSpec'
     ):
       slice_config['model_explanation'] = {
-          'mean_attributions': [
-
-              '
-
-
-
-          ]
+          'mean_attributions': [{
+              'feature_attributions': (
+                  sliced_feature_attributions[slice_spec['value']]
+                  if slice_spec['value'] in sliced_feature_attributions
+                  else None
+              )
+          }]
       }
       slices_with_explanations.append(slice_config)
     elif 'slice_spec' in slice_spec:
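
Functionally, the rewritten block attaches per-slice feature attributions when they exist and None otherwise. A sketch of the equivalent lookup, based only on the plus lines above:

# dict.get also yields None when the slice value is missing:
slice_config['model_explanation'] = {
    'mean_attributions': [{
        'feature_attributions': sliced_feature_attributions.get(
            slice_spec['value']
        )
    }]
}
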
google_cloud_pipeline_components/preview/llm/infer/__init__.py
CHANGED
@@ -0,0 +1,13 @@
+# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
google_cloud_pipeline_components/preview/llm/infer/component.py
CHANGED
@@ -41,7 +41,9 @@ def infer_pipeline(
     sampling_strategy: str = 'greedy',
     instruction: Optional[str] = None,
     project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
+    accelerator_type: str = 'GPU',
     location: str = _placeholders.LOCATION_PLACEHOLDER,
+    encryption_spec_key_name: str = '',
 ) -> PipelineOutput:
   # fmt: off
   """Uses a large-language model to perform bulk inference on a prompt dataset.
@@ -55,7 +57,9 @@ def infer_pipeline(
     sampling_strategy: This field specifies the sampling strategy. The valid options are 'greedy' and 'temperature_sampling'.
     instruction: This field lets the model know what task it needs to perform. Base models have been trained over a large set of varied instructions. You can give a simple and intuitive description of the task and the model will follow it, e.g. "Classify this movie review as positive or negative" or "Translate this sentence to Danish". Do not specify this if your dataset already prepends the instruction to the inputs field.
     project: Project used to run custom jobs. If not specified the project used to run the pipeline will be used.
-
+    accelerator_type: One of 'TPU' or 'GPU'. If 'TPU' is specified, tuning components run in europe-west4. Otherwise tuning components run in us-central1 on GPUs. Default is 'GPU'.
+    location: Location used to run non-tuning components, i.e. components that do not require accelerators. If not specified the location used to run the pipeline will be used.
+    encryption_spec_key_name: Customer-managed encryption key. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. Note that this is not supported for TPU at the moment.
 
   Returns:
     Cloud storage path to output predictions.
@@ -63,7 +67,7 @@ def infer_pipeline(
   # fmt: on
   prompt_column = 'input_text'
   machine_spec = function_based.resolve_machine_spec(
-
+      accelerator_type=accelerator_type,
       use_test_spec=env.get_use_test_machine_spec(),
   ).set_display_name('Resolve Machine Spec')
   reference_model_metadata = function_based.resolve_reference_model_metadata(
@@ -82,9 +86,6 @@ def infer_pipeline(
       large_model_reference=large_model_reference,
       instruction=instruction,
   ).set_display_name('Resolve Instruction')
-  prompt_dataset_image_uri = function_based.resolve_private_image_uri(
-      image_name='text_importer',
-  ).set_display_name('Resolve Prompt Dataset Image URI')
   prompt_dataset_importer = (
       private_text_importer.private_text_importer(
           project=project,
@@ -96,21 +97,19 @@ def infer_pipeline(
           large_model_reference=reference_model_metadata.outputs[
              'large_model_reference'
           ],
-          image_uri=prompt_dataset_image_uri.output,
           instruction=resolved_text_instruction.output,
+          encryption_spec_key_name=encryption_spec_key_name,
       )
       .set_display_name('Import Prompt Dataset')
       .set_caching_options(False)
   )
 
-  bulk_inferrer_image_uri = function_based.
-      image_name='infer',
+  bulk_inferrer_image_uri = function_based.resolve_private_refined_image_uri(
       accelerator_type=machine_spec.outputs['accelerator_type'],
-      accelerator_count=machine_spec.outputs['accelerator_count'],
   ).set_display_name('Resolve Bulk Inferrer Image URI')
   bulk_inference = bulk_inferrer.bulk_inferrer(
       project=project,
-      location=
+      location=machine_spec.outputs['tuning_location'],
       input_model=reference_model_metadata.outputs['reference_model_path'],
       input_dataset_path=prompt_dataset_importer.outputs['imported_data_path'],
       dataset_split=env.TRAIN_SPLIT,
@@ -124,6 +123,7 @@ def infer_pipeline(
       accelerator_count=machine_spec.outputs['accelerator_count'],
       machine_type=machine_spec.outputs['machine_type'],
       image_uri=bulk_inferrer_image_uri.output,
+      encryption_spec_key_name=encryption_spec_key_name,
   ).set_display_name('Bulk Inferrer')
 
   return PipelineOutput(
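
The infer pipeline now threads the accelerator choice and a customer-managed encryption key (CMEK) through its sub-components. A hedged sketch of compiling it and supplying the new arguments at submission time; the dataset path and key name are placeholders:

from kfp import compiler
from google_cloud_pipeline_components.preview.llm.infer import component as infer

# Compile the pipeline definition shipped in this release.
compiler.Compiler().compile(infer.infer_pipeline, 'infer_pipeline.yaml')

# Runtime parameters; names follow the new signature, values are illustrative.
parameter_values = {
    'large_model_reference': 't5-small',
    'prompt_dataset': 'gs://my-bucket/prompts.jsonl',  # assumed parameter name
    'accelerator_type': 'TPU',  # tuning components then run in europe-west4
    'encryption_spec_key_name': 'projects/p/locations/l/keyRings/r/cryptoKeys/k',
}
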
google_cloud_pipeline_components/preview/llm/rlaif/component.py
CHANGED
@@ -40,6 +40,7 @@ def rlaif_pipeline(
     prompt_dataset: str,
     preference_prompt_dataset: str,
     large_model_reference: str,
+    task_type: str,
     model_display_name: Optional[str] = None,
     prompt_sequence_length: int = 512,
     target_sequence_length: int = 64,
@@ -54,6 +55,7 @@ def rlaif_pipeline(
     instruction: Optional[str] = None,
     eval_dataset: Optional[str] = None,
     project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
+    accelerator_type: str = 'GPU',
     location: str = _placeholders.LOCATION_PLACEHOLDER,
     tensorboard_resource_id: Optional[str] = None,
 ) -> PipelineOutput:
@@ -64,7 +66,9 @@ def rlaif_pipeline(
 
   Args:
     prompt_dataset: Cloud storage path to an unlabled JSONL dataset that contains prompts. Text datasets must contain an `input_text` field that contains the prompt. Chat datasets must contain at least 1 message in a `messages` field. Each message must be valid JSON that contains `author` and `content` fields, where valid `author` values are `user` and `assistant` and `content` must be non-empty. Each row may contain multiple messages, but the first and last author must be the `user`. An optional `context` field may be provided for each example in a chat dataset. If provided, the `context` will preprended to the message `content`. The `instruction` serves as the default context. (Useful if most messages use the same system-level context.) Any context provided in the example will override the default value.
-    preference_prompt_dataset: The prompt dataset used for two models' inferences to build the side by side comparison AI feedback.
+    preference_prompt_dataset: The prompt dataset used for two models' inferences to build the side by side comparison AI feedback.
+    large_model_reference: Name of the base model. Supported values are `text-bison@001`, `t5-small`, `t5-large`, `t5-xl` and `t5-xxl`. `text-bison@001` and `t5-small` are supported in `us-central1` and `europe-west4`. `t5-large`, `t5-xl` and `t5-xxl` are only supported in `europe-west4`.
+    task_type: Evaluation task in the form {task}@{version}. task can be one of "summarization", "question_answering". Version is an integer with 3 digits or "latest". Ex: summarization@001 or question_answering@latest.
     model_display_name: Name of the fine-tuned model shown in the Model Registry. If not provided, a default name will be created.
     prompt_sequence_length: Maximum tokenized sequence length for input text. Higher values increase memory overhead. This value should be at most 8192. Default value is 512.
     target_sequence_length: Maximum tokenized sequence length for target text. Higher values increase memory overhead. This value should be at most 1024. Default value is 64.
@@ -79,6 +83,7 @@ def rlaif_pipeline(
     instruction: This field lets the model know what task it needs to perform. Base models have been trained over a large set of varied instructions. You can give a simple and intuitive description of the task and the model will follow it, e.g., "Classify this movie review as positive or negative" or "Translate this sentence to Danish". Do not specify this if your dataset already prepends the instruction to the inputs field.
     eval_dataset: Optional Cloud storage path to an evaluation dataset. If provided, inference will be performed on this dataset after training. The dataset format is jsonl. Each example in the dataset must contain a field `input_text` that contains the prompt.
     project: Project used to run custom jobs. If not specified the project used to run the pipeline will be used.
+    accelerator_type: One of 'TPU' or 'GPU'. If 'TPU' is specified, tuning components run in europe-west4. Otherwise tuning components run in us-central1 on GPUs. Default is 'GPU'.
     location: Location used to run custom jobs. If not specified the location used to run the pipeline will be used.
     tensorboard_resource_id: Optional tensorboard resource id in format `projects/{project_number}/locations/{location}/tensorboards/{tensorboard_id}`. If provided, tensorboard metrics will be uploaded to this location.
 
@@ -88,7 +93,6 @@ def rlaif_pipeline(
   """
   # fmt: on
   id_columns = ['content']
-  task = 'summarization@001'
   deploy_model = True
 
   output_prediction_gcs_path_a = infer.infer_pipeline(
@@ -100,6 +104,7 @@ def rlaif_pipeline(
       instruction=instruction,
       project=project,
       location=location,
+      accelerator_type=accelerator_type,
   ).set_display_name('Inferrer A')
   output_prediction_gcs_path_b = infer.infer_pipeline(
       large_model_reference=large_model_b_reference,
@@ -110,6 +115,7 @@ def rlaif_pipeline(
       instruction=instruction,
       project=project,
       location=location,
+      accelerator_type=accelerator_type,
   ).set_display_name('Inferrer B')
 
   inference_output_uri = (
@@ -125,7 +131,7 @@ def rlaif_pipeline(
   autosxs = online_evaluation_pairwise.online_evaluation_pairwise(
       inference_output_uri=inference_output_uri,
       id_columns=id_columns,
-      task=
+      task=task_type,
   ).set_display_name('Build AI Feedback')
 
   preference_dataset = (
@@ -155,6 +161,7 @@ def rlaif_pipeline(
       project=project,
       location=location,
       tensorboard_resource_id=tensorboard_resource_id,
+      accelerator_type=accelerator_type,
   )
   .set_display_name('Reinforcement Learning From AI Feedback')
   .outputs
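
Because the hard-coded task = 'summarization@001' was removed, task_type is now a required pipeline parameter. A sketch of the values a caller might supply; all dataset paths are placeholders:

# task_type follows the {task}@{version} format documented above.
parameter_values = {
    'prompt_dataset': 'gs://my-bucket/prompts.jsonl',
    'preference_prompt_dataset': 'gs://my-bucket/preference_prompts.jsonl',
    'large_model_reference': 'text-bison@001',
    'task_type': 'question_answering@latest',
    'accelerator_type': 'GPU',  # or 'TPU' to run tuning in europe-west4
}
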
google_cloud_pipeline_components/preview/llm/rlhf/component.py
CHANGED
@@ -17,9 +17,11 @@ from typing import NamedTuple, Optional
 
 from google_cloud_pipeline_components import _placeholders
 from google_cloud_pipeline_components._implementation.llm import deployment_graph
+from google_cloud_pipeline_components._implementation.llm import env
 from google_cloud_pipeline_components._implementation.llm import function_based
 from google_cloud_pipeline_components._implementation.llm import reinforcement_learning_graph
 from google_cloud_pipeline_components._implementation.llm import reward_model_graph
+from google_cloud_pipeline_components._implementation.llm import validate_pipeline
 from google_cloud_pipeline_components.preview.llm.infer import component
 import kfp
 
@@ -48,8 +50,10 @@ def rlhf_pipeline(
     deploy_model: bool = True,
     eval_dataset: Optional[str] = None,
     project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
+    accelerator_type: str = 'GPU',
     location: str = _placeholders.LOCATION_PLACEHOLDER,
-
+    encryption_spec_key_name: str = '',
+    tensorboard_resource_id: str = '',
 ) -> PipelineOutput:
   # fmt: off
   """Performs reinforcement learning from human feedback.
@@ -68,9 +72,11 @@ def rlhf_pipeline(
     kl_coeff: Coefficient for KL penalty. This regularizes the policy model and penalizes if it diverges from its initial distribution. If set to 0, the reference language model is not loaded into memory. Default value is 0.1.
     instruction: This field lets the model know what task it needs to perform. Base models have been trained over a large set of varied instructions. You can give a simple and intuitive description of the task and the model will follow it, e.g. "Classify this movie review as positive or negative" or "Translate this sentence to Danish". Do not specify this if your dataset already prepends the instruction to the inputs field.
     deploy_model: Whether to deploy the model to an endpoint in `us-central1`. Default is True.
-    eval_dataset: Optional Cloud storage path to an evaluation dataset.
+    eval_dataset: Optional Cloud storage path to an evaluation dataset. The dataset format is jsonl. The evaluation dataset can be used to compute train-time metrics (when training a reward model) or perform bulk inference for third-party models. To compute train-time metrics this dataset must contain the same fields as the peference dataset. For bulk inference with third-party models only `input_text` is needed. Note, train-time metrics are only computed for the first 5000 samples in the dataset for efficient evaluation during training.
     project: Project used to run custom jobs. If not specified the project used to run the pipeline will be used.
-
+    accelerator_type: One of 'TPU' or 'GPU'. If 'TPU' is specified, tuning components run in europe-west4. Otherwise tuning components run in us-central1 on GPUs. Default is 'GPU'.
+    location: Location used to run non-tuning components, i.e. components that do not require accelerators. If not specified the location used to run the pipeline will be used.
+    encryption_spec_key_name: Customer-managed encryption key. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. Note that this is not supported for TPU at the moment.
     tensorboard_resource_id: Optional tensorboard resource id in format `projects/{project_number}/locations/{location}/tensorboards/{tensorboard_id}`. If provided, tensorboard metrics will be uploaded to this location.
 
   Returns:
@@ -78,31 +84,40 @@ def rlhf_pipeline(
     endpoint_resource_name: Path the Online Prediction Endpoint. This will be an empty string if the model was not deployed.
   """
   # fmt: on
-
   # LoRA dim for reward model
   reward_lora_dim = 4
 
-
-
+  validate_pipeline_task = validate_pipeline.validate_pipeline(
+      accelerator_type=accelerator_type,
+      location=location,
+      encryption_spec_key_name=encryption_spec_key_name,
       eval_dataset=eval_dataset,
   ).set_display_name('Validate Inputs')
 
   reward_model_pipeline = (
-
-
-
-
-
-
-
-
-
-
-
-
+      (
+          reward_model_graph.pipeline(
+              preference_dataset=preference_dataset,
+              large_model_reference=large_model_reference,
+              prompt_sequence_length=prompt_sequence_length,
+              target_sequence_length=target_sequence_length,
+              eval_dataset=validate_pipeline_task.outputs[
+                  'reward_model_eval_dataset'
+              ],
+              instruction=instruction,
+              reward_model_learning_rate_multiplier=reward_model_learning_rate_multiplier,
+              reward_model_train_steps=reward_model_train_steps,
+              lora_dim=reward_lora_dim,
+              project=project,
+              location=location,
+              accelerator_type=accelerator_type,
+              tensorboard_resource_id=tensorboard_resource_id,
+              encryption_spec_key_name=encryption_spec_key_name,
+          )
       )
-
-
+      .set_display_name('Train Reward Model')
+      .after(validate_pipeline_task)
+  )
   rl_model_pipeline = reinforcement_learning_graph.pipeline(
       prompt_dataset=prompt_dataset,
       input_reward_model_path=reward_model_pipeline.outputs[
@@ -123,8 +138,10 @@ def rlhf_pipeline(
       instruction=instruction,
       reward_lora_dim=reward_lora_dim,
       project=project,
+      accelerator_type=accelerator_type,
      location=location,
      tensorboard_resource_id=tensorboard_resource_id,
+      encryption_spec_key_name=encryption_spec_key_name,
   ).set_display_name('Reinforcement Learning')
 
   has_inference_dataset = function_based.value_exists(
@@ -135,7 +152,7 @@ def rlhf_pipeline(
       name='Perform Inference',
   ):
     has_model_checkpoint = function_based.value_exists(
-        value=rl_model_pipeline.outputs['
+        value=rl_model_pipeline.outputs['output_model_path']
     ).set_display_name('Resolve Model Checkpoint')
     with kfp.dsl.Condition(
         has_model_checkpoint.output == True,  # pylint: disable=singleton-comparison
@@ -145,11 +162,13 @@ def rlhf_pipeline(
         project=project,
        location=location,
        large_model_reference=large_model_reference,
-        model_checkpoint=rl_model_pipeline.outputs['
+        model_checkpoint=rl_model_pipeline.outputs['output_model_path'],
        prompt_dataset=eval_dataset,
        prompt_sequence_length=prompt_sequence_length,
        target_sequence_length=target_sequence_length,
        instruction=instruction,
+        accelerator_type=accelerator_type,
+        encryption_spec_key_name=encryption_spec_key_name,
     )
 
   llm_model_handler = deployment_graph.pipeline(
@@ -157,6 +176,8 @@ def rlhf_pipeline(
       large_model_reference=large_model_reference,
       model_display_name=model_display_name,
       deploy_model=deploy_model,
+      encryption_spec_key_name=encryption_spec_key_name,
+      upload_location=location,
   ).set_display_name('Upload and Deploy Tuned Model')
 
   return PipelineOutput(
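
In sum, rlhf_pipeline gains an input-validation step plus accelerator_type, encryption_spec_key_name, and an explicit tensorboard_resource_id string. A hedged sketch of the corresponding runtime parameters; values are placeholders, only the names and defaults come from the diff:

parameter_values = {
    'prompt_dataset': 'gs://my-bucket/prompts.jsonl',
    'preference_dataset': 'gs://my-bucket/preferences.jsonl',
    'large_model_reference': 'text-bison@001',
    'accelerator_type': 'GPU',  # new; 'TPU' routes tuning to europe-west4
    # New; per the docstring above, CMEK is not supported for TPU yet.
    'encryption_spec_key_name': 'projects/p/locations/l/keyRings/r/cryptoKeys/k',
    'tensorboard_resource_id': 'projects/123/locations/us-central1/tensorboards/456',
}
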
google_cloud_pipeline_components/preview/model_evaluation/__init__.py
CHANGED
@@ -14,12 +14,12 @@
 """Model evaluation preview components."""
 
 from google_cloud_pipeline_components.preview.model_evaluation.data_bias_component import detect_data_bias as DetectDataBiasOp
-from google_cloud_pipeline_components.preview.model_evaluation.evaluation_llm_classification_pipeline import evaluation_llm_classification_pipeline
-from google_cloud_pipeline_components.preview.model_evaluation.evaluation_llm_text_generation_pipeline import evaluation_llm_text_generation_pipeline
 from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp
 from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_graph_component import feature_attribution_graph_component as FeatureAttributionGraphComponentOp
 from google_cloud_pipeline_components.preview.model_evaluation.model_based_llm_evaluation.autosxs.autosxs_pipeline import autosxs_pipeline
 from google_cloud_pipeline_components.preview.model_evaluation.model_bias_component import detect_model_bias as DetectModelBiasOp
+from google_cloud_pipeline_components.v1.model_evaluation.evaluation_llm_classification_pipeline import evaluation_llm_classification_pipeline
+from google_cloud_pipeline_components.v1.model_evaluation.evaluation_llm_text_generation_pipeline import evaluation_llm_text_generation_pipeline
 
 __all__ = [
     'autosxs_pipeline',
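
The two LLM evaluation pipelines were promoted from preview to v1 (see the renamed files in the list above), and the preview package now re-exports the v1 implementations, so existing preview imports keep resolving:

# Canonical module location as of 2.12.0:
from google_cloud_pipeline_components.v1.model_evaluation.evaluation_llm_text_generation_pipeline import (
    evaluation_llm_text_generation_pipeline,
)

# Legacy preview path, kept working by the re-export shown above:
from google_cloud_pipeline_components.preview.model_evaluation import (
    evaluation_llm_text_generation_pipeline,
)
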
google_cloud_pipeline_components/preview/model_evaluation/model_based_llm_evaluation/autosxs/autosxs_pipeline.py
CHANGED
@@ -13,7 +13,7 @@
 # limitations under the License.
 """Optimization AI Inference and AutoSxS pipeline function."""
 
-from typing import Any, Dict, List
+from typing import Any, Dict, List, NamedTuple
 
 from google_cloud_pipeline_components import _placeholders
 from google_cloud_pipeline_components._implementation.llm import batch_prediction_pairwise
@@ -21,6 +21,14 @@ from google_cloud_pipeline_components._implementation.llm import model_evaluation_text_generation_pairwise
 from google_cloud_pipeline_components._implementation.llm import online_evaluation_pairwise
 from kfp import dsl
 
+PipelineOutput = NamedTuple(
+    'Outputs',
+    model_a_evaluation_resource_name=str,
+    model_b_evaluation_resource_name=str,
+    evaluation_count=int,
+    evaluation_dataset_path=str,
+)
+
 
 # pylint: disable=dangerous-default-value,g-bare-generic,unused-argument
 @dsl.pipeline(
@@ -46,7 +54,8 @@ def autosxs_pipeline(
     judgments_format: str = 'jsonl',
     bigquery_destination_prefix: str = '',
     experimental_args: Dict[str, Any] = {},
-
+    encryption_spec_key_name: str = '',
+) -> PipelineOutput:
   # fmt: off
   """Evaluates two models side-by-side using an arbiter model.
 
@@ -69,6 +78,13 @@ def autosxs_pipeline(
     judgments_format: The format to write judgments to. Can be either `[json, bigquery]`.
     bigquery_destination_prefix: BigQuery table to write judgments to if the specified format is 'bigquery'.
     experimental_args: Experimentally released arguments. Subject to change.
+    encryption_spec_key_name: Customer-managed encryption key options. If this is set, then all resources created by the pipeline will be encrypted with the provided encryption key.
+
+  Returns:
+    model_a_evaluation_resource_name: The path to write the ModelEvaluation for Model A to if Model A is a ModelRegistry Model.
+    model_b_evaluation_resource_name: The path to write the ModelEvaluation for Model B to if Model B is a ModelRegistry Model.
+    evaluation_count: The count of how many evaluations were included for this AutoSxS run.
+    evaluation_dataset_path: The path to the overall evaluation dataset including judgments.
   """
   # fmt: on
   responses = batch_prediction_pairwise.batch_prediction_pairwise(
@@ -87,6 +103,9 @@ def autosxs_pipeline(
       model_b_parameters=model_b_parameters,
       human_preference_column=human_preference_column,
       experimental_args=experimental_args,
+      project=project,
+      location=location,
+      encryption_spec_key_name=encryption_spec_key_name,
   ).set_display_name('AutoSxS Batch Prediction')
 
   winners = online_evaluation_pairwise.online_evaluation_pairwise(
@@ -99,11 +118,34 @@ def autosxs_pipeline(
       judgments_format=judgments_format,
       bigquery_destination_prefix=bigquery_destination_prefix,
       experimental_args=experimental_args,
+      project=project,
+      location=location,
+      encryption_spec_key_name=encryption_spec_key_name,
   ).set_display_name('AutoSxS Autorater')
 
-  model_evaluation_text_generation_pairwise.model_evaluation_text_generation_pairwise(
+  metrics = model_evaluation_text_generation_pairwise.model_evaluation_text_generation_pairwise(
      judgments_dir=winners.outputs['judgments_uri'],
      human_preference_column=human_preference_column,
+      project=project,
+      location=location,
+      encryption_spec_key_name=encryption_spec_key_name,
+      model_a=model_a,
+      model_b=model_b,
+      evaluation_dataset=evaluation_dataset,
+      evaluation_dataset_metadata=winners.outputs['metadata'],
+      task=task,
   ).set_display_name(
      'AutoSxS Metrics'
   )
+
+  return PipelineOutput(
+      model_a_evaluation_resource_name=metrics.outputs[
+          'model_a_evaluation_path'
+      ],
+      model_b_evaluation_resource_name=metrics.outputs[
+          'model_b_evaluation_path'
+      ],
+      evaluation_count=metrics.outputs['evaluation_count_path'],
+      # Needs to be a component output
+      evaluation_dataset_path=metrics.outputs['evaluation_dataset_path'],
+  )
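
With autosxs_pipeline now declaring typed outputs, a wrapper pipeline can consume the evaluation resources by name. A sketch; the wrapper itself and the model resource names are hypothetical, and arguments not visible in this diff are omitted:

from kfp import dsl
from google_cloud_pipeline_components.preview.model_evaluation import autosxs_pipeline


@dsl.pipeline(name='autosxs-wrapper')  # hypothetical
def wrapper(evaluation_dataset: str, task: str):
  autosxs = autosxs_pipeline(
      evaluation_dataset=evaluation_dataset,
      task=task,
      model_a='publishers/google/models/text-bison@001',  # placeholder
      model_b='publishers/google/models/text-bison@002',  # placeholder
      # ...plus any other required arguments not shown in this diff.
  )
  # New named outputs in 2.12.0, e.g.:
  #   autosxs.outputs['model_a_evaluation_resource_name']
  #   autosxs.outputs['evaluation_count']
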
google_cloud_pipeline_components/proto/preflight_validations_pb2.py
CHANGED
@@ -12,16 +12,17 @@ _sym_db = _symbol_database.Default()
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    b'\n\x13preflight_validations.proto\x12\x15preflight_validations"\
-    b' \
-    b' \
-    b' \
-    b' \x01(\t\
-    b' \
-    b' \x01(\
-    b' \x01(\
-    b' \x01(\t\x12\x13\n\x0bpermissions\x18\
-    b'
+    b'\n\x13preflight_validations.proto\x12\x15preflight_validations"\x8e\x02\n\x0fValidationItems\x12R\n\x0esa_validations\x18\x01'
+    b' \x03(\x0b\x32:.preflight_validations.GoogleCloudServiceAccountValidation\x12S\n\x11quota_validations\x18\x02'
+    b' \x03(\x0b\x32\x38.preflight_validations.GoogleCloudProjectQuotaValidation\x12R\n\x0f\x61pi_validations\x18\x03'
+    b' \x03(\x0b\x32\x39.preflight_validations.GoogleCloudApiEnablementValidation"p\n!GoogleCloudProjectQuotaValidation\x12\x13\n\x0bmetric_name\x18\x01'
+    b' \x01(\t\x12\x15\n\x0bint64_value\x18\x02'
+    b' \x01(\x03H\x00\x12\x16\n\x0c\x64ouble_value\x18\x03'
+    b' \x01(\x01H\x00\x42\x07\n\x05value"\x8d\x01\n#GoogleCloudServiceAccountValidation\x12\x1f\n\x17\x64\x65\x66\x61ult_principal_email\x18\x01'
+    b' \x01(\t\x12\x1c\n\x14override_placeholder\x18\x02'
+    b' \x01(\t\x12\x13\n\x0bpermissions\x18\x03'
+    b' \x03(\t\x12\x12\n\nrole_names\x18\x04'
+    b' \x03(\t";\n"GoogleCloudApiEnablementValidation\x12\x15\n\rservice_names\x18\x01'
     b' \x03(\tB\x02P\x01\x62\x06proto3'
 )
 
@@ -35,24 +36,12 @@ _builder.BuildTopDescriptorsAndMessages(
 if not _descriptor._USE_C_DESCRIPTORS:
   _globals['DESCRIPTOR']._loaded_options = None
   _globals['DESCRIPTOR']._serialized_options = b'P\001'
-  _globals[
-
-  ].
-  _globals[
-
-  ].
-  _globals['
-  _globals['
-  _globals['_GOOGLECLOUDPROJECTQUOTAMETADATA']._serialized_start = 417
-  _globals['_GOOGLECLOUDPROJECTQUOTAMETADATA']._serialized_end = 652
-  _globals[
-      '_GOOGLECLOUDPROJECTQUOTAMETADATA_METRICSRECOMMENDATIONSENTRY'
-  ]._serialized_start = 591
-  _globals[
-      '_GOOGLECLOUDPROJECTQUOTAMETADATA_METRICSRECOMMENDATIONSENTRY'
-  ]._serialized_end = 652
-  _globals['_GOOGLECLOUDSERVICEACCOUNTMETADATA']._serialized_start = 654
-  _globals['_GOOGLECLOUDSERVICEACCOUNTMETADATA']._serialized_end = 734
-  _globals['_GOOGLECLOUDAPIENABLEMENTMETADATA']._serialized_start = 736
-  _globals['_GOOGLECLOUDAPIENABLEMENTMETADATA']._serialized_end = 793
+  _globals['_VALIDATIONITEMS']._serialized_start = 142
+  _globals['_VALIDATIONITEMS']._serialized_end = 412
+  _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_start = 414
+  _globals['_GOOGLECLOUDPROJECTQUOTAVALIDATION']._serialized_end = 526
+  _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_start = 529
+  _globals['_GOOGLECLOUDSERVICEACCOUNTVALIDATION']._serialized_end = 670
+  _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_start = 672
+  _globals['_GOOGLECLOUDAPIENABLEMENTVALIDATION']._serialized_end = 731
 # @@protoc_insertion_point(module_scope)