google-cloud-pipeline-components 2.19.0__py3-none-any.whl → 2.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of google-cloud-pipeline-components might be problematic. Click here for more details.
- google_cloud_pipeline_components/_implementation/starry_net/get_training_artifacts/component.py +2 -2
- google_cloud_pipeline_components/_implementation/starry_net/set_test_set/component.py +1 -1
- google_cloud_pipeline_components/_implementation/starry_net/upload_decomposition_plots/component.py +7 -4
- google_cloud_pipeline_components/_implementation/starry_net/version.py +3 -3
- google_cloud_pipeline_components/proto/README.md +49 -0
- google_cloud_pipeline_components/proto/gcp_resources.proto +25 -0
- google_cloud_pipeline_components/proto/task_error.proto +11 -0
- google_cloud_pipeline_components/proto/template_metadata.proto +323 -0
- google_cloud_pipeline_components/version.py +1 -1
- {google_cloud_pipeline_components-2.19.0.dist-info → google_cloud_pipeline_components-2.20.0.dist-info}/METADATA +18 -21
- {google_cloud_pipeline_components-2.19.0.dist-info → google_cloud_pipeline_components-2.20.0.dist-info}/RECORD +14 -18
- {google_cloud_pipeline_components-2.19.0.dist-info → google_cloud_pipeline_components-2.20.0.dist-info}/WHEEL +1 -1
- google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/__init__.py +0 -14
- google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py +0 -208
- google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py +0 -180
- google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py +0 -178
- google_cloud_pipeline_components/preview/model_evaluation/model_based_llm_evaluation/__init__.py +0 -20
- google_cloud_pipeline_components/preview/model_evaluation/model_based_llm_evaluation/autosxs/__init__.py +0 -13
- google_cloud_pipeline_components/preview/model_evaluation/model_based_llm_evaluation/autosxs/autosxs_pipeline.py +0 -109
- google_cloud_pipeline_components/proto/preflight_validations_pb2.py +0 -58
- {google_cloud_pipeline_components-2.19.0.dist-info → google_cloud_pipeline_components-2.20.0.dist-info}/LICENSE +0 -0
- {google_cloud_pipeline_components-2.19.0.dist-info → google_cloud_pipeline_components-2.20.0.dist-info}/top_level.txt +0 -0
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
"""Optimization AI Inference and AutoSxS pipeline function."""
|
|
15
|
-
|
|
16
|
-
from typing import Any, Dict, List
|
|
17
|
-
|
|
18
|
-
from google_cloud_pipeline_components import _placeholders
|
|
19
|
-
from google_cloud_pipeline_components._implementation.llm import batch_prediction_pairwise
|
|
20
|
-
from google_cloud_pipeline_components._implementation.llm import model_evaluation_text_generation_pairwise
|
|
21
|
-
from google_cloud_pipeline_components._implementation.llm import online_evaluation_pairwise
|
|
22
|
-
from kfp import dsl
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
# pylint: disable=dangerous-default-value,g-bare-generic,unused-argument
|
|
26
|
-
@dsl.pipeline(
|
|
27
|
-
name='autosxs-template',
|
|
28
|
-
description='Determines the SxS winrate between two models.',
|
|
29
|
-
)
|
|
30
|
-
def autosxs_pipeline(
|
|
31
|
-
evaluation_dataset: str,
|
|
32
|
-
task: str,
|
|
33
|
-
id_columns: List[str],
|
|
34
|
-
model_a: str = '',
|
|
35
|
-
model_b: str = '',
|
|
36
|
-
autorater_prompt_parameters: Dict[str, Dict[str, str]] = {},
|
|
37
|
-
model_a_prompt_parameters: Dict[str, Dict[str, str]] = {},
|
|
38
|
-
model_b_prompt_parameters: Dict[str, Dict[str, str]] = {},
|
|
39
|
-
response_column_a: str = '',
|
|
40
|
-
response_column_b: str = '',
|
|
41
|
-
model_a_parameters: Dict[str, str] = {},
|
|
42
|
-
model_b_parameters: Dict[str, str] = {},
|
|
43
|
-
human_preference_column: str = '',
|
|
44
|
-
project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
|
|
45
|
-
location: str = _placeholders.LOCATION_PLACEHOLDER,
|
|
46
|
-
judgments_format: str = 'jsonl',
|
|
47
|
-
bigquery_destination_prefix: str = '',
|
|
48
|
-
experimental_args: Dict[str, Any] = {},
|
|
49
|
-
):
|
|
50
|
-
# fmt: off
|
|
51
|
-
"""Evaluates two models side-by-side using an arbiter model.
|
|
52
|
-
|
|
53
|
-
Args:
|
|
54
|
-
evaluation_dataset: A BigQuery table or comma-separated list of GCS paths to a JSONL dataset containing evaluation examples.
|
|
55
|
-
task: Evaluation task in the form `{task}@{version}`. task can be one of `[summarization, question_answering]`. Version is an integer with 3 digits or "latest". Ex: `summarization@001` or `question_answering@latest`.
|
|
56
|
-
id_columns: The columns which distinguish unique evaluation examples.
|
|
57
|
-
model_a: A fully-qualified model resource name (`projects/{project}/locations/{location}/models/{model}@{version}`) or publisher model resource name (`publishers/{publisher}/models/{model}`). This parameter is optional if Model A responses are specified.
|
|
58
|
-
model_b: A fully-qualified model resource name (`projects/{project}/locations/{location}/models/{model}@{version}`) or publisher model resource name (`publishers/{publisher}/models/{model}`). This parameter is optional if Model B responses are specified.
|
|
59
|
-
autorater_prompt_parameters: Map of autorater prompt parameters to columns or templates. The expected parameters are: `inference_instruction` (details on how to perform a task) and `inference_context` (content to reference to perform the task). As an example, `{'inference_context': {'column': 'my_prompt'}}` uses the evaluation dataset's `my_prompt` column for the AutoRater's context.
|
|
60
|
-
model_a_prompt_parameters: Map of Model A prompt template parameters to columns or templates. This parameter is optional if Model A predictions are predefined. Example - `{'prompt': {'column': 'my_prompt'}}` uses the evaluation dataset's `my_prompt` column for the prompt parameter named `prompt`.
|
|
61
|
-
model_b_prompt_parameters: Map of Model B prompt template parameters to columns or templates. This parameter is optional if Model B predictions are predefined. Example - `{'prompt': {'column': 'my_prompt'}}` uses the evaluation dataset's `my_prompt` column for the prompt parameter named `prompt`.
|
|
62
|
-
response_column_a: Either the name of a column in the evaluation dataset containing predefined predictions, or the name of the column in the Model A output containing predictions. If no value is provided, the pipeline will attempt to infer the correct model output column name.
|
|
63
|
-
response_column_b: Either the name of a column in the evaluation dataset containing predefined predictions, or the name of the column in the Model B output containing predictions. If no value is provided, the pipeline will attempt to infer the correct model output column name.
|
|
64
|
-
model_a_parameters: The parameters that govern the predictions from model A, such as temperature or maximum output tokens.
|
|
65
|
-
model_b_parameters: The parameters that govern the predictions from model B, such as temperature or maximum output tokens.
|
|
66
|
-
human_preference_column: The column containing ground truth winners for each example. Providing this parameter adds additional metrics for checking the AutoRater alignment with human preferences.
|
|
67
|
-
project: Project used to run custom jobs. This should be the same project used to run the pipeline.
|
|
68
|
-
location: Location used to run custom jobs. This should be the same location used to run the pipeline.
|
|
69
|
-
judgments_format: The format to write judgments to. Can be either `[jsonl, bigquery]`. Defaults to `jsonl`.
|
|
70
|
-
bigquery_destination_prefix: BigQuery table to write judgments to if the specified format is 'bigquery'.
|
|
71
|
-
experimental_args: Experimentally released arguments. Subject to change.
|
|
72
|
-
"""
|
|
73
|
-
# fmt: on
|
|
74
|
-
responses = batch_prediction_pairwise.batch_prediction_pairwise(
|
|
75
|
-
display_name='autosxs-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}',
|
|
76
|
-
evaluation_dataset=evaluation_dataset,
|
|
77
|
-
id_columns=id_columns,
|
|
78
|
-
task=task,
|
|
79
|
-
autorater_prompt_parameters=autorater_prompt_parameters,
|
|
80
|
-
response_column_a=response_column_a,
|
|
81
|
-
response_column_b=response_column_b,
|
|
82
|
-
model_a=model_a,
|
|
83
|
-
model_b=model_b,
|
|
84
|
-
model_a_prompt_parameters=model_a_prompt_parameters,
|
|
85
|
-
model_b_prompt_parameters=model_b_prompt_parameters,
|
|
86
|
-
model_a_parameters=model_a_parameters,
|
|
87
|
-
model_b_parameters=model_b_parameters,
|
|
88
|
-
human_preference_column=human_preference_column,
|
|
89
|
-
experimental_args=experimental_args,
|
|
90
|
-
).set_display_name('AutoSxS Batch Prediction')
|
|
91
|
-
|
|
92
|
-
winners = online_evaluation_pairwise.online_evaluation_pairwise(
|
|
93
|
-
inference_output_uri=responses.outputs[
|
|
94
|
-
'preprocessed_evaluation_dataset_uri'
|
|
95
|
-
],
|
|
96
|
-
id_columns=id_columns,
|
|
97
|
-
human_preference_column=human_preference_column,
|
|
98
|
-
task=task,
|
|
99
|
-
judgments_format=judgments_format,
|
|
100
|
-
bigquery_destination_prefix=bigquery_destination_prefix,
|
|
101
|
-
experimental_args=experimental_args,
|
|
102
|
-
).set_display_name('AutoSxS Autorater')
|
|
103
|
-
|
|
104
|
-
model_evaluation_text_generation_pairwise.model_evaluation_text_generation_pairwise(
|
|
105
|
-
judgments_dir=winners.outputs['judgments_uri'],
|
|
106
|
-
human_preference_column=human_preference_column,
|
|
107
|
-
).set_display_name(
|
|
108
|
-
'AutoSxS Metrics'
|
|
109
|
-
)
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
-
# Protobuf Python Version: 0.20240110.0
|
|
4
|
-
"""Generated protocol buffer code."""
|
|
5
|
-
from google.protobuf import descriptor as _descriptor
|
|
6
|
-
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
7
|
-
from google.protobuf import symbol_database as _symbol_database
|
|
8
|
-
from google.protobuf.internal import builder as _builder
|
|
9
|
-
# @@protoc_insertion_point(imports)
|
|
10
|
-
|
|
11
|
-
_sym_db = _symbol_database.Default()
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
|
|
15
|
-
b'\n\x13preflight_validations.proto\x12\x15preflight_validations"\x90\x02\n\x0eValidationItem\x12O\n\x0bsa_metadata\x18\x02'
|
|
16
|
-
b' \x01(\x0b\x32\x38.preflight_validations.GoogleCloudServiceAccountMetadataH\x00\x12P\n\x0equota_metadata\x18\x03'
|
|
17
|
-
b' \x01(\x0b\x32\x36.preflight_validations.GoogleCloudProjectQuotaMetadataH\x00\x12O\n\x0c\x61pi_metadata\x18\x04'
|
|
18
|
-
b' \x01(\x0b\x32\x37.preflight_validations.GoogleCloudApiEnablementMetadataH\x00\x42\n\n\x08metadata"\xeb\x01\n\x1fGoogleCloudProjectQuotaMetadata\x12\x14\n\x0cservice_name\x18\x01'
|
|
19
|
-
b' \x01(\t\x12s\n\x17metrics_recommendations\x18\x02'
|
|
20
|
-
b' \x03(\x0b\x32R.preflight_validations.GoogleCloudProjectQuotaMetadata.MetricsRecommendationsEntry\x1a=\n\x1bMetricsRecommendationsEntry\x12\x0b\n\x03key\x18\x01'
|
|
21
|
-
b' \x01(\t\x12\r\n\x05value\x18\x02'
|
|
22
|
-
b' \x01(\x03:\x02\x38\x01"P\n!GoogleCloudServiceAccountMetadata\x12\x16\n\x0eprincipal_name\x18\x01'
|
|
23
|
-
b' \x01(\t\x12\x13\n\x0bpermissions\x18\x02 \x03(\t"9\n'
|
|
24
|
-
b' GoogleCloudApiEnablementMetadata\x12\x15\n\rservice_names\x18\x01'
|
|
25
|
-
b' \x03(\tB\x02P\x01\x62\x06proto3'
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
_globals = globals()
|
|
29
|
-
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
|
30
|
-
_builder.BuildTopDescriptorsAndMessages(
|
|
31
|
-
DESCRIPTOR,
|
|
32
|
-
'google_cloud_pipeline_components.google_cloud_pipeline_components.proto.preflight_validations_pb2',
|
|
33
|
-
_globals,
|
|
34
|
-
)
|
|
35
|
-
if not _descriptor._USE_C_DESCRIPTORS:
|
|
36
|
-
_globals['DESCRIPTOR']._loaded_options = None
|
|
37
|
-
_globals['DESCRIPTOR']._serialized_options = b'P\001'
|
|
38
|
-
_globals[
|
|
39
|
-
'_GOOGLECLOUDPROJECTQUOTAMETADATA_METRICSRECOMMENDATIONSENTRY'
|
|
40
|
-
]._loaded_options = None
|
|
41
|
-
_globals[
|
|
42
|
-
'_GOOGLECLOUDPROJECTQUOTAMETADATA_METRICSRECOMMENDATIONSENTRY'
|
|
43
|
-
]._serialized_options = b'8\001'
|
|
44
|
-
_globals['_VALIDATIONITEM']._serialized_start = 142
|
|
45
|
-
_globals['_VALIDATIONITEM']._serialized_end = 414
|
|
46
|
-
_globals['_GOOGLECLOUDPROJECTQUOTAMETADATA']._serialized_start = 417
|
|
47
|
-
_globals['_GOOGLECLOUDPROJECTQUOTAMETADATA']._serialized_end = 652
|
|
48
|
-
_globals[
|
|
49
|
-
'_GOOGLECLOUDPROJECTQUOTAMETADATA_METRICSRECOMMENDATIONSENTRY'
|
|
50
|
-
]._serialized_start = 591
|
|
51
|
-
_globals[
|
|
52
|
-
'_GOOGLECLOUDPROJECTQUOTAMETADATA_METRICSRECOMMENDATIONSENTRY'
|
|
53
|
-
]._serialized_end = 652
|
|
54
|
-
_globals['_GOOGLECLOUDSERVICEACCOUNTMETADATA']._serialized_start = 654
|
|
55
|
-
_globals['_GOOGLECLOUDSERVICEACCOUNTMETADATA']._serialized_end = 734
|
|
56
|
-
_globals['_GOOGLECLOUDAPIENABLEMENTMETADATA']._serialized_start = 736
|
|
57
|
-
_globals['_GOOGLECLOUDAPIENABLEMENTMETADATA']._serialized_end = 793
|
|
58
|
-
# @@protoc_insertion_point(module_scope)
|
|
File without changes
|