google-cloud-pipeline-components 2.10.0-py3-none-any.whl → 2.12.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of google-cloud-pipeline-components might be problematic.
- google_cloud_pipeline_components/_implementation/llm/batch_prediction_pairwise.py +14 -4
- google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py +7 -0
- google_cloud_pipeline_components/_implementation/llm/deployment_graph.py +6 -1
- google_cloud_pipeline_components/_implementation/llm/function_based.py +74 -168
- google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py +1 -1
- google_cloud_pipeline_components/_implementation/llm/model_evaluation_text_generation_pairwise.py +45 -3
- google_cloud_pipeline_components/_implementation/llm/online_evaluation_pairwise.py +14 -2
- google_cloud_pipeline_components/_implementation/llm/private_text_comparison_importer.py +9 -2
- google_cloud_pipeline_components/_implementation/llm/private_text_importer.py +8 -1
- google_cloud_pipeline_components/_implementation/llm/reinforcement_learning_graph.py +14 -28
- google_cloud_pipeline_components/_implementation/llm/reinforcer.py +13 -0
- google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py +36 -27
- google_cloud_pipeline_components/_implementation/llm/reward_model_trainer.py +17 -0
- google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py +60 -0
- google_cloud_pipeline_components/_implementation/llm/supervised_fine_tuner.py +1 -0
- google_cloud_pipeline_components/_implementation/llm/utils.py +25 -2
- google_cloud_pipeline_components/_implementation/llm/validate_pipeline.py +113 -0
- google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +2 -0
- google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/component.py +1 -1
- google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py +2 -2
- google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation_preprocessor/component.py +2 -2
- google_cloud_pipeline_components/_implementation/model_evaluation/model_name_preprocessor/__init__.py +14 -0
- google_cloud_pipeline_components/_implementation/model_evaluation/model_name_preprocessor/component.py +74 -0
- google_cloud_pipeline_components/_implementation/model_evaluation/version.py +1 -1
- google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py +7 -7
- google_cloud_pipeline_components/preview/llm/infer/__init__.py +13 -0
- google_cloud_pipeline_components/preview/llm/infer/component.py +10 -10
- google_cloud_pipeline_components/preview/llm/rlaif/component.py +10 -3
- google_cloud_pipeline_components/preview/llm/rlhf/component.py +43 -22
- google_cloud_pipeline_components/preview/model_evaluation/__init__.py +2 -2
- google_cloud_pipeline_components/preview/model_evaluation/model_based_llm_evaluation/autosxs/autosxs_pipeline.py +45 -3
- google_cloud_pipeline_components/proto/preflight_validations_pb2.py +19 -30
- google_cloud_pipeline_components/v1/custom_job/utils.py +22 -22
- google_cloud_pipeline_components/v1/model/get_model/component.py +1 -1
- google_cloud_pipeline_components/v1/model_evaluation/__init__.py +4 -0
- google_cloud_pipeline_components/{preview → v1}/model_evaluation/evaluation_llm_classification_pipeline.py +14 -2
- google_cloud_pipeline_components/{preview → v1}/model_evaluation/evaluation_llm_text_generation_pipeline.py +29 -17
- google_cloud_pipeline_components/version.py +1 -1
- {google_cloud_pipeline_components-2.10.0.dist-info → google_cloud_pipeline_components-2.12.0.dist-info}/METADATA +1 -2
- {google_cloud_pipeline_components-2.10.0.dist-info → google_cloud_pipeline_components-2.12.0.dist-info}/RECORD +43 -39
- {google_cloud_pipeline_components-2.10.0.dist-info → google_cloud_pipeline_components-2.12.0.dist-info}/WHEEL +1 -1
- {google_cloud_pipeline_components-2.10.0.dist-info → google_cloud_pipeline_components-2.12.0.dist-info}/LICENSE +0 -0
- {google_cloud_pipeline_components-2.10.0.dist-info → google_cloud_pipeline_components-2.12.0.dist-info}/top_level.txt +0 -0
google_cloud_pipeline_components/_implementation/llm/batch_prediction_pairwise.py CHANGED

@@ -51,6 +51,9 @@ def batch_prediction_pairwise(
     model_b_parameters: Dict[str, str] = {},
     human_preference_column: str = '',
     experimental_args: Dict[str, Any] = {},
+    project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
+    location: str = _placeholders.LOCATION_PLACEHOLDER,
+    encryption_spec_key_name: str = '',
 ) -> dsl.ContainerSpec:  # pylint: disable=g-doc-args
   """Runs up to two LLM Batch Prediction jobs side-by-side.
 
@@ -83,6 +86,11 @@ def batch_prediction_pairwise(
     human_preference_column: The column containing ground truths. The default
       value is an empty string if not be provided by users.
     experimental_args: Experimentally released arguments. Subject to change.
+    project: Project used to run batch prediction jobs.
+    location: Location used to run batch prediction jobs.
+    encryption_spec_key_name: Customer-managed encryption key options. If this
+      is set, then all resources created by the component will be encrypted with
+      the provided encryption key.
 
   Returns:
     preprocessed_evaluation_dataset: Dataset of the table containing the inputs
@@ -94,8 +102,8 @@ def batch_prediction_pairwise(
       metadata for the task preprocess component.
   """
   return gcpc_utils.build_serverless_customjob_container_spec(
-      project=_placeholders.PROJECT_ID_PLACEHOLDER,
-      location=_placeholders.LOCATION_PLACEHOLDER,
+      project=project,
+      location=location,
       custom_job_payload=utils.build_payload(
           display_name='batch_prediction_pairwise',
           machine_type='n1-standard-4',
@@ -110,8 +118,8 @@ def batch_prediction_pairwise(
               "{{$.inputs.parameters['id_columns'].json_escape[0]}}"
           ),
           f'--task={task}',
-          f'--project={_placeholders.PROJECT_ID_PLACEHOLDER}',
-          f'--location={_placeholders.LOCATION_PLACEHOLDER}',
+          f'--project={project}',
+          f'--location={location}',
           f'--model_a={model_a}',
           f'--model_b={model_b}',
           (
@@ -147,9 +155,11 @@ def batch_prediction_pairwise(
           f'--staging_dir={dsl.PIPELINE_ROOT_PLACEHOLDER}',
           f'--preprocessed_evaluation_dataset_uri={preprocessed_evaluation_dataset_uri}',
           f'--metadata_path={metadata}',
+          f'--kms_key_name={encryption_spec_key_name}',
           f'--gcp_resources_path={gcp_resources}',
           '--executor_input={{$.json_escape[1]}}',
       ],
+      encryption_spec_key_name=encryption_spec_key_name,
   ),
   gcp_resources=gcp_resources,
 )
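Taken together, the hunks above show the release's recurring pattern: project and location become explicit component parameters that default to the pipeline placeholders instead of being fixed inside the component body, and encryption_spec_key_name is forwarded both as a container flag and as a job-level setting. A minimal, self-contained sketch of that pattern in plain KFP; the toy component and the placeholder string values are assumptions for illustration, not part of the package:

from kfp import dsl

# Assumed values of the _placeholders constants used in the diff.
PROJECT_ID_PLACEHOLDER = '{{$.pipeline_google_cloud_project_id}}'
LOCATION_PLACEHOLDER = '{{$.pipeline_google_cloud_location}}'


@dsl.container_component
def toy_pairwise_job(
    gcp_resources: dsl.OutputPath(str),
    project: str = PROJECT_ID_PLACEHOLDER,
    location: str = LOCATION_PLACEHOLDER,
    encryption_spec_key_name: str = '',
) -> dsl.ContainerSpec:
  # The key is also passed to the container as --kms_key_name so resources
  # created inside the job can be encrypted, mirroring the hunks above.
  return dsl.ContainerSpec(
      image='python:3.10',
      command=['echo'],
      args=[
          f'--project={project}',
          f'--location={location}',
          f'--kms_key_name={encryption_spec_key_name}',
          f'--gcp_resources_path={gcp_resources}',
      ],
  )

At submission time the placeholders resolve to the project and location the pipeline itself runs in, so callers only override them when the jobs must run elsewhere.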
google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py CHANGED

@@ -37,6 +37,7 @@ def bulk_inferrer(
     output_prediction_gcs_path: kfp.dsl.OutputPath(str),  # pytype: disable=invalid-annotation
     gcp_resources: kfp.dsl.OutputPath(str),  # pytype: disable=invalid-annotation
     sampling_strategy: str = 'greedy',
+    encryption_spec_key_name: str = '',
 ) -> kfp.dsl.ContainerSpec:  # pylint: disable=g-doc-args
   """Performs bulk inference.
 
@@ -56,6 +57,10 @@ def bulk_inferrer(
     input_dataset_path: Path to dataset to use for inference.
     sampling_strategy: The sampling strategy for inference.
     dataset_split: Perform inference on this split of the input dataset.
+    encryption_spec_key_name: Customer-managed encryption key. If this is set,
+      then all resources created by the CustomJob will be encrypted with the
+      provided encryption key. Note that this is not supported for TPU at the
+      moment.
 
   Returns:
     output_prediction: Where to save the output prediction.
@@ -72,6 +77,7 @@ def bulk_inferrer(
           machine_type=machine_type,
           image_uri=image_uri,
           args=[
+              '--app_name=bulk_inferrer',
               f'--input_model={input_model}',
               f'--input_dataset={input_dataset_path}',
               f'--dataset_split={dataset_split}',
@@ -82,6 +88,7 @@ def bulk_inferrer(
               f'--output_prediction={output_prediction}',
               f'--output_prediction_gcs_path={output_prediction_gcs_path}',
           ],
+          encryption_spec_key_name=encryption_spec_key_name,
       ),
       gcp_resources=gcp_resources,
   )
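One detail worth flagging: bulk_inferrer now passes --app_name=bulk_inferrer as its first container argument, and private_text_comparison_importer (near the end of this diff) gains a matching --app_name=text_comparison_importer. This suggests, though the container sources are not part of this diff, that the refined images bundle several entry points behind one binary that dispatches on this flag. A speculative sketch of that dispatch pattern, with stubbed handlers:

import argparse


def run_bulk_inferrer() -> None:
  """Stub for the real bulk-inference entry point."""


def run_text_comparison_importer() -> None:
  """Stub for the real importer entry point."""


APPS = {
    'bulk_inferrer': run_bulk_inferrer,
    'text_comparison_importer': run_text_comparison_importer,
}


def main() -> None:
  parser = argparse.ArgumentParser()
  parser.add_argument('--app_name', choices=sorted(APPS), required=True)
  # Remaining flags belong to the selected app, so ignore them here.
  args, _ = parser.parse_known_args()
  APPS[args.app_name]()


if __name__ == '__main__':
  main()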
google_cloud_pipeline_components/_implementation/llm/deployment_graph.py CHANGED

@@ -36,6 +36,8 @@ def pipeline(
     large_model_reference: str,
     model_display_name: Optional[str] = None,
     deploy_model: bool = True,
+    encryption_spec_key_name: str = '',
+    upload_location: str = _placeholders.LOCATION_PLACEHOLDER,
 ) -> PipelineOutput:
   # fmt: off
   """Uploads a tuned language model and (optionally) deploys it to an endpoint.
 
@@ -45,13 +47,14 @@ def pipeline(
     large_model_reference: Name of the base model. Supported values are `text-bison@001`, `t5-small`, `t5-large`, `t5-xl` and `t5-xxl`. `text-bison@001` and `t5-small` are supported in `us-central1` and `europe-west4`. `t5-large`, `t5-xl` and `t5-xxl` are only supported in `europe-west4`.
     model_display_name: Name of the fine-tuned model shown in the Model Registry. If not provided, a default name will be created.
     deploy_model: Whether to deploy the model to an endpoint in `us-central1`. Default is True.
+    encryption_spec_key_name: Customer-managed encryption key. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. Note that this is not supported for TPU at the moment.
+    upload_location: Region to upload and deploy the model to. Default is the location used to run the pipeline components.
 
   Returns:
     model_resource_name: Path to the model uploaded to the Model Registry. This will be an empty string if the model was not deployed.
     endpoint_resource_name: Path the Online Prediction Endpoint. This will be an empty string if the model was not deployed.
   """
   # fmt: on
-  upload_location = 'us-central1'
   adapter_artifact = kfp.dsl.importer(
       artifact_uri=output_adapter_path,
       artifact_class=kfp.dsl.Artifact,
@@ -87,6 +90,7 @@ def pipeline(
       model_display_name=display_name.output,
       model_reference_name=large_model_reference,
       upload_model=upload_model.output,
+      encryption_spec_key_name=encryption_spec_key_name,
       tune_type='rlhf',
   ).set_display_name('Upload Model')
   deploy_model = function_based.resolve_deploy_model(
@@ -102,6 +106,7 @@ def pipeline(
       display_name=display_name.output,
       regional_endpoint=regional_endpoint.output,
       deploy_model=deploy_model.output,
+      encryption_spec_key_name=encryption_spec_key_name,
   ).set_display_name('Deploy Model')
   return PipelineOutput(
       model_resource_name=upload_task.outputs['model_resource_name'],
google_cloud_pipeline_components/_implementation/llm/function_based.py CHANGED

@@ -13,7 +13,7 @@
 # limitations under the License.
 """Python function-based components used in KFP pipelies."""
 import functools
-from typing import Any, Dict, List, NamedTuple, Optional
+from typing import List, NamedTuple, Optional
 
 from google_cloud_pipeline_components import _image
 from google_cloud_pipeline_components._implementation.llm import env
@@ -22,19 +22,27 @@ from kfp import dsl
 
 @dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
 def resolve_machine_spec(
-    location: str,
+    accelerator_type: str = 'GPU',
     use_test_spec: bool = False,
 ) -> NamedTuple(
-    'MachineSpec',
+    'MachineSpec',
+    machine_type=str,
+    tuning_location=str,
+    accelerator_type=str,
+    accelerator_count=int,
 ):
-  """Returns machine spec to use for a given location.
+  """Returns machine spec to use for a given accelerator_type.
 
   Args:
-
-
+    accelerator_type: One of 'TPU' or 'GPU'. If 'TPU' is specified, tuning
+      components run in europe-west4. Otherwise tuning components run in
+      us-central1 on GPUs. Default is 'GPU'.
+    use_test_spec: Whether to use a lower resource machine for testing. If True,
+      a machine with the specified `accelerator_type` is provisioned.
 
   Returns:
     Machine spec.
+    tuning_location: Where the machine will run.
 
   Raises:
     ValueError: If accelerators are requested in an unsupported location.
@@ -42,57 +50,78 @@ def resolve_machine_spec(
   outputs = NamedTuple(
       'MachineSpec',
       machine_type=str,
-      accelerator_type=str,
       accelerator_count=int,
+      tuning_location=str,
+      accelerator_type=str,
   )
-  tpu_regions = {'europe-west4'}
-  gpu_regions = {'us-central1'}
   if use_test_spec:
-
-
-
-
-
-
+    if accelerator_type == 'TPU':
+      return outputs(
+          machine_type='cloud-tpu',
+          accelerator_type='TPU_V3',
+          accelerator_count=32,
+          tuning_location='europe-west4',
+      )
+    elif accelerator_type == 'GPU':
+      return outputs(
+          machine_type='a2-highgpu-1g',
+          accelerator_type='NVIDIA_TESLA_A100',
+          accelerator_count=1,
+          tuning_location='us-central1',
+      )
+    elif accelerator_type == 'CPU':
+      return outputs(
+          machine_type='e2-standard-16',
+          accelerator_type='ACCELERATOR_TYPE_UNSPECIFIED',
+          accelerator_count=0,
+          tuning_location='us-central1',
+      )
+    else:
+      raise ValueError(
+          f'Unsupported test accelerator_type {accelerator_type}. Must be one '
+          'of TPU, GPU or CPU.'
+      )
+
+  if accelerator_type == 'TPU':
     return outputs(
         machine_type='cloud-tpu',
         accelerator_type='TPU_V3',
         accelerator_count=64,
+        tuning_location='europe-west4',
     )
-  elif location in gpu_regions:
+  elif accelerator_type == 'GPU':
     return outputs(
         machine_type='a2-ultragpu-8g',
         accelerator_type='NVIDIA_A100_80GB',
         accelerator_count=8,
+        tuning_location='us-central1',
+    )
+  else:
+    raise ValueError(
+        f'Unsupported accelerator_type {accelerator_type}. Must be one of'
+        'TPU or GPU.'
     )
-  raise ValueError(
-      f'Unsupported accelerator location {location}. Must be one of'
-      f' {tpu_regions | gpu_regions}.'
-  )
 
 
 @dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
-def resolve_image_uri(
-    image_name: str,
+def resolve_refined_image_uri(
     project: str,
     location: str,
     artifact_registry: str,
-    image_name_prefix: str,
     tag: str,
     accelerator_type: str = '',
-    accelerator_count: int = 0,
+    use_experimental_image: bool = False,
 ) -> str:
   """Generates image uri based on base image name and accelerator type.
 
   Args:
-    image_name: Base image name, e.g. ``'sft'`` or ``'reward_model'``.
     project: Project that contains the artifact registry.
     location: Region that contains the artifact registry.
     artifact_registry: Registry that contains Docker images.
-    image_name_prefix: Text to prepend to the base image name.
     tag: Image tag.
     accelerator_type: One of the supported accelerator types, e.g. ``'TPU_V3'``.
-
+    use_experimental_image: Whether to use refined experimental image. Default
+      is False.
 
   Returns:
     Docker image uri
@@ -100,61 +129,36 @@ def resolve_image_uri(
   Raises:
     ValueError: if an unsupported accelerator type is provided.
   """
-
-
-
-
-
-
-    accelerator_postfix = ''
-  elif accelerator_type == 'TPU_V3':
-    accelerator_postfix = '_tpu'
-  elif accelerator_type == 'NVIDIA_A100_80GB' and accelerator_count == 8:
-    accelerator_postfix = '_gpu_test'
+  if not accelerator_type or accelerator_type == 'ACCELERATOR_TYPE_UNSPECIFIED':
+    accelerator_postfix = 'cpu'
+  elif 'TPU' in accelerator_type:
+    accelerator_postfix = 'tpu'
+  elif 'A100' in accelerator_type:
+    accelerator_postfix = 'gpu'
   else:
-
-
-
-
-
-
-
-
-
-    }
-    if image_name in backup_images and accelerator_postfix != '_gpu_test':
-      accelerator_postfix += '_backup'
-  return f'{location}-docker.pkg.dev/{project}/{artifact_registry}/{image_name_prefix}{image_name}{accelerator_postfix}:{tag}'
+    raise ValueError(
+        f'Unsupported accelerator type {accelerator_type}. Must a TPU, an A100'
+        'variant or empty if using a CPU-only machine.'
+    )
+
+  image_name_prefix = 'refined_'
+  if use_experimental_image:
+    image_name_prefix += 'experimental_'
+
+  return f'{location}-docker.pkg.dev/{project}/{artifact_registry}/{image_name_prefix}{accelerator_postfix}:{tag}'
 
 
 # Resolves image uri from the environment's private artifact registry.
 # By default this resolves an image in the vertex private registry.
-resolve_private_image_uri = functools.partial(
-    resolve_image_uri,
+resolve_private_refined_image_uri = functools.partial(
+    resolve_refined_image_uri,
     project=env.PRIVATE_ARTIFACT_REGISTRY_PROJECT,
     location=env.PRIVATE_ARTIFACT_REGISTRY_LOCATION,
     artifact_registry=env.PRIVATE_ARTIFACT_REGISTRY,
-    image_name_prefix=env.PRIVATE_IMAGE_NAME_PREFIX,
     tag=env.get_private_image_tag(),
 )
 
 
-@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
-def resolve_data_paths(
-    input_dataset: str,
-) -> NamedTuple('DataPaths', tfds_data_dir=str, tfds_name=str):
-  """Resolves dataset paths needed by downstream components."""
-  # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported
-  import os
-  # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported
-  outputs = NamedTuple('DataPaths', tfds_data_dir=str, tfds_name=str)
-  tfds_data_dir, tfds_name = os.path.split(input_dataset)
-  return outputs(
-      tfds_data_dir=tfds_data_dir,
-      tfds_name=tfds_name,
-  )
-
-
 @dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
 def resolve_reference_model_metadata(
     large_model_reference: str,
@@ -461,14 +465,6 @@ def value_exists(value: Optional[str] = None) -> bool:
   return True
 
 
-@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
-def resolve_candidate_columns(
-    candidate_columns: Optional[List[str]] = None,
-) -> List[str]:
-  """Returns candidate columns provided by the user or the default: ['candidate_0', 'candidate_1']."""
-  return candidate_columns or ['candidate_0', 'candidate_1']
-
-
 @dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
 def resolve_upload_model(large_model_reference: str) -> bool:
   """Returns whether the model should be uploaded."""
@@ -510,93 +506,3 @@ def resolve_num_microbatches(large_model_reference: str) -> int:
   if 'llama' in large_model_reference.lower():
     return 2
   return 0
-
-
-@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
-def read_file(path: str) -> str:
-  """Reads the contents of the given file."""
-  # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported
-  import re
-  # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported
-
-  path = re.sub('^gs://', '/gcs/', path)
-  with open(path, 'r') as f:
-    return f.read()
-
-
-@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
-def get_usage_metric(metadata: Dict[str, Any], key: str) -> bool:  # pytype: disable=unsupported-operands
-  """Extracts a single usage metric from metadata."""
-  return metadata[key]
-
-
-@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
-def dump_dict(value: Dict[Any, Any]) -> str:
-  """Dumps the given dict to a JSON string."""
-  # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported
-  import json
-  # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported
-
-  return json.dumps(value).replace('"', '\\"')
-
-
-@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
-def dump_list(value: List[Any]) -> str:
-  """Dumps the given dict to a JSON string."""
-  # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported
-  import json
-  # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported
-
-  return json.dumps(value).replace('"', '\\"')
-
-
-@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
-def identity(
-    x: str,
-) -> str:
-  return x
-
-
-@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
-def get_uri(artifact: dsl.Input[dsl.Artifact], is_dir: bool = False) -> str:  # pytype: disable=unsupported-operands
-  """Extracts the URI from an artifact."""
-  # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported
-  import os
-  # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported
-
-  if is_dir:
-    return os.path.join(artifact.uri, '*')
-  return artifact.uri
-
-
-@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
-def get_empty_string() -> str:
-  return ''
-
-
-@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
-def validate_rlhf_inputs(
-    large_model_reference: str,
-    eval_dataset: Optional[str] = None,
-) -> None:
-  """Checks user-provided arguments are valid for the RLHF pipeline."""
-  models_that_support_bulk_inference = {
-      't5-small',
-      't5-large',
-      't5-xl',
-      't5-xxl',
-      'llama-2-7b',
-      'llama-2-7b-chat',
-      'llama-2-13b',
-      'llama-2-13b-chat',
-  }
-  if (
-      eval_dataset
-      and large_model_reference not in models_that_support_bulk_inference
-  ):
-    raise ValueError(
-        f'eval_dataset not supported for {large_model_reference}. '
-        'Please set this value to None when tuning this model. '
-        'This model can be evaluated after tuning using Batch or Online '
-        'Prediction.'
-    )
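Net effect of the function_based.py changes: resolve_machine_spec now keys off accelerator_type ('TPU' or 'GPU', plus 'CPU' in test mode) instead of a region, and additionally returns the region the tuning job should run in. A plain-Python restatement of the default (non-test) mapping, with values copied from the added lines above; the real component is a @dsl.component and also covers the test and error branches:

from typing import NamedTuple


class MachineSpec(NamedTuple):
  machine_type: str
  tuning_location: str
  accelerator_type: str
  accelerator_count: int


# accelerator_type -> spec, per the + lines in resolve_machine_spec.
SPECS = {
    'TPU': MachineSpec('cloud-tpu', 'europe-west4', 'TPU_V3', 64),
    'GPU': MachineSpec('a2-ultragpu-8g', 'us-central1', 'NVIDIA_A100_80GB', 8),
}

assert SPECS['TPU'].tuning_location == 'europe-west4'

resolve_refined_image_uri then collapses the accelerator to a cpu/tpu/gpu suffix, so a resolved image looks like <location>-docker.pkg.dev/<project>/<registry>/refined_gpu:<tag>, with the prefix becoming refined_experimental_ when use_experimental_image is set (bracketed segments are illustrative).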
google_cloud_pipeline_components/_implementation/llm/model_evaluation_text_generation_pairwise.py CHANGED

@@ -33,22 +33,52 @@ def model_evaluation_text_generation_pairwise(
     judgments_dir: str,
     autosxs_metrics: dsl.Output[dsl.Metrics],  # pylint: disable=unused-argument # pytype: disable=unsupported-operands
     gcp_resources: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    model_a_evaluation_path: dsl.OutputPath(str),  # pylint: disable=unused-argument # pytype: disable=unsupported-operands
+    model_b_evaluation_path: dsl.OutputPath(str),  # pylint: disable=unused-argument # pytype: disable=unsupported-operands
+    evaluation_count_path: dsl.OutputPath(int),  # pylint: disable=unused-argument # pytype: disable=unsupported-operands
+    evaluation_dataset_path: dsl.OutputPath(str),  # pylint: disable=unused-argument # pytype: disable=unsupported-operands
     human_preference_column: str = '',
+    project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
+    location: str = _placeholders.LOCATION_PLACEHOLDER,
+    encryption_spec_key_name: str = '',
+    model_a: str = '',
+    model_b: str = '',
+    evaluation_dataset: str = '',
+    evaluation_dataset_metadata: str = '',  # pylint: disable=unused-argument
+    task: str = '',
 ) -> dsl.ContainerSpec:  # pylint: disable=g-doc-args
   """Compute AutoSXS metrics using judgments outputs from Arbiter.
 
   Args:
-    judgments_dir: Path
+    judgments_dir: Path to store the Judgments.
     human_preference_column: The column containing ground truths. The default
       value is an empty string if not be provided by users.
+    project: Project to upload evaluation metrics to.
+    location: Location to upload evaluation metrics to.
+    encryption_spec_key_name: Customer-managed encryption key options. If this
+      is set, then all resources created by the component will be encrypted with
+      the provided encryption key.
+    model_a: Resource path for Model A.
+    model_b: Resource path for Model B.
+    evaluation_dataset: Path to the evaluation dataset.
+    evaluation_dataset_metadata: AutoSxS metrics metadata json string.
+    task: Task that was used for this AutoSxS run.
 
   Returns:
     autosxs_metrics: Autosxs win rate metrics and human alignment metrics.
     gcp_resources: Tracker for GCP resources created by this component.
+    model_a_evaluation_path: Path to write the ModelEvaluation for Model A if it
+      is a
+      ModelRegistry model.
+    model_b_evaluation: Path to write the ModelEvaluation for Model B if it is a
+      ModelRegistry model.
+    evaluation_count: Path to write the EvaluationCount number to.
+    evaluation_dataset_path: Path to write the path to the evaluation dataset.
+      This is needed because Pipeline outputs must be component outputs.
   """
   return gcpc_utils.build_serverless_customjob_container_spec(
-      project=_placeholders.PROJECT_ID_PLACEHOLDER,
-      location=_placeholders.LOCATION_PLACEHOLDER,
+      project=project,
+      location=location,
       custom_job_payload=utils.build_payload(
           display_name='model_evaluation_text_generation_pairwise',
           machine_type='n1-standard-4',
@@ -58,8 +88,20 @@ def model_evaluation_text_generation_pairwise(
               'autosxs_metrics',
               f'--judgments_dir={judgments_dir}',
               f'--human_preference_column={human_preference_column}',
+              f'--project={project}',
+              f'--location={location}',
               '--executor_input={{$.json_escape[1]}}',
+              f'--model_a={model_a}',
+              f'--model_b={model_b}',
+              f'--model_a_evaluation_path={model_a_evaluation_path}',
+              f'--model_b_evaluation_path={model_b_evaluation_path}',
+              f'--evaluation_count_path={evaluation_count_path}',
+              f'--evaluation_dataset_path={evaluation_dataset_path}',
+              f'--evaluation_dataset={evaluation_dataset}',
+              "--evaluation_dataset_metadata={{$.inputs.parameters['evaluation_dataset_metadata'].json_escape[0]}}",
+              f'--task={task}',
           ],
+          encryption_spec_key_name=encryption_spec_key_name,
      ),
      gcp_resources=gcp_resources,
  )
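The four new dsl.OutputPath parameters exist so AutoSxS pipeline-level outputs can be produced here; as the added docstring says, pipeline outputs must be component outputs. For reference, a toy component showing the OutputPath contract these parameters rely on (standard KFP behavior, not code from this package): KFP supplies a writable local path, and whatever the job writes there becomes the output value.

from kfp import dsl


@dsl.component
def write_evaluation_count(evaluation_count_path: dsl.OutputPath(int)):
  # KFP injects a local file path; writing it sets the component output.
  with open(evaluation_count_path, 'w') as f:
    f.write('128')  # arbitrary example value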
google_cloud_pipeline_components/_implementation/llm/online_evaluation_pairwise.py CHANGED

@@ -49,6 +49,9 @@ def online_evaluation_pairwise(
     judgments_format: str = 'jsonl',
     bigquery_destination_prefix: str = '',
     experimental_args: Dict[str, Any] = {},
+    project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
+    location: str = _placeholders.LOCATION_PLACEHOLDER,
+    encryption_spec_key_name: str = '',
 ) -> dsl.ContainerSpec:  # pylint: disable=g-doc-args
   """Evaluate two models using an autorater.
 
@@ -65,6 +68,11 @@ def online_evaluation_pairwise(
     bigquery_destination_prefix: BigQuery table to write judgments to if the
       specified format is 'bigquery'.
     experimental_args: Experimentally released arguments. Subject to change.
+    project: Project used to make autorater predictions.
+    location: Location used to make autorater predictions.
+    encryption_spec_key_name: Customer-managed encryption key options. If this
+      is set, then all resources created by the component will be encrypted with
+      the provided encryption key.
 
   Returns:
     judgments: Individual judgments used to calculate the win rates.
@@ -74,8 +82,8 @@ def online_evaluation_pairwise(
     metadata: Computed runtime metrics metadata from this component.
   """
   return gcpc_utils.build_serverless_customjob_container_spec(
-      project=_placeholders.PROJECT_ID_PLACEHOLDER,
-      location=_placeholders.LOCATION_PLACEHOLDER,
+      project=project,
+      location=location,
       custom_job_payload=utils.build_payload(
           display_name='online_evaluation_pairwise',
           machine_type='n1-standard-4',
@@ -86,6 +94,8 @@ def online_evaluation_pairwise(
              f'--inference_output_uri={inference_output_uri}',
              f'--human_preference_column={human_preference_column}',
              f'--task={task}',
+             f'--project={project}',
+             f'--location={location}',
              f'--prediction_endpoint_overrides={_get_prediction_endpoint_overrides()}',
              f'--output_dir={dsl.PIPELINE_ROOT_PLACEHOLDER}',
              f'--judgments_uri={judgments_uri}',
@@ -100,8 +110,10 @@ def online_evaluation_pairwise(
              "{{$.inputs.parameters['experimental_args'].json_escape[0]}}"
          ),
          '--executor_input={{$.json_escape[1]}}',
+         f'--kms_key_name={encryption_spec_key_name}',
          f'--metadata_path={metadata}',
      ],
+      encryption_spec_key_name=encryption_spec_key_name,
  ),
  gcp_resources=gcp_resources,
 )
google_cloud_pipeline_components/_implementation/llm/private_text_comparison_importer.py CHANGED

@@ -28,11 +28,12 @@ def private_text_comparison_importer(
     choice_field_name: str,
     split: str,
     large_model_reference: str,
-    image_uri: str,
     output_dataset_path: kfp.dsl.OutputPath(str),  # pytype: disable=invalid-annotation
     gcp_resources: kfp.dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    image_uri: str = utils.get_default_image_uri('refined_cpu', ''),
     machine_type: str = 'e2-highmem-8',
     instruction: str = '',
+    encryption_spec_key_name: str = '',
 ) -> kfp.dsl.ContainerSpec:  # pylint: disable=g-doc-args
   """Import a text dataset.
 
@@ -52,8 +53,12 @@ def private_text_comparison_importer(
       this component tokenizes and then caches the tokenized tasks.
     machine_type: The type of the machine to provision for the custom job.
     instruction: Optional instruction to prepend to inputs field.
-    image_uri:
+    image_uri: Optional location of the text comparison importer image.
     dataflow_worker_image_uri: Location of the Dataflow worker image.
+    encryption_spec_key_name: Customer-managed encryption key. If this is set,
+      then all resources created by the CustomJob will be encrypted with the
+      provided encryption key. Note that this is not supported for TPU at the
+      moment.
 
   Returns:
     output_dataset_path: Path to cached SeqIO task created from input dataset.
@@ -67,6 +72,7 @@ def private_text_comparison_importer(
           machine_type=machine_type,
           image_uri=image_uri,
           args=[
+              '--app_name=text_comparison_importer',
              f'--input_text={input_text}',
              f'--inputs_field_name={inputs_field_name}',
              f'--comma_separated_candidates_field_names={comma_separated_candidates_field_names}',
@@ -81,6 +87,7 @@ def private_text_comparison_importer(
                  f'{kfp.dsl.PIPELINE_TASK_ID_PLACEHOLDER}'
              ),
          ],
+          encryption_spec_key_name=encryption_spec_key_name,
      ),
      gcp_resources=gcp_resources,
  )
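Finally, every component in this diff forwards encryption_spec_key_name into utils.build_payload(...). That helper is internal and not shown here, but a hedged sketch of what it plausibly emits, using field names from the public Vertex AI CustomJob REST schema (the function below and its exact payload shape are assumptions, not the package's code):

def build_custom_job_payload(
    display_name: str,
    machine_type: str,
    image_uri: str,
    args: list,
    encryption_spec_key_name: str = '',
) -> dict:
  """Sketch of a CustomJob request body with an optional CMEK setting."""
  payload = {
      'displayName': display_name,
      'jobSpec': {
          'workerPoolSpecs': [{
              'machineSpec': {'machineType': machine_type},
              'replicaCount': 1,
              'containerSpec': {'imageUri': image_uri, 'args': list(args)},
          }],
      },
  }
  if encryption_spec_key_name:
    # Key resource name: projects/<p>/locations/<l>/keyRings/<r>/cryptoKeys/<k>
    payload['encryptionSpec'] = {'kmsKeyName': encryption_spec_key_name}
  return payload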