google-cloud-pipeline-components 2.13.1__py3-none-any.whl → 2.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (82)
  1. google_cloud_pipeline_components/__init__.py +5 -6
  2. google_cloud_pipeline_components/_implementation/llm/deployment_graph.py +12 -34
  3. google_cloud_pipeline_components/_implementation/llm/env.py +1 -1
  4. google_cloud_pipeline_components/_implementation/llm/function_based.py +14 -48
  5. google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py +1 -1
  6. google_cloud_pipeline_components/_implementation/llm/infer_preprocessor.py +109 -0
  7. google_cloud_pipeline_components/_implementation/llm/online_evaluation_pairwise.py +8 -0
  8. google_cloud_pipeline_components/_implementation/llm/reinforcement_learning_graph.py +27 -36
  9. google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py +31 -47
  10. google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py +84 -0
  11. google_cloud_pipeline_components/_implementation/llm/validate_pipeline.py +11 -0
  12. google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +0 -12
  13. google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py +2 -1
  14. google_cloud_pipeline_components/_placeholders.py +30 -1
  15. google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py +1 -1
  16. google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py +2 -2
  17. google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py +2 -2
  18. google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +34 -34
  19. google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml +34 -34
  20. google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml +34 -34
  21. google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml +34 -34
  22. google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py +1 -1
  23. google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml +39 -39
  24. google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml +41 -41
  25. google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py +2 -2
  26. google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py +2 -2
  27. google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml +4 -4
  28. google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py +3 -3
  29. google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py +2 -2
  30. google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml +17 -17
  31. google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py +2 -2
  32. google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml +15 -15
  33. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py +2 -2
  34. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml +16 -16
  35. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py +2 -2
  36. google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml +15 -15
  37. google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml +14 -14
  38. google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml +13 -13
  39. google_cloud_pipeline_components/preview/automl/vision/data_converter.py +3 -1
  40. google_cloud_pipeline_components/preview/custom_job/component.py +2 -2
  41. google_cloud_pipeline_components/preview/custom_job/utils.py +3 -2
  42. google_cloud_pipeline_components/preview/llm/infer/component.py +22 -25
  43. google_cloud_pipeline_components/preview/llm/rlhf/component.py +72 -10
  44. google_cloud_pipeline_components/preview/model_evaluation/__init__.py +5 -2
  45. google_cloud_pipeline_components/preview/model_evaluation/model_evaluation_import_component.py +209 -0
  46. google_cloud_pipeline_components/proto/task_error_pb2.py +33 -0
  47. google_cloud_pipeline_components/proto/template_metadata_pb2.py +22 -15
  48. google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml +10 -10
  49. google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml +31 -31
  50. google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml +13 -13
  51. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +13 -3
  52. google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +18 -15
  53. google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml +37 -37
  54. google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py +2 -2
  55. google_cloud_pipeline_components/v1/automl/tabular/ensemble.py +2 -2
  56. google_cloud_pipeline_components/v1/automl/tabular/finalizer.py +1 -1
  57. google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py +1 -1
  58. google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py +1 -1
  59. google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py +2 -2
  60. google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py +2 -2
  61. google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py +1 -1
  62. google_cloud_pipeline_components/v1/automl/tabular/transform.py +2 -2
  63. google_cloud_pipeline_components/v1/model_evaluation/__init__.py +3 -1
  64. google_cloud_pipeline_components/v1/model_evaluation/classification_component.py +2 -2
  65. google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py +8 -10
  66. google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py +2 -2
  67. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py +2 -2
  68. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py +2 -2
  69. google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py +2 -2
  70. google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py +2 -2
  71. google_cloud_pipeline_components/v1/model_evaluation/evaluation_llm_classification_pipeline.py +4 -2
  72. google_cloud_pipeline_components/v1/model_evaluation/evaluation_llm_text_generation_pipeline.py +4 -2
  73. google_cloud_pipeline_components/{preview → v1}/model_evaluation/model_based_llm_evaluation/__init__.py +2 -2
  74. google_cloud_pipeline_components/{preview → v1}/model_evaluation/model_based_llm_evaluation/autosxs/autosxs_pipeline.py +1 -0
  75. google_cloud_pipeline_components/version.py +1 -1
  76. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.1.dist-info}/METADATA +18 -19
  77. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.1.dist-info}/RECORD +81 -79
  78. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.1.dist-info}/WHEEL +1 -1
  79. google_cloud_pipeline_components/proto/preflight_validations_pb2.py +0 -47
  80. /google_cloud_pipeline_components/{preview → v1}/model_evaluation/model_based_llm_evaluation/autosxs/__init__.py +0 -0
  81. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.1.dist-info}/LICENSE +0 -0
  82. {google_cloud_pipeline_components-2.13.1.dist-info → google_cloud_pipeline_components-2.14.1.dist-info}/top_level.txt +0 -0
@@ -2844,7 +2844,7 @@ deploymentSpec:
  \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
  "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
  {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"",
- "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20240214_1325", "\",
+ "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20240419_0625", "\",
  \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}",
  "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=",
  "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}'
@@ -2875,7 +2875,7 @@ deploymentSpec:
  \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\
  \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\
  \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-feature-transform-engine:
  container:
  args:
@@ -2960,8 +2960,8 @@ deploymentSpec:
  "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}'
  - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}'
  - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}'
- - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240214_1325
- - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325
+ - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240419_0625
+ - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240419_0625
  - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}'
  - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}'
  - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}'
@@ -2978,7 +2978,7 @@ deploymentSpec:
  - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat":
  ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}'
  - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}'
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240419_0625
  resources:
  cpuLimit: 8.0
  memoryLimit: 30.0
@@ -3098,10 +3098,10 @@ deploymentSpec:
  \ worker pool specs.\n \"\"\"\n import copy\n import collections\n import\
  \ os\n import re\n\n def get_gcs_path(path):\n return re.sub(r'/gcs/',\
  \ 'gs://', path)\n\n formatted_job_dir = get_gcs_path(job_dir)\n prediction_docker_uri\
- \ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20240214_1325'\n\
+ \ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20240419_0625'\n\
  \ )\n master_worker_pool_spec = {\n 'replica_count': 1,\n 'machine_spec':\
  \ {\n 'machine_type': machine_type,\n },\n 'container_spec':\
- \ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20240214_1325',\n\
+ \ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20240419_0625',\n\
  \ 'args': [\n f'--job_dir={formatted_job_dir}',\n\
  \ f'--target_column={target_column}',\n f'--objective={objective}',\n\
  \ f'--training_data_path={get_gcs_path(materialized_train_split)}',\n\
@@ -3159,7 +3159,7 @@ deploymentSpec:
  \ 'predictionSchemaUri': os.path.join(model_dir, 'prediction_schema.yaml'),\n\
  \ }\n unmanaged_container_model.uri = model_dir\n\n return collections.namedtuple('Outputs',\
  \ ['worker_pool_specs'])(\n worker_pool_specs_lst\n )\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-get-model-display-name:
  container:
  args:
@@ -3186,7 +3186,7 @@ deploymentSpec:
  \n return collections.namedtuple(\n 'Outputs',\n [\n \
  \ 'model_display_name',\n ],\n )(\n model_display_name,\n )\n\
  \n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-get-prediction-type-for-xgboost:
  container:
  args:
@@ -3215,7 +3215,7 @@ deploymentSpec:
  \ Must be one of'\n ' [reg:squarederror, reg:squaredlogerror, reg:logistic,\
  \ reg:gamma,'\n ' reg:tweedie, reg:pseudohubererror, binary:logistic,'\n\
  \ ' multi:softprob].'\n )\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-model-batch-predict:
  container:
  args:
@@ -3407,7 +3407,7 @@ deploymentSpec:
  \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\
  \ ],\n )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\
  \ )\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20240419_0625
  exec-split-materialized-data:
  container:
  args:
@@ -3453,7 +3453,7 @@ deploymentSpec:
  \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\
  \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\
  \ 'w') as f:\n f.write(file_patterns[2])\n\n"
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20240419_0625
  exec-training-configurator-and-validator:
  container:
  args:
@@ -3498,7 +3498,7 @@ deploymentSpec:
  ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}'
  - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat":
  ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}'
- image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240214_1325
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20240419_0625
  exec-xgboost-trainer:
  container:
  args:
@@ -22,6 +22,7 @@ from google_cloud_pipeline_components.preview.automl.vision.json_utils import Co
  from kfp import dsl


+ # pylint: disable=singleton-comparison
  # pylint: disable=g-doc-args
  @dsl.container_component
  def data_converter(
@@ -31,6 +32,7 @@ def data_converter(
  objective: str,
  output_dir: dsl.Output[dsl.Artifact],
  gcp_resources: dsl.OutputPath(str),
+ enable_input_validation: bool = True,
  location: str = 'us-central1',
  timeout: str = '604800s',
  service_account: Optional[str] = None,
@@ -75,7 +77,7 @@
  'image_uri': 'us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/data-converter',
  'args': [
  '--enable_input_validation',
- 'true',
+ str(enable_input_validation),
  '--input_file_path',
  input_file_path,
  '--input_file_type',
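The data_converter hunks above add an enable_input_validation flag (default True) and forward its runtime value to the container instead of the previously hard-coded 'true'. As a rough illustration of that pattern only (the image, command, flag names, and outputs below are placeholders, not the real data-converter interface), a @dsl.container_component that forwards a boolean input as a string argument looks like this:

from kfp import dsl


@dsl.container_component
def convert_data(
    input_file_path: str,
    gcp_resources: dsl.OutputPath(str),
    enable_input_validation: bool = True,
):
    # Hypothetical sketch: the boolean pipeline input is forwarded to the
    # container command line, mirroring the str(enable_input_validation) change above.
    return dsl.ContainerSpec(
        image='python:3.10',
        command=['echo'],
        args=[
            '--enable_input_validation',
            str(enable_input_validation),
            '--input_file_path',
            input_file_path,
            '--gcp_resources',
            gcp_resources,
        ],
    )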
@@ -37,7 +37,7 @@ def custom_training_job(
  base_output_directory: str = '',
  labels: Dict[str, str] = {},
  encryption_spec_key_name: str = '',
- persistent_resource_id: str = '',
+ persistent_resource_id: str = _placeholders.PERSISTENT_RESOURCE_ID_PLACEHOLDER,
  project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
  ):
  # fmt: off
@@ -57,7 +57,7 @@
  base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination).
  labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf).
  encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key.
- persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. If this is specified, the job will be run on existing machines held by the PersistentResource instead of on-demand short-live machines. The network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
+ persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. The default value is a placeholder that will be resolved to the PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig)'s persistent resource id at runtime. However, if the PipelineJob doesn't set Persistent Resource as the job level runtime, the placedholder will be resolved to an empty string and the custom job will be run on demand. If the value is set explicitly, the custom job will runs in the specified persistent resource, in this case, please note the network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
  project: Project to create the custom training job in. Defaults to the project in which the PipelineJob is run.
  Returns:
  gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the CustomJob.
@@ -18,6 +18,7 @@ import textwrap
  from typing import Callable, Dict, List, Optional
  import warnings

+ from google_cloud_pipeline_components import _placeholders
  from google_cloud_pipeline_components.preview.custom_job import component
  from kfp import components
  import yaml
@@ -68,7 +69,7 @@ def create_custom_training_job_from_component(
  nfs_mounts: Optional[List[Dict[str, str]]] = None,
  base_output_directory: str = '',
  labels: Optional[Dict[str, str]] = None,
- persistent_resource_id: str = '',
+ persistent_resource_id: str = _placeholders.PERSISTENT_RESOURCE_ID_PLACEHOLDER,
  env: Optional[List[Dict[str, str]]] = None,
  ) -> Callable:
  # fmt: off
@@ -96,7 +97,7 @@ def create_custom_training_job_from_component(
  nfs_mounts: A list of [NfsMount](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training](https://cloud.google.com/vertex-ai/docs/training/train-nfs-share).
  base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination).
  labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf).
- persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. If this is specified, the job will be run on existing machines held by the PersistentResource instead of on-demand short-live machines. The network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
+ persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. The default value is a placeholder that will be resolved to the PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig)'s persistent resource id at runtime. However, if the PipelineJob doesn't set Persistent Resource as the job level runtime, the placedholder will be resolved to an empty string and the custom job will be run on demand. If the value is set explicitly, the custom job will runs in the specified persistent resource, in this case, please note the network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
  env: Environment variables to be passed to the container. Takes the form `[{'name': '...', 'value': '...'}]`. Maximum limit is 100.

  Returns:
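Both custom_training_job and create_custom_training_job_from_component now default persistent_resource_id to a placeholder that is resolved from the PipelineJob's runtime config, falling back to on-demand machines when no persistent resource is set at the job level. A minimal sketch of overriding that default with an explicit resource follows; the training component and the resource ID are hypothetical, not part of this package:

from kfp import dsl
from google_cloud_pipeline_components.preview.custom_job import utils


@dsl.component
def train(epochs: int):
    # Hypothetical user-defined training step.
    print(f'training for {epochs} epochs')


# Wrap the component as a Vertex AI CustomJob. Leaving persistent_resource_id at
# its new default would inherit the PipelineJob's persistent resource, if any.
train_job = utils.create_custom_training_job_from_component(
    train,
    machine_type='n1-standard-8',
    persistent_resource_id='my-persistent-resource',  # hypothetical, pins the job to a known resource
)


@dsl.pipeline(name='persistent-resource-demo')
def demo_pipeline():
    train_job(epochs=10)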
@@ -18,7 +18,7 @@ from typing import NamedTuple, Optional
  from google_cloud_pipeline_components import _placeholders
  from google_cloud_pipeline_components._implementation.llm import bulk_inferrer
  from google_cloud_pipeline_components._implementation.llm import env
- from google_cloud_pipeline_components._implementation.llm import function_based
+ from google_cloud_pipeline_components._implementation.llm import infer_preprocessor
  from google_cloud_pipeline_components._implementation.llm import preprocess_chat_dataset
  from google_cloud_pipeline_components._implementation.llm import private_text_importer
  import kfp
@@ -66,14 +66,16 @@ def infer_pipeline(
  """
  # fmt: on
  prompt_column = 'input_text'
- machine_spec = function_based.resolve_machine_spec(
+ preprocess_metadata = infer_preprocessor.infer_preprocessor(
+ large_model_reference=large_model_reference,
  accelerator_type=accelerator_type,
  use_test_spec=env.get_use_test_machine_spec(),
- ).set_display_name('Resolve Machine Spec')
- reference_model_metadata = function_based.resolve_reference_model_metadata(
- large_model_reference=large_model_reference,
- reference_model_path=model_checkpoint,
- ).set_display_name('Resolve Model Metadata')
+ project=env.PRIVATE_ARTIFACT_REGISTRY_PROJECT,
+ location=env.PRIVATE_ARTIFACT_REGISTRY_LOCATION,
+ artifact_registry=env.PRIVATE_ARTIFACT_REGISTRY,
+ tag=env.get_private_image_tag(),
+ instruction=instruction,
+ ).set_display_name('Preprocess Inputs')

  processed_dataset = preprocess_chat_dataset.preprocess_chat_dataset(
  large_model_reference=large_model_reference,
@@ -82,10 +84,6 @@
  dataset_type='prompt',
  ).set_display_name('Preprocess Dataset')

- resolved_text_instruction = function_based.resolve_instruction(
- large_model_reference=large_model_reference,
- instruction=instruction,
- ).set_display_name('Resolve Instruction')
  prompt_dataset_importer = (
  private_text_importer.private_text_importer(
  project=project,
@@ -94,35 +92,34 @@
  inputs_field_name=prompt_column,
  targets_field_name='', # ignore targets_field_name
  output_split_name=env.TRAIN_SPLIT,
- large_model_reference=reference_model_metadata.outputs[
- 'large_model_reference'
+ large_model_reference=preprocess_metadata.outputs[
+ 'metadata_large_model_reference'
  ],
- instruction=resolved_text_instruction.output,
+ instruction=preprocess_metadata.outputs['metadata_instruction'],
  encryption_spec_key_name=encryption_spec_key_name,
  )
  .set_display_name('Import Prompt Dataset')
  .set_caching_options(False)
  )

- bulk_inferrer_image_uri = function_based.resolve_private_refined_image_uri(
- accelerator_type=machine_spec.outputs['accelerator_type'],
- ).set_display_name('Resolve Bulk Inferrer Image URI')
  bulk_inference = bulk_inferrer.bulk_inferrer(
  project=project,
- location=machine_spec.outputs['tuning_location'],
- input_model=reference_model_metadata.outputs['reference_model_path'],
+ location=preprocess_metadata.outputs['metadata_tuning_location'],
+ input_model=preprocess_metadata.outputs['metadata_reference_model_path'],
  input_dataset_path=prompt_dataset_importer.outputs['imported_data_path'],
  dataset_split=env.TRAIN_SPLIT,
  inputs_sequence_length=prompt_sequence_length,
  targets_sequence_length=target_sequence_length,
- large_model_reference=reference_model_metadata.outputs[
- 'large_model_reference'
+ large_model_reference=preprocess_metadata.outputs[
+ 'metadata_large_model_reference'
  ],
  sampling_strategy=sampling_strategy,
- accelerator_type=machine_spec.outputs['accelerator_type'],
- accelerator_count=machine_spec.outputs['accelerator_count'],
- machine_type=machine_spec.outputs['machine_type'],
- image_uri=bulk_inferrer_image_uri.output,
+ accelerator_type=preprocess_metadata.outputs['metadata_accelerator_type'],
+ accelerator_count=preprocess_metadata.outputs[
+ 'metadata_accelerator_count'
+ ],
+ machine_type=preprocess_metadata.outputs['metadata_machine_type'],
+ image_uri=preprocess_metadata.outputs['metadata_refined_image_uri'],
  encryption_spec_key_name=encryption_spec_key_name,
  ).set_display_name('Bulk Inferrer')
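The infer pipeline now consolidates machine spec, model metadata, instruction, and image URI resolution into a single infer_preprocessor step and wires its named outputs into the downstream tasks. For readers less familiar with this KFP pattern, here is a self-contained sketch, with illustrative component and output names, of one task exposing multiple named outputs that later tasks consume via task.outputs[...]:

from typing import NamedTuple

from kfp import dsl


@dsl.component
def preprocess(reference: str) -> NamedTuple('Outputs', machine_type=str, accelerator_type=str):
    # Illustrative stand-in for a preprocessor that resolves several values at once.
    Outputs = NamedTuple('Outputs', machine_type=str, accelerator_type=str)
    return Outputs('a2-highgpu-1g', 'NVIDIA_TESLA_A100')


@dsl.component
def run_job(machine_type: str, accelerator_type: str):
    print(machine_type, accelerator_type)


@dsl.pipeline(name='named-outputs-demo')
def demo_pipeline(reference: str = 'demo-model'):
    meta = preprocess(reference=reference)
    run_job(
        machine_type=meta.outputs['machine_type'],
        accelerator_type=meta.outputs['accelerator_type'],
    )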
 
@@ -21,6 +21,8 @@ from google_cloud_pipeline_components._implementation.llm import env
  from google_cloud_pipeline_components._implementation.llm import function_based
  from google_cloud_pipeline_components._implementation.llm import reinforcement_learning_graph
  from google_cloud_pipeline_components._implementation.llm import reward_model_graph
+ from google_cloud_pipeline_components._implementation.llm import rlhf_preprocessor
+ from google_cloud_pipeline_components._implementation.llm import utils
  from google_cloud_pipeline_components._implementation.llm import validate_pipeline
  from google_cloud_pipeline_components.preview.llm.infer import component
  import kfp
@@ -94,11 +96,49 @@ def rlhf_pipeline(
  eval_dataset=eval_dataset,
  ).set_display_name('Validate Inputs')

+ preprocess_metadata = rlhf_preprocessor.rlhf_preprocessor(
+ large_model_reference=large_model_reference,
+ accelerator_type=accelerator_type,
+ use_test_spec=env.get_use_test_machine_spec(),
+ project=env.PRIVATE_ARTIFACT_REGISTRY_PROJECT,
+ location=env.PRIVATE_ARTIFACT_REGISTRY_LOCATION,
+ artifact_registry=env.PRIVATE_ARTIFACT_REGISTRY,
+ tag=env.get_private_image_tag(),
+ evaluation_dataset=eval_dataset,
+ tensorboard_resource_id=tensorboard_resource_id,
+ upload_location=location,
+ model_display_name=model_display_name,
+ deploy_model=deploy_model,
+ ).set_display_name('Preprocess Inputs')
+ num_microbatches = preprocess_metadata.outputs['metadata_num_microbatches']
+
  reward_model_pipeline = (
  (
  reward_model_graph.pipeline(
  preference_dataset=preference_dataset,
  large_model_reference=large_model_reference,
+ reward_model_reference=preprocess_metadata.outputs[
+ 'metadata_reward_model_reference'
+ ],
+ reward_model_path=preprocess_metadata.outputs[
+ 'metadata_reward_model_path'
+ ],
+ machine_type=preprocess_metadata.outputs['metadata_machine_type'],
+ tuning_location=preprocess_metadata.outputs[
+ 'metadata_tuning_location'
+ ],
+ accelerator_type=preprocess_metadata.outputs[
+ 'metadata_accelerator_type'
+ ],
+ accelerator_count=preprocess_metadata.outputs[
+ 'metadata_accelerator_count'
+ ],
+ reward_model_image_uri=preprocess_metadata.outputs[
+ 'metadata_refined_image_uri'
+ ],
+ comma_separated_candidates_field_names=preprocess_metadata.outputs[
+ 'metadata_candidate_columns_string'
+ ],
  prompt_sequence_length=prompt_sequence_length,
  target_sequence_length=target_sequence_length,
  eval_dataset=validate_pipeline_task.outputs[
@@ -110,9 +150,9 @@ def rlhf_pipeline(
  lora_dim=reward_lora_dim,
  project=project,
  location=location,
- accelerator_type=accelerator_type,
  tensorboard_resource_id=tensorboard_resource_id,
  encryption_spec_key_name=encryption_spec_key_name,
+ num_microbatches=num_microbatches,
  )
  )
  .set_display_name('Train Reward Model')
@@ -120,8 +160,8 @@
  )
  rl_model_pipeline = reinforcement_learning_graph.pipeline(
  prompt_dataset=prompt_dataset,
- input_reward_model_path=reward_model_pipeline.outputs[
- 'reward_model_base_path'
+ input_reward_model_path=preprocess_metadata.outputs[
+ 'metadata_reward_model_path'
  ],
  input_reward_adapter_path=reward_model_pipeline.outputs[
  'reward_model_adapter_path'
@@ -130,6 +170,22 @@
  'reward_dataset_path'
  ],
  large_model_reference=large_model_reference,
+ reward_model_reference=preprocess_metadata.outputs[
+ 'metadata_reward_model_reference'
+ ],
+ policy_model_reference=preprocess_metadata.outputs[
+ 'metadata_large_model_reference'
+ ],
+ policy_model_path=preprocess_metadata.outputs[
+ 'metadata_reference_model_path'
+ ],
+ machine_type=preprocess_metadata.outputs['metadata_machine_type'],
+ tuning_location=preprocess_metadata.outputs['metadata_tuning_location'],
+ accelerator_type=preprocess_metadata.outputs['metadata_accelerator_type'],
+ accelerator_count=preprocess_metadata.outputs[
+ 'metadata_accelerator_count'
+ ],
+ rl_image_uri=preprocess_metadata.outputs['metadata_refined_image_uri'],
  prompt_sequence_length=prompt_sequence_length,
  target_sequence_length=target_sequence_length,
  reinforcement_learning_rate_multiplier=reinforcement_learning_rate_multiplier,
@@ -138,17 +194,16 @@
  instruction=instruction,
  reward_lora_dim=reward_lora_dim,
  project=project,
- accelerator_type=accelerator_type,
  location=location,
  tensorboard_resource_id=tensorboard_resource_id,
  encryption_spec_key_name=encryption_spec_key_name,
+ num_microbatches=num_microbatches,
  ).set_display_name('Reinforcement Learning')

- has_inference_dataset = function_based.value_exists(
- value=eval_dataset
- ).set_display_name('Resolve Inference Dataset')
+ has_inference_dataset = preprocess_metadata.outputs['has_inference_dataset']
+
  with kfp.dsl.Condition(
- has_inference_dataset.output == True, # pylint: disable=singleton-comparison
+ has_inference_dataset == True, # pylint: disable=singleton-comparison
  name='Perform Inference',
  ):
  has_model_checkpoint = function_based.value_exists(
@@ -174,10 +229,17 @@
  llm_model_handler = deployment_graph.pipeline(
  output_adapter_path=rl_model_pipeline.outputs['output_adapter_path'],
  large_model_reference=large_model_reference,
- model_display_name=model_display_name,
- deploy_model=deploy_model,
+ policy_model_reference=preprocess_metadata.outputs[
+ 'metadata_large_model_reference'
+ ],
+ model_display_name=preprocess_metadata.outputs[
+ 'metadata_model_display_name'
+ ],
+ deploy_model=preprocess_metadata.outputs['metadata_deploy_model'],
+ upload_model=preprocess_metadata.outputs['metadata_upload_model'],
  encryption_spec_key_name=encryption_spec_key_name,
  upload_location=location,
+ regional_endpoint=preprocess_metadata.outputs['metadata_upload_location'],
  ).set_display_name('Upload and Deploy Tuned Model')

  return PipelineOutput(
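The RLHF pipeline follows the same single-preprocessor pattern and also changes how the inference branch is gated: the condition now compares a preprocessor output against True inside kfp.dsl.Condition instead of a dedicated resolver task's .output. A small, self-contained sketch of that conditional pattern, with illustrative component names:

from kfp import dsl


@dsl.component
def has_dataset(dataset_uri: str) -> bool:
    return bool(dataset_uri)


@dsl.component
def run_inference(dataset_uri: str):
    print(f'running inference on {dataset_uri}')


@dsl.pipeline(name='conditional-inference-demo')
def demo_pipeline(dataset_uri: str = ''):
    check = has_dataset(dataset_uri=dataset_uri)
    with dsl.Condition(check.output == True, name='Perform Inference'):  # pylint: disable=singleton-comparison
        run_inference(dataset_uri=dataset_uri)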
@@ -1,4 +1,4 @@
- # Copyright 2021 The Kubeflow Authors. All Rights Reserved.
+ # Copyright 2024 The Kubeflow Authors. All Rights Reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -16,10 +16,12 @@
  from google_cloud_pipeline_components.preview.model_evaluation.data_bias_component import detect_data_bias as DetectDataBiasOp
  from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp
  from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_graph_component import feature_attribution_graph_component as FeatureAttributionGraphComponentOp
- from google_cloud_pipeline_components.preview.model_evaluation.model_based_llm_evaluation.autosxs.autosxs_pipeline import autosxs_pipeline
  from google_cloud_pipeline_components.preview.model_evaluation.model_bias_component import detect_model_bias as DetectModelBiasOp
+ from google_cloud_pipeline_components.preview.model_evaluation.model_evaluation_import_component import model_evaluation_import as ModelImportEvaluationOp
  from google_cloud_pipeline_components.v1.model_evaluation.evaluation_llm_classification_pipeline import evaluation_llm_classification_pipeline
  from google_cloud_pipeline_components.v1.model_evaluation.evaluation_llm_text_generation_pipeline import evaluation_llm_text_generation_pipeline
+ from google_cloud_pipeline_components.v1.model_evaluation.model_based_llm_evaluation.autosxs.autosxs_pipeline import autosxs_pipeline
+

  __all__ = [
  'autosxs_pipeline',
@@ -29,4 +31,5 @@ __all__ = [
  'FeatureAttributionGraphComponentOp',
  'DetectModelBiasOp',
  'DetectDataBiasOp',
+ 'ModelImportEvaluationOp',
  ]
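With this __init__.py change, the AutoSxS pipeline is re-exported from its promoted v1 location while staying importable from the preview namespace, and the new ModelImportEvaluationOp is added to the preview model-evaluation surface. The import forms implied by the diff:

# Re-exports from the preview namespace (existing call sites keep working):
from google_cloud_pipeline_components.preview.model_evaluation import (
    ModelImportEvaluationOp,
    autosxs_pipeline,
)

# Direct import from the promoted v1 location:
from google_cloud_pipeline_components.v1.model_evaluation.model_based_llm_evaluation.autosxs.autosxs_pipeline import autosxs_pipeline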
@@ -0,0 +1,209 @@
+ # Copyright 2024 The Kubeflow Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ from typing import List, Optional
+
+ from google_cloud_pipeline_components import _image
+ from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics
+ from google_cloud_pipeline_components.types.artifact_types import ForecastingMetrics
+ from google_cloud_pipeline_components.types.artifact_types import RegressionMetrics
+ from google_cloud_pipeline_components.types.artifact_types import VertexModel
+ from kfp import dsl
+ from kfp.dsl import Input
+ from kfp.dsl import Metrics
+
+
+ @dsl.container_component
+ def model_evaluation_import(
+ model: Input[VertexModel],
+ gcp_resources: dsl.OutputPath(str),
+ evaluation_resource_name: dsl.OutputPath(str),
+ metrics: Optional[Input[Metrics]] = None,
+ row_based_metrics: Optional[Input[Metrics]] = None,
+ problem_type: Optional[str] = None,
+ classification_metrics: Optional[Input[ClassificationMetrics]] = None,
+ forecasting_metrics: Optional[Input[ForecastingMetrics]] = None,
+ regression_metrics: Optional[Input[RegressionMetrics]] = None,
+ text_generation_metrics: Optional[Input[Metrics]] = None,
+ question_answering_metrics: Optional[Input[Metrics]] = None,
+ summarization_metrics: Optional[Input[Metrics]] = None,
+ explanation: Optional[Input[Metrics]] = None,
+ feature_attributions: Optional[Input[Metrics]] = None,
+ embedding_metrics: Optional[Input[Metrics]] = None,
+ display_name: str = "",
+ dataset_path: str = "",
+ dataset_paths: List[str] = [],
+ dataset_type: str = "",
+ ):
+ # fmt: off
+ """Imports a model evaluation artifact to an existing Vertex model with
+ ModelService.ImportModelEvaluation.
+
+ For more details, see
+ https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models.evaluations
+ One of the metrics inputs must be provided, metrics & problem_type,
+ classification_metrics, regression_metrics, or forecasting_metrics, text_generation_metrics,
+ question_answering_metrics, summarization_metrics, embedding_metrics.
+
+ Args:
+ model: Vertex model resource that will be the parent resource of the
+ uploaded evaluation.
+ metrics: Path of metrics generated from an evaluation component.
+ row_based_metrics:
+ Path of row_based_metrics generated from an evaluation component.
+ problem_type: The problem type of the metrics being imported to the
+ VertexModel. `classification`, `regression`, `forecasting`,
+ `text-generation`, `question-answering`, and `summarization` are the
+ currently supported problem types. Must be provided when `metrics` is
+ provided.
+ classification_metrics: google.ClassificationMetrics artifact generated from
+ the ModelEvaluationClassificationOp component.
+ forecasting_metrics: google.ForecastingMetrics artifact generated from
+ the ModelEvaluationForecastingOp component.
+ regression_metrics: google.ClassificationMetrics artifact generated from
+ the ModelEvaluationRegressionOp component.
+ text_generation_metrics: system.Metrics artifact generated from
+ the LLMEvaluationTextGenerationOp component. Subject to change to
+ google.TextGenerationMetrics.
+ question_answering_metrics: system.Metrics artifact generated from
+ the LLMEvaluationTextGenerationOp component. Subject to change to
+ google.QuestionAnsweringMetrics.
+ summarization_metrics: system.Metrics artifact generated from
+ the LLMEvaluationTextGenerationOp component. Subject to change to
+ google.SummarizationMetrics.
+ explanation: Path for model explanation metrics generated from an evaluation
+ component.
+ feature_attributions: The feature attributions metrics artifact generated
+ from the feature attribution component.
+ embedding_metrics: The embedding metrics artifact generated from the
+ embedding retrieval metrics component.
+ display_name: The display name for the uploaded model evaluation resource.
+ """
+ # fmt: on
+ return dsl.ContainerSpec(
+ image=_image.GCPC_IMAGE_TAG,
+ command=[
+ "python3",
+ "-u",
+ "-m",
+ "google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation",
+ ],
+ args=[
+ dsl.IfPresentPlaceholder(
+ input_name="metrics",
+ then=[
+ "--metrics",
+ metrics.uri,
+ "--metrics_explanation",
+ metrics.metadata["explanation_gcs_path"],
+ ],
+ ),
+ dsl.IfPresentPlaceholder(
+ input_name="row_based_metrics",
+ then=[
+ "--row_based_metrics",
+ row_based_metrics.uri,
+ ],
+ ),
+ dsl.IfPresentPlaceholder(
+ input_name="explanation",
+ then=[
+ "--explanation",
+ explanation.metadata["explanation_gcs_path"],
+ ],
+ ),
+ dsl.IfPresentPlaceholder(
+ input_name="classification_metrics",
+ then=[
+ "--classification_metrics",
+ classification_metrics.uri,
+ ],
+ ),
+ dsl.IfPresentPlaceholder(
+ input_name="forecasting_metrics",
+ then=[
+ "--forecasting_metrics",
+ forecasting_metrics.uri,
+ ],
+ ),
+ dsl.IfPresentPlaceholder(
+ input_name="regression_metrics",
+ then=[
+ "--regression_metrics",
+ regression_metrics.uri,
+ ],
+ ),
+ dsl.IfPresentPlaceholder(
+ input_name="text_generation_metrics",
+ then=[
+ "--text_generation_metrics",
+ text_generation_metrics.uri,
+ ],
+ ),
+ dsl.IfPresentPlaceholder(
+ input_name="question_answering_metrics",
+ then=[
+ "--question_answering_metrics",
+ question_answering_metrics.uri,
+ ],
+ ),
+ dsl.IfPresentPlaceholder(
+ input_name="summarization_metrics",
+ then=[
+ "--summarization_metrics",
+ summarization_metrics.uri,
+ ],
+ ),
+ dsl.IfPresentPlaceholder(
+ input_name="feature_attributions",
+ then=[
+ "--feature_attributions",
+ feature_attributions.uri,
+ ],
+ ),
+ dsl.IfPresentPlaceholder(
+ input_name="embedding_metrics",
+ then=[
+ "--embedding_metrics",
+ embedding_metrics.uri,
+ ],
+ ),
+ dsl.IfPresentPlaceholder(
+ input_name="problem_type",
+ then=[
+ "--problem_type",
+ problem_type,
+ ],
+ ),
+ "--display_name",
+ display_name,
+ "--dataset_path",
+ dataset_path,
+ "--dataset_paths",
+ dataset_paths,
+ "--dataset_type",
+ dataset_type,
+ "--pipeline_job_id",
+ dsl.PIPELINE_JOB_ID_PLACEHOLDER,
+ "--pipeline_job_resource_name",
+ dsl.PIPELINE_JOB_RESOURCE_NAME_PLACEHOLDER,
+ "--model_name",
+ model.metadata["resourceName"],
+ "--gcp_resources",
+ gcp_resources,
+ "--evaluation_resource_name",
+ evaluation_resource_name,
+ ],
+ )
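A minimal sketch of wiring the new import component into a pipeline. The upstream compute_metrics step is a hypothetical stand-in for any evaluation task that produces a Metrics artifact; GetVertexModelOp is the existing v1 model resolver:

from kfp import dsl
from kfp.dsl import Metrics, Output
from google_cloud_pipeline_components.preview.model_evaluation import ModelImportEvaluationOp
from google_cloud_pipeline_components.v1.model import GetVertexModelOp


@dsl.component
def compute_metrics(metrics: Output[Metrics]):
    # Hypothetical evaluation step; a real pipeline would use one of the
    # evaluation components shipped with this package.
    metrics.log_metric('accuracy', 0.93)


@dsl.pipeline(name='import-evaluation-demo')
def demo_pipeline(model_name: str):
    model = GetVertexModelOp(model_name=model_name)
    eval_task = compute_metrics()
    ModelImportEvaluationOp(
        model=model.outputs['model'],
        metrics=eval_task.outputs['metrics'],
        problem_type='classification',  # required whenever the generic `metrics` input is used
        display_name='demo-evaluation',
    )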