PyPI - oracle-ads - Versions diffs - 2.11.16__py3-none-any.whl → 2.11.17__py3-none-any.whl - Mend

oracle-ads 2.11.16py3-none-any.whl → 2.11.17py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

ads/aqua/app.py +5 -6
ads/aqua/common/enums.py +9 -0
ads/aqua/common/utils.py +128 -1
ads/aqua/constants.py +1 -0
ads/aqua/evaluation/evaluation.py +1 -1
ads/aqua/extension/common_handler.py +75 -5
ads/aqua/extension/deployment_handler.py +2 -0
ads/aqua/extension/model_handler.py +113 -12
ads/aqua/model/entities.py +20 -2
ads/aqua/model/model.py +417 -172
ads/aqua/modeldeployment/deployment.py +69 -55
ads/jobs/builders/infrastructure/dsc_job.py +12 -9
ads/opctl/operator/lowcode/forecast/model/arima.py +3 -1
{oracle_ads-2.11.16.dist-info → oracle_ads-2.11.17.dist-info}/METADATA +2 -1
{oracle_ads-2.11.16.dist-info → oracle_ads-2.11.17.dist-info}/RECORD +18 -18
{oracle_ads-2.11.16.dist-info → oracle_ads-2.11.17.dist-info}/LICENSE.txt +0 -0
{oracle_ads-2.11.16.dist-info → oracle_ads-2.11.17.dist-info}/WHEEL +0 -0
{oracle_ads-2.11.16.dist-info → oracle_ads-2.11.17.dist-info}/entry_points.txt +0 -0

ads/aqua/model/model.py CHANGED Viewed

@@ -2,23 +2,31 @@
 # Copyright (c) 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 import os
+import pathlib
 from datetime import datetime, timedelta
 from threading import Lock
-from typing import Dict, Optional, Set, Union
+from typing import Dict, List, Optional, Set, Union
+import oci
 from cachetools import TTLCache
+from huggingface_hub import snapshot_download
+from oci.data_science.models import JobRun, Model
-from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID
+from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID, logger
 from ads.aqua.app import AquaApp
 from ads.aqua.common.enums import Tags
 from ads.aqua.common.errors import AquaRuntimeError, AquaValueError
 from ads.aqua.common.utils import (
+    LifecycleStatus,
+    _build_resource_identifier,
     copy_model_config,
     create_word_icon,
     get_artifact_path,
+    get_hf_model_info,
     list_os_files_with_extension,
     load_config,
     read_file,
+    upload_folder,
 )
 from ads.aqua.constants import (
     AQUA_MODEL_ARTIFACT_CONFIG,
@@ -38,8 +46,22 @@ from ads.aqua.constants import (
     VALIDATION_METRICS,
     VALIDATION_METRICS_FINAL,
 )
-from ads.aqua.model.constants import *
-from ads.aqua.model.entities import *
+from ads.aqua.model.constants import (
+    FineTuningCustomMetadata,
+    FineTuningMetricCategories,
+    ModelCustomMetadataFields,
+    ModelType,
+)
+from ads.aqua.model.entities import (
+    AquaFineTuneModel,
+    AquaFineTuningMetric,
+    AquaModel,
+    AquaModelLicense,
+    AquaModelSummary,
+    ImportModelDetails,
+    ModelFormat,
+    ModelValidationResult,
+)
 from ads.aqua.ui import AquaContainerConfig, AquaContainerConfigItem
 from ads.common.auth import default_signer
 from ads.common.oci_resource import SEARCH_TYPE, OCIResource
@@ -55,7 +77,6 @@ from ads.config import (
 from ads.model import DataScienceModel
 from ads.model.model_metadata import ModelCustomMetadata, ModelCustomMetadataItem
 from ads.telemetry import telemetry
-from oci.data_science.models import JobRun, Model
 class AquaModelApp(AquaApp):
@@ -176,11 +197,9 @@ class AquaModelApp(AquaApp):
         if not self._if_show(ds_model):
             raise AquaRuntimeError(f"Target model `{ds_model.id} `is not Aqua model.")
-        is_fine_tuned_model = (
-            True
-            if ds_model.freeform_tags
+        is_fine_tuned_model = bool(
+            ds_model.freeform_tags
             and ds_model.freeform_tags.get(Tags.AQUA_FINE_TUNED_MODEL_TAG)
-            else False
         )
         # todo: consolidate this logic in utils for model and deployment use
@@ -218,6 +237,10 @@ class AquaModelApp(AquaApp):
             ModelCustomMetadataFields.FINETUNE_CONTAINER,
             ModelCustomMetadataItem(key=ModelCustomMetadataFields.FINETUNE_CONTAINER),
         ).value
+        artifact_location = ds_model.custom_metadata_list.get(
+            ModelCustomMetadataFields.ARTIFACT_LOCATION,
+            ModelCustomMetadataItem(key=ModelCustomMetadataFields.ARTIFACT_LOCATION),
+        ).value
         aqua_model_attributes = dict(
             **self._process_model(ds_model, self.region),
@@ -226,6 +249,7 @@ class AquaModelApp(AquaApp):
             inference_container=inference_container,
             finetuning_container=finetuning_container,
             evaluation_container=evaluation_container,
+            artifact_location=artifact_location,
         )
         if not is_fine_tuned_model:
@@ -260,7 +284,7 @@ class AquaModelApp(AquaApp):
                 logger.debug(str(e))
                 source_name = UNKNOWN
-            source_identifier = utils._build_resource_identifier(
+            source_identifier = _build_resource_identifier(
                 id=source_id,
                 name=source_name,
                 region=self.region,
@@ -278,7 +302,7 @@ class AquaModelApp(AquaApp):
                 )
             )
             # TODO: change the argument's name.
-            lifecycle_state = utils.LifecycleStatus.get_status(
+            lifecycle_state = LifecycleStatus.get_status(
                 evaluation_status=ds_model.lifecycle_state,
                 job_run_status=job_run_status,
             )
@@ -321,7 +345,7 @@ class AquaModelApp(AquaApp):
                 category=category,
                 scores=scores,
             )
-        except:
+        except Exception:
             return AquaFineTuningMetric(name=metric_name, category=category, scores=[])
     def _build_ft_metrics(
@@ -410,9 +434,7 @@ class AquaModelApp(AquaApp):
         )
         description = ""
-        if isinstance(model, DataScienceModel) or isinstance(
-            model, oci.data_science.models.model.Model
-        ):
+        if isinstance(model, (DataScienceModel, oci.data_science.models.model.Model)):
             description = model.description
         elif isinstance(model, oci.resource_search.models.ResourceSummary):
             description = model.additional_details.get("description")
@@ -438,16 +460,26 @@ class AquaModelApp(AquaApp):
             == READY_TO_IMPORT_STATUS
         )
+        try:
+            model_file = model.custom_metadata_list.get(AQUA_MODEL_ARTIFACT_FILE).value
+        except Exception:
+            model_file = UNKNOWN
         inference_containers = AquaContainerConfig.from_container_index_json().inference
-        model_format = ModelFormat[
-            freeform_tags.get(Tags.MODEL_FORMAT, ModelFormat.SAFETENSORS.value).upper()
+        model_formats_str = freeform_tags.get(
+            Tags.MODEL_FORMAT, ModelFormat.SAFETENSORS.value
+        ).upper()
+        model_formats = [
+            ModelFormat[model_format] for model_format in model_formats_str.split(",")
         ]
         supported_platform: Set[AquaContainerConfigItem.Platform] = set()
         for container in inference_containers.values():
-            if model_format in container.model_formats:
-                supported_platform.update(container.platforms)
+            for model_format in model_formats:
+                if model_format in container.model_formats:
+                    supported_platform.update(container.platforms)
         nvidia_gpu_supported = (
             AquaContainerConfigItem.Platform.NVIDIA_GPU in supported_platform
@@ -456,26 +488,27 @@ class AquaModelApp(AquaApp):
             AquaContainerConfigItem.Platform.ARM_CPU in supported_platform
         )
-        return dict(
-            compartment_id=model.compartment_id,
-            icon=icon or UNKNOWN,
-            id=model_id,
-            license=freeform_tags.get(Tags.LICENSE, UNKNOWN),
-            name=model.display_name,
-            organization=freeform_tags.get(Tags.ORGANIZATION, UNKNOWN),
-            task=freeform_tags.get(Tags.TASK, UNKNOWN),
-            time_created=str(model.time_created),
-            is_fine_tuned_model=is_fine_tuned_model,
-            tags=tags,
-            console_link=console_link,
-            search_text=search_text,
-            ready_to_deploy=ready_to_deploy,
-            ready_to_finetune=ready_to_finetune,
-            ready_to_import=ready_to_import,
-            nvidia_gpu_supported=nvidia_gpu_supported,
-            arm_cpu_supported=arm_cpu_supported,
-            model_format=model_format,
-        )
+        return {
+            "compartment_id": model.compartment_id,
+            "icon": icon or UNKNOWN,
+            "id": model_id,
+            "license": freeform_tags.get(Tags.LICENSE, UNKNOWN),
+            "name": model.display_name,
+            "organization": freeform_tags.get(Tags.ORGANIZATION, UNKNOWN),
+            "task": freeform_tags.get(Tags.TASK, UNKNOWN),
+            "time_created": str(model.time_created),
+            "is_fine_tuned_model": is_fine_tuned_model,
+            "tags": tags,
+            "console_link": console_link,
+            "search_text": search_text,
+            "ready_to_deploy": ready_to_deploy,
+            "ready_to_finetune": ready_to_finetune,
+            "ready_to_import": ready_to_import,
+            "nvidia_gpu_supported": nvidia_gpu_supported,
+            "arm_cpu_supported": arm_cpu_supported,
+            "model_file": model_file,
+            "model_formats": model_formats,
+        }
     @telemetry(entry_point="plugin=model&action=list", name="aqua")
     def list(
@@ -524,7 +557,7 @@ class AquaModelApp(AquaApp):
                 category="aqua/service/model", action="list"
             )
-            if ODSC_MODEL_COMPARTMENT_OCID in self._service_models_cache.keys():
+            if ODSC_MODEL_COMPARTMENT_OCID in self._service_models_cache:
                 logger.info(
                     f"Returning service models list in {ODSC_MODEL_COMPARTMENT_OCID} from cache."
                 )
@@ -576,7 +609,7 @@ class AquaModelApp(AquaApp):
         res = {}
         logger.info("Clearing _service_models_cache")
         with self._cache_lock:
-            if ODSC_MODEL_COMPARTMENT_OCID in self._service_models_cache.keys():
+            if ODSC_MODEL_COMPARTMENT_OCID in self._service_models_cache:
                 self._service_models_cache.pop(key=ODSC_MODEL_COMPARTMENT_OCID)
                 res = {
                     "key": {
@@ -593,10 +626,9 @@ class AquaModelApp(AquaApp):
         inference_container: str,
         finetuning_container: str,
         verified_model: DataScienceModel,
-        model_format: ModelFormat,
+        validation_result: ModelValidationResult,
         compartment_id: Optional[str],
         project_id: Optional[str],
-        model_file: Optional[str],
     ) -> DataScienceModel:
         """Create model by reference from the object storage path
@@ -625,11 +657,20 @@ class AquaModelApp(AquaApp):
             }
         )
         tags.update({Tags.BASE_MODEL_CUSTOM: "true"})
-        tags.update({Tags.MODEL_FORMAT: model_format.value})
+        if validation_result and validation_result.model_formats:
+            tags.update(
+                {
+                    Tags.MODEL_FORMAT: ",".join(
+                        model_format.value
+                        for model_format in validation_result.model_formats
+                    )
+                }
+            )
         # Remove `ready_to_import` tag that might get copied from service model.
         tags.pop(Tags.READY_TO_IMPORT, None)
-        metadata = None
         if verified_model:
             # Verified model is a model in the service catalog that either has no artifacts but contains all the necessary metadata for deploying and fine tuning.
             # If set, then we copy all the model metadata.
@@ -638,7 +679,6 @@ class AquaModelApp(AquaApp):
                 model = model.with_model_file_description(
                     json_dict=verified_model.model_file_description
                 )
         else:
             metadata = ModelCustomMetadata()
             if not inference_container:
@@ -658,10 +698,10 @@ class AquaModelApp(AquaApp):
                     "Proceeding with model registration without the fine-tuning container information. "
                     "This model will not be available for fine tuning."
                 )
-            if model_file:
+            if validation_result and validation_result.model_file:
                 metadata.add(
                     key=AQUA_MODEL_ARTIFACT_FILE,
-                    value=model_file,
+                    value=validation_result.model_file,
                     description=f"The model file for {model_name}",
                     category="Other",
                 )
@@ -678,9 +718,13 @@ class AquaModelApp(AquaApp):
                 description="Evaluation container mapping for SMC",
                 category="Other",
             )
-            # TODO: either get task and organization from user or a config file
-            # tags["task"] = "UNKNOWN"
-            # tags["organization"] = "UNKNOWN"
+            if validation_result and validation_result.tags:
+                tags[Tags.TASK] = validation_result.tags.get(Tags.TASK, UNKNOWN)
+                tags[Tags.ORGANIZATION] = validation_result.tags.get(
+                    Tags.ORGANIZATION, UNKNOWN
+                )
+                tags[Tags.LICENSE] = validation_result.tags.get(Tags.LICENSE, UNKNOWN)
         try:
             # If verified model already has a artifact json, use that.
@@ -695,7 +739,7 @@ class AquaModelApp(AquaApp):
             copy_model_config(
                 artifact_path=artifact_path, os_path=os_path, auth=default_signer()
             )
-        except:
+        except Exception:
             logger.debug(
                 f"Proceeding with model registration without copying model config files at {os_path}. "
                 f"Default configuration will be used for deployment and fine-tuning."
@@ -708,7 +752,6 @@ class AquaModelApp(AquaApp):
             category="Other",
             replace=True,
         )
         model = (
             model.with_custom_metadata_list(metadata)
             .with_compartment_id(compartment_id or COMPARTMENT_OCID)
@@ -721,7 +764,7 @@ class AquaModelApp(AquaApp):
         return model
     @staticmethod
-    def get_model_files(os_path: str, model_format: ModelFormat) -> [str]:
+    def get_model_files(os_path: str, model_format: ModelFormat) -> List[str]:
         """
         Get a list of model files based on the given OS path and model format.
@@ -734,13 +777,15 @@ class AquaModelApp(AquaApp):
         """
         model_files: List[str] = []
+        # todo: revisit this logic to account for .bin files. In the current state, .bin and .safetensor models
+        #   are grouped in one category and validation checks for config.json files only.
         if model_format == ModelFormat.SAFETENSORS:
             try:
                 load_config(
                     file_path=os_path,
                     config_file_name=AQUA_MODEL_ARTIFACT_CONFIG,
                 )
-            except AquaValueError:
+            except Exception:
                 pass
             else:
                 model_files.append(AQUA_MODEL_ARTIFACT_CONFIG)
@@ -751,142 +796,328 @@ class AquaModelApp(AquaApp):
             )
         return model_files
-    def validate_model_config(
+    @staticmethod
+    def get_hf_model_files(model_name: str, model_format: ModelFormat) -> List[str]:
+        """
+        Get a list of model files based on the given OS path and model format.
+        Args:
+            model_name (str): The huggingface model name.
+            model_format (ModelFormat): The format of the model files.
+        Returns:
+            List[str]: A list of model file names.
+        """
+        model_files: List[str] = []
+        # todo: revisit this logic to account for .bin files. In the current state, .bin and .safetensor models
+        #   are grouped in one category and returns config.json file only.
+        try:
+            model_siblings = get_hf_model_info(repo_id=model_name).siblings
+        except Exception as e:
+            huggingface_err_message = str(e)
+            raise AquaValueError(
+                f"Could not get the model files of {model_name} from https://huggingface.co. "
+                f"Error: {huggingface_err_message}."
+            ) from e
+        if not model_siblings:
+            raise AquaValueError(
+                f"Failed to fetch the model files of {model_name} from https://huggingface.co."
+            )
+        for model_sibling in model_siblings:
+            extension = pathlib.Path(model_sibling.rfilename).suffix[1:].upper()
+            if model_format == ModelFormat.SAFETENSORS:
+                if model_sibling.rfilename == AQUA_MODEL_ARTIFACT_CONFIG:
+                    model_files.append(model_sibling.rfilename)
+            elif extension == model_format.value:
+                model_files.append(model_sibling.rfilename)
+        return model_files
+    def _validate_model(
         self,
-        import_model_details: ImportModelDetails,
-        verified_model: Optional[DataScienceModel],
+        import_model_details: ImportModelDetails = None,
+        model_name: str = None,
+        verified_model: DataScienceModel = None,
     ) -> ModelValidationResult:
         """
         Validates the model configuration and returns the model format telemetry model name.
         Args:
-            import_model_details (ImportModelDetails): The details of the imported model.
-            verified_model (Optional[DataScienceModel]): The verified model.
+            import_model_details (ImportModelDetails): Model details for importing the model.
+            model_name (str): name of the model
+            verified_model (DataScienceModel): If set, then copies all the tags and custom metadata information from
+                the service verified model
         Returns:
             ModelValidationResult: The result of the model validation.
         Raises:
             AquaRuntimeError: If there is an error while loading the config file or if the model path is incorrect.
-            AquaValueError: If the model format is not supported by AQUA."""
-        inference_containers_config = (
-            AquaContainerConfig.from_container_index_json().inference
-        )
-        inference_container = import_model_details.inference_container
-        model_format: ModelFormat
+            AquaValueError: If the model format is not supported by AQUA.
+        """
+        model_formats = []
         validation_result: ModelValidationResult = ModelValidationResult()
+        hf_download_config_present = False
+        if import_model_details.download_from_hf:
+            safetensors_model_files = self.get_hf_model_files(
+                model_name, ModelFormat.SAFETENSORS
+            )
+            if safetensors_model_files:
+                hf_download_config_present = True
+            gguf_model_files = self.get_hf_model_files(model_name, ModelFormat.GGUF)
+        else:
+            safetensors_model_files = self.get_model_files(
+                import_model_details.os_path, ModelFormat.SAFETENSORS
+            )
+            gguf_model_files = self.get_model_files(
+                import_model_details.os_path, ModelFormat.GGUF
+            )
+        if not (safetensors_model_files or gguf_model_files):
+            raise AquaRuntimeError(
+                f"The model {model_name} does not contain either {ModelFormat.SAFETENSORS.value} "
+                f"or {ModelFormat.GGUF.value} files in {import_model_details.os_path} or Hugging Face repository. "
+                f"Please check if the path is correct or the model artifacts are available at this location."
+            )
         if verified_model:
             aqua_model = self.to_aqua_model(verified_model, self.region)
-            model_format = aqua_model.model_format
+            model_formats = aqua_model.model_formats
         else:
-            # Todo: Revisit this logic once a container supports multiple formats
-            try:
-                model_format = inference_containers_config[
-                    inference_container
-                ].model_formats[0]
-            except (KeyError, IndexError):
-                logger.warn(
-                    "Unable to fetch model format for the model automatically defaulting to safetensors"
-                )
-                model_format = ModelFormat.SAFETENSORS
-                pass
-        validation_result.model_format = model_format
-        if model_format == ModelFormat.SAFETENSORS:
-            try:
-                model_config = load_config(
-                    file_path=import_model_details.os_path,
-                    config_file_name=AQUA_MODEL_ARTIFACT_CONFIG,
-                )
-            except Exception as ex:
-                logger.error(
-                    f"Exception occurred while loading config file from {import_model_details.os_path}"
-                    f"Exception message: {ex}"
-                )
-                raise AquaRuntimeError(
-                    f"The model path {import_model_details.os_path} does not contain the file config.json. "
-                    f"Please check if the path is correct or the model artifacts are available at this location."
-                )
-            else:
+            if safetensors_model_files:
+                model_formats.append(ModelFormat.SAFETENSORS)
+            if gguf_model_files:
+                model_formats.append(ModelFormat.GGUF)
+            # get tags for models from hf
+            if import_model_details.download_from_hf:
+                model_info = get_hf_model_info(repo_id=model_name)
                 try:
-                    metadata_model_type = verified_model.custom_metadata_list.get(
-                        AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE
-                    ).value
-                    if metadata_model_type:
-                        if AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE in model_config:
-                            if (
-                                model_config[AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE]
-                                != metadata_model_type
-                            ):
-                                raise AquaRuntimeError(
-                                    f"The {AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE} attribute in {AQUA_MODEL_ARTIFACT_CONFIG}"
-                                    f" at {import_model_details.os_path} is invalid, expected {metadata_model_type} for "
-                                    f"the model {import_model_details.model}. Please check if the path is correct or "
-                                    f"the correct model artifacts are available at this location."
-                                    f""
-                                )
+                    license_value = UNKNOWN
+                    if model_info.tags:
+                        license_tag = next(
+                            (
+                                tag
+                                for tag in model_info.tags
+                                if tag.startswith("license:")
+                            ),
+                            UNKNOWN,
+                        )
+                        license_value = (
+                            license_tag.split(":")[1] if license_tag else UNKNOWN
+                        )
+                    hf_tags = {
+                        Tags.TASK: (model_info and model_info.pipeline_tag) or UNKNOWN,
+                        Tags.ORGANIZATION: (
+                            model_info.author
+                            if model_info and hasattr(model_info, "author")
+                            else UNKNOWN
+                        ),
+                        Tags.LICENSE: license_value,
+                    }
+                    validation_result.tags = hf_tags
+                except Exception:
+                    pass
+        validation_result.model_formats = model_formats
+        # now as we know that at least one type of model files exist, validate the content of oss path.
+        # for safetensors, we check if config.json files exist, and for gguf format we check if files with
+        # gguf extension exist.
+        for model_format in model_formats:
+            if (
+                model_format == ModelFormat.SAFETENSORS
+                and len(safetensors_model_files) > 0
+            ):
+                if import_model_details.download_from_hf:
+                    # validates config.json exists for safetensors model from hugginface
+                    if not hf_download_config_present:
+                        raise AquaRuntimeError(
+                            f"The model {model_name} does not contain {AQUA_MODEL_ARTIFACT_CONFIG} file as required "
+                            f"by {ModelFormat.SAFETENSORS.value} format model."
+                            f" Please check if the model name is correct in Hugging Face repository."
+                        )
+                else:
+                    try:
+                        model_config = load_config(
+                            file_path=import_model_details.os_path,
+                            config_file_name=AQUA_MODEL_ARTIFACT_CONFIG,
+                        )
+                    except Exception as ex:
+                        logger.error(
+                            f"Exception occurred while loading config file from {import_model_details.os_path}"
+                            f"Exception message: {ex}"
+                        )
+                        raise AquaRuntimeError(
+                            f"The model path {import_model_details.os_path} does not contain the file config.json. "
+                            f"Please check if the path is correct or the model artifacts are available at this location."
+                        ) from ex
+                    else:
+                        try:
+                            metadata_model_type = (
+                                verified_model.custom_metadata_list.get(
+                                    AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE
+                                ).value
+                            )
+                            if metadata_model_type:
+                                if (
+                                    AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE
+                                    in model_config
+                                ):
+                                    if (
+                                        model_config[
+                                            AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE
+                                        ]
+                                        != metadata_model_type
+                                    ):
+                                        raise AquaRuntimeError(
+                                            f"The {AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE} attribute in {AQUA_MODEL_ARTIFACT_CONFIG}"
+                                            f" at {import_model_details.os_path} is invalid, expected {metadata_model_type} for "
+                                            f"the model {model_name}. Please check if the path is correct or "
+                                            f"the correct model artifacts are available at this location."
+                                            f""
+                                        )
+                                else:
+                                    logger.debug(
+                                        f"Could not find {AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE} attribute in "
+                                        f"{AQUA_MODEL_ARTIFACT_CONFIG}. Proceeding with model registration."
+                                    )
+                        except Exception:
+                            pass
+                        if verified_model:
+                            validation_result.telemetry_model_name = (
+                                verified_model.display_name
+                            )
+                        elif (
+                            model_config is not None
+                            and AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME in model_config
+                        ):
+                            validation_result.telemetry_model_name = f"{AQUA_MODEL_TYPE_CUSTOM}_{model_config[AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME]}"
+                        elif (
+                            model_config is not None
+                            and AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE in model_config
+                        ):
+                            validation_result.telemetry_model_name = f"{AQUA_MODEL_TYPE_CUSTOM}_{model_config[AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE]}"
                         else:
-                            logger.debug(
-                                f"Could not find {AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE} attribute in "
-                                f"{AQUA_MODEL_ARTIFACT_CONFIG}. Proceeding with model registration."
+                            validation_result.telemetry_model_name = (
+                                AQUA_MODEL_TYPE_CUSTOM
                             )
-                except:
-                    pass
+            elif model_format == ModelFormat.GGUF and len(gguf_model_files) > 0:
+                if import_model_details.finetuning_container and not safetensors_model_files:
+                    raise AquaValueError(
+                        "Fine-tuning is currently not supported with GGUF model format."
+                    )
+                if verified_model:
+                    try:
+                        model_file = verified_model.custom_metadata_list.get(
+                            AQUA_MODEL_ARTIFACT_FILE
+                        ).value
+                    except ValueError as err:
+                        raise AquaRuntimeError(
+                            f"The model {verified_model.display_name} does not contain the custom metadata {AQUA_MODEL_ARTIFACT_FILE}. "
+                            f"Please check if the model has the valid metadata."
+                        ) from err
+                else:
+                    model_file = import_model_details.model_file
+                model_files = gguf_model_files
+                # todo: have a separate error validation class for different type of error messages.
+                if model_file:
+                    if model_file not in model_files:
+                        raise AquaRuntimeError(
+                            f"The model path {import_model_details.os_path} or the Hugging Face "
+                            f"model repository for {model_name} does not contain the file "
+                            f"{model_file}. Please check if the path is correct or the model "
+                            f"artifacts are available at this location."
+                        )
+                    else:
+                        validation_result.model_file = model_file
+                elif len(model_files) == 0:
+                    raise AquaRuntimeError(
+                        f"The model path {import_model_details.os_path} or the Hugging Face model "
+                        f"repository for {model_name} does not contain any GGUF format files. "
+                        f"Please check if the path is correct or the model artifacts are available "
+                        f"at this location."
+                    )
+                elif len(model_files) > 1:
+                    raise AquaRuntimeError(
+                        f"The model path {import_model_details.os_path} or the Hugging Face model "
+                        f"repository for {model_name} contains multiple GGUF format files. "
+                        f"Please specify the file that needs to be deployed using the model_file "
+                        f"parameter."
+                    )
+                else:
+                    validation_result.model_file = model_files[0]
                 if verified_model:
                     validation_result.telemetry_model_name = verified_model.display_name
-                elif (
-                    model_config is not None
-                    and AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME in model_config
-                ):
-                    validation_result.telemetry_model_name = f"{AQUA_MODEL_TYPE_CUSTOM}_{model_config[AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME]}"
-                elif (
-                    model_config is not None
-                    and AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE in model_config
-                ):
-                    validation_result.telemetry_model_name = f"{AQUA_MODEL_TYPE_CUSTOM}_{model_config[AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE]}"
+                elif import_model_details.download_from_hf:
+                    validation_result.telemetry_model_name = model_name
                 else:
                     validation_result.telemetry_model_name = AQUA_MODEL_TYPE_CUSTOM
-        elif model_format == ModelFormat.GGUF:
-            if import_model_details.finetuning_container:
-                raise AquaValueError(
-                    "Finetuning is currently not supported with GGUF model format"
-                )
-            if verified_model:
-                model_file = verified_model.custom_metadata_list.get(
-                    AQUA_MODEL_ARTIFACT_FILE, None
-                )
+        return validation_result
+    @staticmethod
+    def _download_model_from_hf(
+        model_name: str,
+        os_path: str,
+        local_dir: str = None,
+    ) -> str:
+        """This helper function downloads the model artifact from Hugging Face to a local folder, then uploads
+        to object storage location.
+        Parameters
+        ----------
+        model_name (str): The huggingface model name.
+        os_path (str): The OS path where the model files are located.
+        local_dir (str): The local temp dir to store the huggingface model.
+        Returns
+        -------
+        model_artifact_path (str): Location where the model artifacts are downloaded.
+        """
+        # Download the model from hub
+        if not local_dir:
+            local_dir = os.path.join(os.path.expanduser("~"), "cached-model")
+        local_dir = os.path.join(local_dir, model_name)
+        retry = 10
+        i = 0
+        huggingface_download_err_message = None
+        while i < retry:
+            try:
+                # Download to cache folder. The while loop retries when there is a network failure
+                snapshot_download(repo_id=model_name)
+            except Exception as e:
+                huggingface_download_err_message = str(e)
+                i += 1
             else:
-                model_file = import_model_details.model_file
-            model_files = self.get_model_files(
-                import_model_details.os_path, model_format
+                break
+        if i == retry:
+            raise Exception(
+                f"Could not download the model {model_name} from https://huggingface.co with message {huggingface_download_err_message}"
             )
-            if model_file:
-                if model_file not in model_files:
-                    raise AquaRuntimeError(
-                        f"The model path {import_model_details.os_path} does not contain the file {model_file}. "
-                        f"Please check if the path is correct or the model artifacts are available at this location."
-                    )
-                else:
-                    validation_result.model_file = model_file
-            elif len(model_files) == 0:
-                raise AquaRuntimeError(
-                    f"The model path {import_model_details.os_path} does not contain any GGUF format files. "
-                    f"Please check if the path is correct or the model artifacts are available at this location."
-                )
-            elif len(model_files) > 1:
-                raise AquaRuntimeError(
-                    f"The model path {import_model_details.os_path} contains multiple GGUF format files. Please specify the file that needs to be deployed."
-                )
-            else:
-                validation_result.model_file = model_files[0]
+        os.makedirs(local_dir, exist_ok=True)
+        # Copy the model from the cache to destination
+        snapshot_download(repo_id=model_name, local_dir=local_dir)
+        # Upload to object storage
+        model_artifact_path = upload_folder(
+            os_path=os_path,
+            local_dir=local_dir,
+            model_name=model_name,
+        )
-            if verified_model:
-                validation_result.telemetry_model_name = verified_model.display_name
-            else:
-                validation_result.telemetry_model_name = AQUA_MODEL_TYPE_CUSTOM
-        else:
-            raise AquaValueError("This model format is currently not supported by AQUA")
-        return validation_result
+        return model_artifact_path
     def register(
         self, import_model_details: ImportModelDetails = None, **kwargs
@@ -928,9 +1159,6 @@ class AquaModelApp(AquaApp):
                 )
                 verified_model = DataScienceModel.from_id(model_service_id)
-        validation_result = self.validate_model_config(
-            import_model_details, verified_model
-        )
         # Copy the model name from the service model if `model` is ocid
         model_name = (
             verified_model.display_name
@@ -938,17 +1166,33 @@ class AquaModelApp(AquaApp):
             else import_model_details.model
         )
+        # validate model and artifact
+        validation_result = self._validate_model(
+            import_model_details=import_model_details,
+            model_name=model_name,
+            verified_model=verified_model,
+        )
+        # download model from hugginface if indicates
+        if import_model_details.download_from_hf:
+            artifact_path = self._download_model_from_hf(
+                model_name=model_name,
+                os_path=import_model_details.os_path,
+                local_dir=import_model_details.local_dir,
+            ).rstrip("/")
+        else:
+            artifact_path = import_model_details.os_path.rstrip("/")
         # Create Model catalog entry with pass by reference
         ds_model = self._create_model_catalog_entry(
-            os_path=import_model_details.os_path,
+            os_path=artifact_path,
             model_name=model_name,
             inference_container=import_model_details.inference_container,
             finetuning_container=import_model_details.finetuning_container,
             verified_model=verified_model,
+            validation_result=validation_result,
             compartment_id=import_model_details.compartment_id,
             project_id=import_model_details.project_id,
-            model_file=validation_result.model_file,
-            model_format=validation_result.model_format,
         )
         # registered model will always have inference and evaluation container, but
         # fine-tuning container may be not set
@@ -962,7 +1206,7 @@ class AquaModelApp(AquaApp):
             finetuning_container = ds_model.custom_metadata_list.get(
                 ModelCustomMetadataFields.FINETUNE_CONTAINER,
             ).value
-        except:
+        except Exception:
             finetuning_container = None
         aqua_model_attributes = dict(
@@ -970,13 +1214,14 @@ class AquaModelApp(AquaApp):
             project_id=ds_model.project_id,
             model_card=str(
                 read_file(
-                    file_path=f"{import_model_details.os_path.rstrip('/')}/{README}",
+                    file_path=f"{artifact_path}/{README}",
                     auth=default_signer(),
                 )
             ),
             inference_container=inference_container,
             finetuning_container=finetuning_container,
             evaluation_container=evaluation_container,
+            artifact_location=artifact_path,
         )
         self.telemetry.record_event_async(

oracle-ads 2.11.16__py3-none-any.whl → 2.11.17__py3-none-any.whl

oracle-ads 2.11.16py3-none-any.whl → 2.11.17py3-none-any.whl