PyPI - eval-studio-client - Versions diffs - 0.8.0a2__py3-none-any.whl → 1.0.0a1__py3-none-any.whl - Mend

eval-studio-client 0.8.0a2__py3-none-any.whl → 1.0.0a1__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (573) hide show

eval_studio_client/insights.py CHANGED Viewed

@@ -22,7 +22,7 @@ class Insight:
     _evaluator_id: Optional[str] = None
     @staticmethod
-    def _from_api_insight(api_insight: models.V1alphaInsight) -> "Insight":
+    def _from_api_insight(api_insight: models.V1Insight) -> "Insight":
         """Converts an API Insight to a client Insight."""
         return Insight(
             description=api_insight.description or "",

eval_studio_client/leaderboards.py CHANGED Viewed

@@ -33,10 +33,11 @@ class Leaderboard:
     update_time: Optional[datetime.datetime] = None
     problems: List[p6s.Problem] = dataclasses.field(default_factory=list)
     insights: List[i6s.Insight] = dataclasses.field(default_factory=list)
+    existing_collection: Optional[str] = None
     _report: Optional[str] = None
     _leaderboard: Optional[str] = None
     _model_name: Optional[str] = None
-    _status: Optional[models.V1alphaLeaderboardStatus] = None
+    _status: Optional[models.V1LeaderboardStatus] = None
     _client: Optional[api.ApiClient] = None
     def __post_init__(self):
@@ -60,16 +61,14 @@ class Leaderboard:
     def finished(self) -> bool:
         """Indicates whether the leaderboard has finished."""
         return self._status in [
-            models.V1alphaLeaderboardStatus.LEADERBOARD_STATUS_COMPLETED,
-            models.V1alphaLeaderboardStatus.LEADERBOARD_STATUS_FAILED,
+            models.V1LeaderboardStatus.LEADERBOARD_STATUS_COMPLETED,
+            models.V1LeaderboardStatus.LEADERBOARD_STATUS_FAILED,
         ]
     @property
     def successful(self) -> bool:
         """Indicates whether the leaderboard has finished successfully."""
-        return (
-            self._status == models.V1alphaLeaderboardStatus.LEADERBOARD_STATUS_COMPLETED
-        )
+        return self._status == models.V1LeaderboardStatus.LEADERBOARD_STATUS_COMPLETED
     @property
     def test_suite(self) -> List[tests.Test]:
@@ -154,18 +153,19 @@ class Leaderboard:
         raise TimeoutError("Waiting timeout has been reached.")
-    def to_api_proto(self) -> models.V1alphaLeaderboard:
+    def to_api_proto(self) -> models.V1Leaderboard:
         """Converts the client Leaderboard to an API Leaderboard."""
-        return models.V1alphaLeaderboard(
+        return models.V1Leaderboard(
             display_name=self.name,
             description=self.description,
             llm_models=self.base_models or None,
             evaluator=self._evaluator_name,
             tests=self._test_names,
             model=self._model_name,
+            h2ogpte_collection=self.existing_collection or None,
         )
-    def _update_result(self, api_leaderboard: models.V1alphaLeaderboard):
+    def _update_result(self, api_leaderboard: models.V1Leaderboard):
         """Refresh the leaderboard with the latest API data."""
         self.key = api_leaderboard.name or ""
         self.update_time = api_leaderboard.update_time
@@ -175,7 +175,7 @@ class Leaderboard:
     @staticmethod
     def _from_api_leaderboard(
-        api_leaderboard: models.V1alphaLeaderboard, client: Optional[api.ApiClient]
+        api_leaderboard: models.V1Leaderboard, client: Optional[api.ApiClient]
     ) -> "Leaderboard":
         """Converts an API Leaderboard to a client Leaderboard."""
         api_problems = api_leaderboard.leaderboard_problems or []
@@ -191,6 +191,7 @@ class Leaderboard:
             update_time=api_leaderboard.update_time,
             problems=problems,
             insights=insights,
+            existing_collection=api_leaderboard.h2ogpte_collection or None,
             _evaluator_name=api_leaderboard.evaluator or "",
             _test_names=api_leaderboard.tests or [],
             _report=api_leaderboard.leaderboard_report or "",
@@ -200,8 +201,8 @@ class Leaderboard:
         )
     @staticmethod
-    def _is_finished_leaderboard(leaderboard: models.V1alphaLeaderboard) -> bool:
+    def _is_finished_leaderboard(leaderboard: models.V1Leaderboard) -> bool:
         return leaderboard.status in [
-            models.V1alphaLeaderboardStatus.LEADERBOARD_STATUS_COMPLETED,
-            models.V1alphaLeaderboardStatus.LEADERBOARD_STATUS_FAILED,
+            models.V1LeaderboardStatus.LEADERBOARD_STATUS_COMPLETED,
+            models.V1LeaderboardStatus.LEADERBOARD_STATUS_FAILED,
         ]

eval_studio_client/models.py CHANGED Viewed

@@ -15,6 +15,34 @@ from eval_studio_client.api import models
 # Key for Azure environment ID parameter within Model parameters.
 _AZURE_ENV_ID_PARAM = "environment_id"
+# Resource name of the default RAG model.
+DEFAULT_RAG_MODEL_KEY = "models/defaultRAGModel"
+# Resource name of the default LLM model.
+DEFAULT_LLM_MODEL_KEY = "models/defaultLLMModel"
+@dataclasses.dataclass
+class CollectionInfo:
+    """Represents the information about a collection in the H2OGPTE
+    or a Knowledge Base in Amazon Bedrock.
+    """
+    key: str
+    name: str
+    description: str
+    def __str__(self):
+        return f"{self.name} ({self.key})"
+    @staticmethod
+    def _from_api_collection_info(api_col: models.V1CollectionInfo) -> "CollectionInfo":
+        return CollectionInfo(
+            key=api_col.id or "",
+            name=api_col.display_name or "",
+            description=api_col.description or "",
+        )
 @dataclasses.dataclass
 class Model:
@@ -55,7 +83,7 @@ class Model:
         while True:
             res = self._leaderboard_api.leaderboard_service_list_leaderboards(
                 filter=f'model="{self.key}"',
-                view=models.V1alphaLeaderboardView.LEADERBOARD_VIEW_BASIC_WITH_TABLE,
+                view=models.V1LeaderboardView.LEADERBOARD_VIEW_BASIC_WITH_TABLE,
                 page_token=page_token,
             )
             if not res or not res.leaderboards:
@@ -74,6 +102,28 @@ class Model:
         return result
+    @property
+    def base_models(self) -> List[str]:
+        """List of base LLM models available to use e.g. for the evaluation."""
+        res = self._model_api.model_service_list_base_models(self.key)
+        if res and res.base_models:
+            return [str(m) for m in res.base_models]
+        raise RuntimeError("Failed to list base models")
+    @property
+    def collections(self) -> List[CollectionInfo]:
+        """List of collections available for evaluation.
+        NOTE: This is currently supported only for H2OGPTe and Amazon Bedrock RAG
+        model hosts.
+        """
+        res = self._model_api.model_service_list_model_collections(self.key)
+        if res and res.collections:
+            return list(res.collections)
+        raise RuntimeError("Failed to list model host collections")
     def create_leaderboard(
         self,
         name: str,
@@ -82,20 +132,27 @@ class Model:
         description: Optional[str] = None,
         base_models: Optional[List[str]] = None,
         use_cache: bool = True,
+        existing_collection: Optional[str] = None,
     ) -> Optional[l10s.Leaderboard]:
         """Runs a new evaluation for the model and creates a new leaderboard.
         Args:
+            name: The name of the leaderboard.
             evaluator: The evaluator to use for the evaluation.
             test_suite: The list of tests used to evaluate the model.
+            description (optional): The description of the leaderboard.
             base_models (optional): The base LLM models to use for the evaluation.
             use_cache (optional): Whether to use the cached answers if available.
+            existing_collection (str): ID or the resource name of the existing
+                collection, which will be used as a corpus for evaluation.
+                NOTE: This option works only for the H2OGPTe and Amazon Bedrock model hosts ATM.
         """
         lb = l10s.Leaderboard(
             key="",
             name=name,
             description=description or "",
             base_models=base_models or [],
+            existing_collection=existing_collection,
             _model_name=self.key,
             _evaluator_name=evaluator.key,
             _test_names=[t.key for t in test_suite],
@@ -122,6 +179,7 @@ class Model:
         test_suites: Union[tests.Test, List[tests.Test]],
         description: Optional[str] = None,
         base_models: Optional[List[str]] = None,
+        existing_collection: Optional[str] = None,
     ) -> Optional[d8s.Dashboard]:
         """Runs a new evaluation for the model and creates a new dashboard.
@@ -130,6 +188,9 @@ class Model:
             test_suites: The test(s) used to evaluate the model.
             description (optional): The description of the dashboard.
             base_models (optional): The base LLM models to use for the evaluation.
+            existing_collection (str): ID or the resource name of the existing
+                collection, which will be used as a corpus for evaluation.
+                NOTE: This option works only for the H2OGPTe and Amazon Bedrock model hosts ATM.
         """
         _evaluators = (
             [evaluators] if isinstance(evaluators, e8s.Evaluator) else evaluators
@@ -138,25 +199,26 @@ class Model:
             [test_suites] if isinstance(test_suites, tests.Test) else test_suites
         )
-        create_lb_reqs: List[models.V1alphaCreateLeaderboardRequest] = []
+        create_lb_reqs: List[models.V1CreateLeaderboardRequest] = []
         for evaluator in _evaluators:
             lb = l10s.Leaderboard(
                 key="",
                 name=f"{name} - {evaluator.name}",
                 description=description or "",
                 base_models=base_models or [],
+                existing_collection=existing_collection,
                 _model_name=self.key,
                 _evaluator_name=evaluator.key,
                 _test_names=[t.key for t in _test_suites],
                 _client=self._client,
             )
-            create_lb_req = models.V1alphaCreateLeaderboardRequest(
+            create_lb_req = models.V1CreateLeaderboardRequest(
                 leaderboard=lb.to_api_proto()
             )
             create_lb_reqs.append(create_lb_req)
         res = self._leaderboard_api.leaderboard_service_batch_create_leaderboards(
-            models.V1alphaBatchCreateLeaderboardsRequest(
+            models.V1BatchCreateLeaderboardsRequest(
                 requests=create_lb_reqs,
                 dashboard_display_name=name,
                 dashboard_description=description,
@@ -184,7 +246,7 @@ class Model:
             test_lab: The test lab in JSON format to use for the evaluation.
             description (optional): The description of the leaderboard.
         """
-        req = models.V1alphaImportLeaderboardRequest(
+        req = models.V1ImportLeaderboardRequest(
             testLabJson=test_lab,
             evaluator=evaluator.key,
             model=self.key,
@@ -212,7 +274,7 @@ class Model:
         raise RuntimeError("Failed to list base models")
     def _get_leaderboard_from_operation(
-        self, operation: models.V1alphaOperation
+        self, operation: models.V1Operation
     ) -> Optional[l10s.Leaderboard]:
         """Retrieves the leaderboard from the operation, which created it.
@@ -230,7 +292,7 @@ class Model:
         return None
     def _get_dashboard_from_operation(
-        self, operation: models.V1alphaOperation
+        self, operation: models.V1Operation
     ) -> Optional[d8s.Dashboard]:
         """Retrieves the dashboard from the operation, which created it.
@@ -251,9 +313,7 @@ class Model:
         return None
     @staticmethod
-    def _from_api_model(
-        api_model: models.V1alphaModel, client: api.ApiClient
-    ) -> "Model":
+    def _from_api_model(api_model: models.V1Model, client: api.ApiClient) -> "Model":
         """Converts the API model to the client model."""
         return Model(
             key=api_model.name or "",
@@ -268,10 +328,10 @@ class Model:
         )
     @staticmethod
-    def _is_rag_model(api_model: models.V1alphaModel) -> bool:
+    def _is_rag_model(api_model: models.V1Model) -> bool:
         return api_model.type in [
-            models.V1alphaModelType.MODEL_TYPE_H2_OGPTE_RAG,
-            models.V1alphaModelType.MODEL_TYPE_OPENAI_RAG,
+            models.V1ModelType.MODEL_TYPE_H2_OGPTE_RAG,
+            models.V1ModelType.MODEL_TYPE_OPENAI_RAG,
         ]
@@ -285,6 +345,12 @@ class _Models:
         Args:
             key: The model resource name to retrieve.
+        Returns:
+            Model: The model object.
+        Raises:
+            KeyError: If the model is not found.
         """
         res = self._api.model_service_get_model(key)
         if res and res.model:
@@ -292,6 +358,28 @@ class _Models:
         raise KeyError("Model not found.")
+    def get_default_rag(self) -> Model:
+        """Gets the default RAG model from Eval Studio.
+        Returns:
+            Model: The default RAG model object.
+        Raises:
+            KeyError: If no default RAG model is set.
+        """
+        return self.get(DEFAULT_RAG_MODEL_KEY)
+    def get_default_llm(self) -> Model:
+        """Gets the default LLM model from Eval Studio.
+        Returns:
+            Model: The default LLM model object.
+        Raises:
+            KeyError: If no default LLM model is set.
+        """
+        return self.get(DEFAULT_LLM_MODEL_KEY)
     def create_h2ogpte_model(
         self, name: str, is_rag: bool, description: str, url: str, api_key: str
     ) -> Model:
@@ -308,11 +396,11 @@ class _Models:
             api_key: API key for the model host system.
         """
         model_type = (
-            models.V1alphaModelType.MODEL_TYPE_H2_OGPTE_RAG
+            models.V1ModelType.MODEL_TYPE_H2_OGPTE_RAG
             if is_rag
-            else models.V1alphaModelType.MODEL_TYPE_H2_OGPTE_LLM
+            else models.V1ModelType.MODEL_TYPE_H2_OGPTE_LLM
         )
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
             url=url,
@@ -336,12 +424,12 @@ class _Models:
             url: URL of the model host system.
             api_key: API key for the model host system.
         """
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
             url=url,
             api_key=api_key,
-            type=models.V1alphaModelType.MODEL_TYPE_H2_OGPT_LLM,
+            type=models.V1ModelType.MODEL_TYPE_H2_OGPT_LLM,
         )
         res = self._api.model_service_create_model(req)
         if res and res.model:
@@ -360,12 +448,12 @@ class _Models:
             url: URL of the model host system.
             api_key: API key for the model host system.
         """
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
             url=url,
             api_key=api_key,
-            type=models.V1alphaModelType.MODEL_TYPE_H2_OLLMOPS,
+            type=models.V1ModelType.MODEL_TYPE_H2_OLLMOPS,
         )
         res = self._api.model_service_create_model(req)
         if res and res.model:
@@ -399,11 +487,11 @@ class _Models:
             )
         model_type = (
-            models.V1alphaModelType.MODEL_TYPE_OPENAI_RAG
+            models.V1ModelType.MODEL_TYPE_OPENAI_RAG
             if is_rag
-            else models.V1alphaModelType.MODEL_TYPE_OPENAI_CHAT
+            else models.V1ModelType.MODEL_TYPE_OPENAI_CHAT
         )
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
             api_key=api_key,
@@ -429,12 +517,12 @@ class _Models:
             environmentID: Azure environment ID.
         """
         params = {_AZURE_ENV_ID_PARAM: environmentID}
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
             url=url,
             api_key=api_key,
-            type=models.V1alphaModelType.MODEL_TYPE_AZURE_OPENAI_CHAT,
+            type=models.V1ModelType.MODEL_TYPE_AZURE_OPENAI_CHAT,
             parameters=json.dumps(params),
         )
         res = self._api.model_service_create_model(req)
@@ -454,12 +542,12 @@ class _Models:
             url: URL of the model host system.
             api_key: API key for the model host system.
         """
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
             url=url,
             api_key=api_key,
-            type=models.V1alphaModelType.MODEL_TYPE_OLLAMA,
+            type=models.V1ModelType.MODEL_TYPE_OLLAMA,
         )
         res = self._api.model_service_create_model(req)
         if res and res.model:
@@ -491,10 +579,10 @@ class _Models:
             "aws_secret_access_key": aws_secret_access_key,
             "aws_session_token": aws_session_token,
         }
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
-            type=models.V1alphaModelType.MODEL_TYPE_AMAZON_BEDROCK,
+            type=models.V1ModelType.MODEL_TYPE_AMAZON_BEDROCK,
             api_key=json.dumps(credentials),
             parameters=json.dumps({"region": aws_region}),
         )

eval_studio_client/perturbators.py CHANGED Viewed

@@ -17,12 +17,12 @@ class PerturbatorIntensity(enum.Enum):
     medium = "medium"
     high = "high"
-    def to_api_proto(self) -> models.V1alphaPerturbatorIntensity:
+    def to_api_proto(self) -> models.V1PerturbatorIntensity:
         """Converts the client PerturbatorIntensity to an API PerturbatorIntensity."""
         proto_values = {
-            PerturbatorIntensity.low: models.V1alphaPerturbatorIntensity.PERTURBATOR_INTENSITY_LOW,
-            PerturbatorIntensity.medium: models.V1alphaPerturbatorIntensity.PERTURBATOR_INTENSITY_MEDIUM,
-            PerturbatorIntensity.high: models.V1alphaPerturbatorIntensity.PERTURBATOR_INTENSITY_HIGH,
+            PerturbatorIntensity.low: models.V1PerturbatorIntensity.PERTURBATOR_INTENSITY_LOW,
+            PerturbatorIntensity.medium: models.V1PerturbatorIntensity.PERTURBATOR_INTENSITY_MEDIUM,
+            PerturbatorIntensity.high: models.V1PerturbatorIntensity.PERTURBATOR_INTENSITY_HIGH,
         }
         return proto_values[self]
@@ -52,9 +52,7 @@ class Perturbator:
         self.intensity = self.intensity or PerturbatorIntensity.medium
     @staticmethod
-    def _from_api_perturbator(
-        api_perturbator: models.V1alphaPerturbator,
-    ) -> "Perturbator":
+    def _from_api_perturbator(api_perturbator: models.V1Perturbator) -> "Perturbator":
         """Converts an API Perturbator to a client Perturbator."""
         return Perturbator(
             key=api_perturbator.name or "",

eval_studio_client/problems.py CHANGED Viewed

@@ -32,7 +32,7 @@ class Problem:
     _evaluator_id: Optional[str] = None
     @staticmethod
-    def _from_api_problem(api_problem: models.V1alphaProblemAndAction) -> "Problem":
+    def _from_api_problem(api_problem: models.V1ProblemAndAction) -> "Problem":
         """Converts an API Problem to a client Problem."""
         try:
             severity = ProblemSeverity(api_problem.severity)

eval_studio_client/test_labs.py CHANGED Viewed

@@ -98,7 +98,7 @@ class TestLab:
         Args:
             evaluator: The evaluator to use for the evaluation.
         """
-        req = apiModels.V1alphaImportLeaderboardRequest(
+        req = apiModels.V1ImportLeaderboardRequest(
             testLabJson=self.json(),
             evaluator=evaluator.key,
             model=None,
@@ -132,7 +132,7 @@ class TestLab:
         return json.dumps(lab, indent=4, sort_keys=True)
     def _get_leaderboard_from_operation(
-        self, operation: apiModels.V1alphaOperation
+        self, operation: apiModels.V1Operation
     ) -> Optional[l10s.Leaderboard]:
         """Retrieves the leaderboard from the operation, which created it.

eval-studio-client 0.8.0a2__py3-none-any.whl → 1.0.0a1__py3-none-any.whl

eval-studio-client 0.8.0a2__py3-none-any.whl → 1.0.0a1__py3-none-any.whl