lmnr 0.4.13__tar.gz → 0.4.15b1__tar.gz
This diff shows the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- {lmnr-0.4.13 → lmnr-0.4.15b1}/PKG-INFO +4 -3
- {lmnr-0.4.13 → lmnr-0.4.15b1}/README.md +2 -1
- {lmnr-0.4.13 → lmnr-0.4.15b1}/pyproject.toml +3 -3
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/evaluations.py +54 -46
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/laminar.py +6 -63
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/types.py +2 -13
- {lmnr-0.4.13 → lmnr-0.4.15b1}/LICENSE +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/cli.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/decorators.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/log.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/utils.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/.flake8 +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/.python-version +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/config/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/decorators/base.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/instruments.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tracing/attributes.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tracing/tracing.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/version.py +0 -0
{lmnr-0.4.13 → lmnr-0.4.15b1}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lmnr
-Version: 0.4.13
+Version: 0.4.15b1
 Summary: Python SDK for Laminar AI
 License: Apache-2.0
 Author: lmnr.ai
@@ -53,7 +53,7 @@ Requires-Dist: posthog (>=3.0,<4.0)
 Requires-Dist: pydantic (>=2.7,<3.0)
 Requires-Dist: python-dotenv (>=1.0,<2.0)
 Requires-Dist: requests (>=2.0,<3.0)
-Requires-Dist: tenacity (>=8.0
+Requires-Dist: tenacity (>=8.0)
 Requires-Dist: tqdm (>=4.0,<5.0)
 Description-Content-Type: text/markdown
 
```
{lmnr-0.4.13 → lmnr-0.4.15b1}/README.md

````diff
@@ -230,7 +230,8 @@ evaluate(
     executor=write_poem,
     evaluators={
         "containsPoem": contains_poem
-    }
+    },
+    group_id="my_first_feature"
 )
 ```
 
````
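For orientation, here is a minimal, self-contained sketch of the updated README example end to end. The `write_poem` executor, the `contains_poem` evaluator, and the datapoint shape are illustrative assumptions, not code from the package; only the `evaluate(...)` keyword arguments (including the new `group_id`) come from the diff.

```python
from lmnr import evaluate  # assumes `evaluate` is exported at package level, as the README suggests

# Hypothetical executor: turns a datapoint's input into an output.
def write_poem(data: dict) -> str:
    return f"Roses are red, violets are blue, {data['topic']} too"

# Hypothetical evaluator: returns a numeric score for the executor output.
def contains_poem(output: str, target: dict) -> int:
    return 1 if target["text"] in output else 0

evaluate(
    data=[{"data": {"topic": "the sea"}, "target": {"text": "sea"}}],  # datapoint shape is an assumption
    executor=write_poem,
    evaluators={
        "containsPoem": contains_poem
    },
    group_id="my_first_feature",  # new in 0.4.15b1: groups related evaluations together
)
```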
{lmnr-0.4.13 → lmnr-0.4.15b1}/pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [project]
 name = "lmnr"
-version = "0.4.13"
+version = "0.4.15b1"
 description = "Python SDK for Laminar AI"
 authors = [
   { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"
 
 [tool.poetry]
 name = "lmnr"
-version = "0.4.13"
+version = "0.4.15b1"
 description = "Python SDK for Laminar AI"
 authors = ["lmnr.ai"]
 readme = "README.md"
@@ -33,7 +33,7 @@ opentelemetry-instrumentation-sqlalchemy = "^0.48b0"
 opentelemetry-instrumentation-urllib3 = "^0.48b0"
 opentelemetry-instrumentation-threading = "^0.48b0"
 opentelemetry-semantic-conventions-ai = "0.4.1"
-tenacity = "
+tenacity = ">=8.0"
 jinja2 = "~=3.0"
 deprecated = "~=1.0"
 posthog = "~=3.0"
```
{lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/evaluations.py

```diff
@@ -1,4 +1,5 @@
 import asyncio
+import re
 import sys
 from abc import ABC, abstractmethod
 from contextlib import contextmanager
```
```diff
@@ -45,13 +46,26 @@ def get_evaluation_url(project_id: str, evaluation_id: str):
     return f"https://www.lmnr.ai/project/{project_id}/evaluations/{evaluation_id}"
 
 
+def get_average_scores(results: list[EvaluationResultDatapoint]) -> dict[str, Numeric]:
+    per_score_values = {}
+    for result in results:
+        for key, value in result.scores.items():
+            if key not in per_score_values:
+                per_score_values[key] = []
+            per_score_values[key].append(value)
+
+    average_scores = {}
+    for key, values in per_score_values.items():
+        average_scores[key] = sum(values) / len(values)
+
+    return average_scores
+
+
 class EvaluationReporter:
     def __init__(self):
         pass
 
-    def start(self,
-        print(f"Running evaluation {name}...\n")
-        print(f"Check progress and results at {get_evaluation_url(project_id, id)}\n")
+    def start(self, length: int):
         self.cli_progress = tqdm(
             total=length,
             bar_format="{bar} {percentage:3.0f}% | ETA: {remaining}s | {n_fmt}/{total_fmt}",
```
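The new module-level `get_average_scores` helper averages each score key across all result datapoints once the run completes. Below is a standalone sketch of the same averaging logic, with a plain dataclass standing in for `EvaluationResultDatapoint` (only its `scores` mapping, which the loop above relies on, is modelled here):

```python
from dataclasses import dataclass

@dataclass
class FakeResultDatapoint:
    # Stand-in for EvaluationResultDatapoint; only the `scores` field is used by the helper.
    scores: dict[str, float]

def get_average_scores(results: list[FakeResultDatapoint]) -> dict[str, float]:
    per_score_values: dict[str, list[float]] = {}
    for result in results:
        for key, value in result.scores.items():
            per_score_values.setdefault(key, []).append(value)
    return {key: sum(values) / len(values) for key, values in per_score_values.items()}

print(get_average_scores([
    FakeResultDatapoint(scores={"containsPoem": 1.0, "length": 12.0}),
    FakeResultDatapoint(scores={"containsPoem": 0.0, "length": 20.0}),
]))
# {'containsPoem': 0.5, 'length': 16.0}
```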
```diff
@@ -65,9 +79,10 @@ class EvaluationReporter:
         self.cli_progress.close()
         sys.stderr.write(f"\nError: {error}\n")
 
-    def stop(self, average_scores: dict[str, Numeric]):
+    def stop(self, average_scores: dict[str, Numeric], project_id: str, evaluation_id: str):
         self.cli_progress.close()
-        print("\
+        print(f"\nCheck progress and results at {get_evaluation_url(project_id, evaluation_id)}\n")
+        print("Average scores:")
         for name, score in average_scores.items():
             print(f"{name}: {score}")
         print("\n")
```
```diff
@@ -96,6 +111,7 @@ class Evaluation:
         data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
         executor: Any,
         evaluators: dict[str, EvaluatorFunction],
+        group_id: Optional[str] = None,
         name: Optional[str] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,
         project_api_key: Optional[str] = None,
@@ -122,6 +138,8 @@
                 evaluator function. If the function is anonymous, it will be
                 named `evaluator_${index}`, where index is the index of the
                 evaluator function in the list starting from 1.
+            group_id (Optional[str], optional): Group id of the evaluation.
+                Defaults to "default".
             name (Optional[str], optional): The name of the evaluation.
                 It will be auto-generated if not provided.
             batch_size (int, optional): The batch size for evaluation.
```
```diff
@@ -137,11 +155,16 @@
                 Defaults to None. If None, all available instruments will be used.
         """
 
+        if not evaluators:
+            raise ValueError("No evaluators provided")
+
+        # TODO: Compile regex once and then reuse it
+        for evaluator_name in evaluators:
+            if not re.match(r'^[\w\s-]+$', evaluator_name):
+                raise ValueError(f'Invalid evaluator key: "{evaluator_name}". Keys must only contain letters, digits, hyphens, underscores, or spaces.')
+
         self.is_finished = False
-        self.name = name
         self.reporter = EvaluationReporter()
-        self.executor = executor
-        self.evaluators = evaluators
         if isinstance(data, list):
             self.data = [
                 (Datapoint.model_validate(point) if isinstance(point, dict) else point)
```
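The constructor now rejects evaluator keys that do not match the pattern `^[\w\s-]+$`, i.e. anything other than letters, digits, underscores, hyphens, and whitespace. A quick sketch of which keys that check accepts:

```python
import re

# Same pattern as the new validation in Evaluation.__init__.
EVALUATOR_KEY_PATTERN = re.compile(r"^[\w\s-]+$")

for key in ["containsPoem", "contains poem", "contains-poem_2", "contains/poem", "score!", ""]:
    verdict = "ok" if EVALUATOR_KEY_PATTERN.match(key) else "rejected"
    print(f"{key!r}: {verdict}")
# 'containsPoem': ok
# 'contains poem': ok
# 'contains-poem_2': ok
# 'contains/poem': rejected
# 'score!': rejected
# '': rejected
```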
````diff
@@ -149,6 +172,10 @@
             ]
         else:
             self.data = data
+        self.executor = executor
+        self.evaluators = evaluators
+        self.group_id = group_id
+        self.name = name
         self.batch_size = batch_size
         L.initialize(
             project_api_key=project_api_key,
@@ -159,23 +186,6 @@
         )
 
     def run(self) -> Union[None, Awaitable[None]]:
-        """Runs the evaluation.
-
-        Creates a new evaluation if no evaluation with such name exists, or
-        adds data to an existing one otherwise. Evaluates data points in
-        batches of `self.batch_size`. The executor
-        function is called on each data point to get the output,
-        and then evaluate it by each evaluator function.
-
-        Usage:
-        ```python
-        # in a synchronous context:
-        e.run()
-        # in an asynchronous context:
-        await e.run()
-        ```
-
-        """
         if self.is_finished:
             raise Exception("Evaluation is already finished")
 
````
```diff
@@ -186,41 +196,34 @@
         return loop.run_until_complete(self._run())
 
     async def _run(self) -> None:
-        evaluation = L.create_evaluation(self.name)
         self.reporter.start(
-            evaluation.name,
-            evaluation.projectId,
-            evaluation.id,
             len(self.data),
         )
 
         try:
-            await self.evaluate_in_batches(
+            result_datapoints = await self.evaluate_in_batches()
         except Exception as e:
-            L.update_evaluation_status(evaluation.id, "Error")
             self.reporter.stopWithError(e)
             self.is_finished = True
             return
+        else:
+            evaluation = L.create_evaluation(data=result_datapoints, group_id=self.group_id, name=self.name)
+            average_scores = get_average_scores(result_datapoints)
+            self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
+            self.is_finished = True
 
-
-
-        self.reporter.stop(average_scores)
-        self.is_finished = True
-
-    async def evaluate_in_batches(self, evaluation_id: uuid.UUID):
+    async def evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
+        result_datapoints = []
         for i in range(0, len(self.data), self.batch_size):
             batch = (
                 self.data[i: i + self.batch_size]
                 if isinstance(self.data, list)
                 else self.data.slice(i, i + self.batch_size)
             )
-
-
-
-
-                print(f"Error evaluating batch: {e}")
-            finally:
-                self.reporter.update(len(batch))
+            batch_datapoints = await self._evaluate_batch(batch)
+            result_datapoints.extend(batch_datapoints)
+            self.reporter.update(len(batch))
+        return result_datapoints
 
     async def _evaluate_batch(
         self, batch: list[Datapoint]
```
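Note how the control flow is inverted here: instead of creating the evaluation up front and reporting status as it goes, the SDK now evaluates all batches locally, collects the result datapoints, and only then creates the evaluation in a single request. A minimal sketch of the batch-slicing loop used above, with a dummy async worker in place of `_evaluate_batch` (list input only; the `EvaluationDataset.slice` branch is left out):

```python
import asyncio

async def _evaluate_batch(batch: list[str]) -> list[str]:
    # Dummy stand-in for Evaluation._evaluate_batch: pretend each item yields one result datapoint.
    return [f"result:{item}" for item in batch]

async def evaluate_in_batches(data: list[str], batch_size: int) -> list[str]:
    result_datapoints: list[str] = []
    for i in range(0, len(data), batch_size):
        batch = data[i: i + batch_size]
        result_datapoints.extend(await _evaluate_batch(batch))
    return result_datapoints

print(asyncio.run(evaluate_in_batches([f"dp{i}" for i in range(5)], batch_size=2)))
# ['result:dp0', 'result:dp1', 'result:dp2', 'result:dp3', 'result:dp4']
```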
```diff
@@ -281,6 +284,7 @@ def evaluate(
     data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
     executor: ExecutorFunction,
     evaluators: dict[str, EvaluatorFunction],
+    group_id: Optional[str] = None,
     name: Optional[str] = None,
     batch_size: int = DEFAULT_BATCH_SIZE,
     project_api_key: Optional[str] = None,
@@ -309,8 +313,11 @@
             evaluator function. If the function is anonymous, it will be
             named `evaluator_${index}`, where index is the index of the
             evaluator function in the list starting from 1.
-
-
+        group_id (Optional[str], optional): Group name which is same
+            as the feature you are evaluating in your project or application.
+            Defaults to "default".
+        name (Optional[str], optional): Optional name of the evaluation. Used to easily
+            identify the evaluation in the group.
         batch_size (int, optional): The batch size for evaluation.
             Defaults to DEFAULT_BATCH_SIZE.
         project_api_key (Optional[str], optional): The project API key.
@@ -330,6 +337,7 @@
         data=data,
         executor=executor,
         evaluators=evaluators,
+        group_id=group_id,
         name=name,
         batch_size=batch_size,
         project_api_key=project_api_key,
```
{lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/laminar.py

```diff
@@ -47,7 +47,6 @@ from .types import (
     NodeInput,
     PipelineRunRequest,
     TraceType,
-    UpdateEvaluationResponse,
 )
 
 
@@ -413,10 +412,14 @@ class Laminar:
         set_association_properties(props)
 
     @classmethod
-    def create_evaluation(cls, name: Optional[str]) -> CreateEvaluationResponse:
+    def create_evaluation(cls, data: list[EvaluationResultDatapoint], group_id: Optional[str] = None, name: Optional[str] = None) -> CreateEvaluationResponse:
         response = requests.post(
             cls.__base_http_url + "/v1/evaluations",
-            data=json.dumps({
+            data=json.dumps({
+                "groupId": group_id,
+                "name": name,
+                "points": [datapoint.to_dict() for datapoint in data]
+            }),
             headers=cls._headers(),
         )
         if response.status_code != 200:
```
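With this signature, `Laminar.create_evaluation` uploads the result datapoints in the same request that creates the evaluation, rather than in a separate call. Roughly, the HTTP request now looks like the sketch below; the endpoint path and JSON keys (`groupId`, `name`, `points`) are taken from the diff, while the base URL, auth header format, and the shape of each serialized point are assumptions for illustration only:

```python
import json
import requests

BASE_URL = "https://api.lmnr.ai"        # assumed default; the SDK derives this from its configuration
PROJECT_API_KEY = "<project-api-key>"   # placeholder

body = {
    "groupId": "my_first_feature",      # optional; groups evaluations by feature
    "name": None,                       # optional; auto-generated when omitted
    "points": [                         # EvaluationResultDatapoint.to_dict() output (shape assumed)
        {"data": {"topic": "the sea"}, "target": {"text": "sea"}, "scores": {"containsPoem": 1}},
    ],
}

response = requests.post(
    f"{BASE_URL}/v1/evaluations",
    data=json.dumps(body),
    headers={
        "Authorization": f"Bearer {PROJECT_API_KEY}",  # assumed header format
        "Content-Type": "application/json",
    },
)
if response.status_code != 200:
    raise ValueError(f"Error creating evaluation {response.text}")
print(response.json())
```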
```diff
@@ -427,66 +430,6 @@
             raise ValueError(f"Error creating evaluation {response.text}")
         return CreateEvaluationResponse.model_validate(response.json())
 
-    @classmethod
-    def post_evaluation_results(
-        cls, evaluation_id: uuid.UUID, data: list[EvaluationResultDatapoint]
-    ) -> requests.Response:
-        body = {
-            "evaluationId": str(evaluation_id),
-            "points": [datapoint.to_dict() for datapoint in data],
-        }
-        response = requests.post(
-            cls.__base_http_url + "/v1/evaluation-datapoints",
-            data=json.dumps(body),
-            headers=cls._headers(),
-        )
-        if response.status_code != 200:
-            try:
-                resp_json = response.json()
-                raise ValueError(
-                    f"Failed to send evaluation results. Response: {json.dumps(resp_json)}"
-                )
-            except Exception:
-                raise ValueError(
-                    f"Failed to send evaluation results. Error: {response.text}"
-                )
-        return response
-
-    @classmethod
-    def update_evaluation_status(
-        cls, evaluation_id: str, status: str
-    ) -> UpdateEvaluationResponse:
-        """
-        Updates the status of an evaluation. Returns the updated evaluation object.
-
-        Args:
-            evaluation_id (str): The ID of the evaluation to update.
-            status (str): The status to set for the evaluation.
-
-        Returns:
-            UpdateEvaluationResponse: The updated evaluation response.
-
-        Raises:
-            ValueError: If the request fails.
-        """
-        body = {
-            "status": status,
-        }
-        url = f"{cls.__base_http_url}/v1/evaluations/{evaluation_id}"
-
-        response = requests.post(
-            url,
-            data=json.dumps(body),
-            headers=cls._headers(),
-        )
-        if response.status_code != 200:
-            raise ValueError(
-                f"Failed to update evaluation status {evaluation_id}. "
-                f"Response: {response.text}"
-            )
-
-        return UpdateEvaluationResponse.model_validate(response.json())
-
     @classmethod
     def _headers(cls):
         assert cls.__project_api_key is not None, "Project API key is not set"
```
{lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/types.py

```diff
@@ -2,7 +2,7 @@ import datetime
 from enum import Enum
 import pydantic
 import requests
-from typing import Any, Awaitable, Callable,
+from typing import Any, Awaitable, Callable, Optional, Union
 import uuid
 
 from .utils import serialize
@@ -107,24 +107,13 @@ EvaluatorFunction = Callable[
     Union[EvaluatorFunctionReturnType, Awaitable[EvaluatorFunctionReturnType]],
 ]
 
-EvaluationStatus = Literal["Started", "Finished", "Error"]
-
 
 class CreateEvaluationResponse(pydantic.BaseModel):
     id: uuid.UUID
     createdAt: datetime.datetime
+    groupId: str
     name: str
-    status: EvaluationStatus
     projectId: uuid.UUID
-    metadata: Optional[dict[str, Any]] = None
-
-
-class EvaluationStats(pydantic.BaseModel):
-    averageScores: dict[str, Numeric]
-
-
-class UpdateEvaluationResponse(pydantic.BaseModel):
-    stats: EvaluationStats
 
 
 class EvaluationResultDatapoint(pydantic.BaseModel):
```
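On the response side, `CreateEvaluationResponse` now carries a `groupId` and no longer has the status or metadata fields. A small sketch of validating a server payload against the updated model (the model mirrors the diff above; the payload values are made up):

```python
import datetime
import uuid

import pydantic


class CreateEvaluationResponse(pydantic.BaseModel):
    # Mirror of the updated model in src/lmnr/sdk/types.py.
    id: uuid.UUID
    createdAt: datetime.datetime
    groupId: str
    name: str
    projectId: uuid.UUID


response = CreateEvaluationResponse.model_validate({
    "id": str(uuid.uuid4()),
    "createdAt": "2024-09-10T12:00:00Z",
    "groupId": "my_first_feature",
    "name": "witty-sunset-1",
    "projectId": str(uuid.uuid4()),
})
print(response.groupId, response.projectId)
```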
|