PyPI - lmnr - Versions diffs - 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

lmnr 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

lmnr/__init__.py +1 -0
lmnr/sdk/evaluations.py +163 -0
lmnr/sdk/laminar.py +67 -0
lmnr/sdk/types.py +55 -3
{lmnr-0.4.0.dist-info → lmnr-0.4.1.dist-info}/METADATA +64 -1
lmnr-0.4.1.dist-info/RECORD +13 -0
lmnr-0.4.0.dist-info/RECORD +0 -12
{lmnr-0.4.0.dist-info → lmnr-0.4.1.dist-info}/LICENSE +0 -0
{lmnr-0.4.0.dist-info → lmnr-0.4.1.dist-info}/WHEEL +0 -0
{lmnr-0.4.0.dist-info → lmnr-0.4.1.dist-info}/entry_points.txt +0 -0

lmnr/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from .sdk.evaluations import Evaluation
 from .sdk.laminar import Laminar
 from .sdk.types import ChatMessage, PipelineRunError, PipelineRunResponse, NodeInput
 from .sdk.decorators import observe

lmnr/sdk/evaluations.py ADDED Viewed

@@ -0,0 +1,163 @@
+from typing import Union
+from .utils import is_async
+from .types import EvaluatorFunction, ExecutorFunction, EvaluationDatapoint, Numeric
+from .laminar import Laminar as L
+import asyncio
+from abc import ABC, abstractmethod
+DEFAULT_BATCH_SIZE = 5
+class EvaluationDataset(ABC):
+    @abstractmethod
+    def __init__(self, *args, **kwargs):
+        pass
+    @abstractmethod
+    def __len__(self) -> int:
+        pass
+    @abstractmethod
+    def __getitem__(self, idx) -> EvaluationDatapoint:
+        pass
+    def slice(self, start: int, end: int):
+        return [self[i] for i in range(max(start, 0), min(end, len(self)))]
+class Evaluation:
+    def __init__(
+        self,
+        name,
+        data: Union[EvaluationDataset, list[Union[EvaluationDatapoint, dict]]],
+        executor: ExecutorFunction,
+        evaluators: list[EvaluatorFunction],
+        batch_size: int = DEFAULT_BATCH_SIZE,
+        project_api_key: str = "",
+        base_url: str = "https://api.lmnr.ai",
+    ):
+        """
+        Initializes an instance of the Evaluations class.
+        Parameters:
+            name (str): The name of the evaluation.
+            data (Union[List[Union[EvaluationDatapoint, dict]], EvaluationDataset]): List of data points to evaluate or an evaluation dataset.
+                            `data` is the input to the executor function,
+                            `target` is the input to the evaluator function.
+            executor (Callable[..., Any]): The executor function.
+                            Takes the data point + any additional arguments
+                            and returns the output to evaluate.
+            evaluators (List[Callable[..., Any]]): List of evaluator functions.
+                Each evaluator function takes the output of the executor _and_
+                the target data, and returns a score. The score can be a
+                single number or a record of string keys and number values.
+                If the score is a single number, it will be named after the
+                evaluator function. If the function is anonymous, it will be
+                named `evaluator_${index}`, where index is the index of the
+                evaluator function in the list starting from 1.
+            batch_size (int, optional): The batch size for evaluation.
+                            Defaults to DEFAULT_BATCH_SIZE.
+            project_api_key (str, optional): The project API key.
+                            Defaults to an empty string.
+            base_url (str, optional): The base URL for the LMNR API.
+                            Useful if self-hosted elsewhere.
+                            Defaults to "https://api.lmnr.ai".
+        """
+        self.name = name
+        self.executor = executor
+        self.evaluators = dict(
+            zip(
+                [
+                    (
+                        e.__name__
+                        if e.__name__ and e.__name__ != "<lambda>"
+                        else f"evaluator_{i+1}"
+                    )
+                    for i, e in enumerate(evaluators)
+                ],
+                evaluators,
+            )
+        )
+        self.evaluator_names = list(self.evaluators.keys())
+        if isinstance(data, list):
+            self.data = [
+                (
+                    EvaluationDatapoint.model_validate(point)
+                    if isinstance(point, dict)
+                    else point
+                )
+                for point in data
+            ]
+        else:
+            self.data = data
+        self.batch_size = batch_size
+        L.initialize(project_api_key=project_api_key, base_url=base_url)
+    async def run(self):
+        """Runs the evaluation.
+        Creates a new evaluation if no evaluation with such name exists, or
+        adds data to an existing one otherwise. Evaluates data points in
+        batches of `self.batch_size`. The executor
+        function is called on each data point to get the output,
+        and then evaluate it by each evaluator function.
+        """
+        response = L.create_evaluation(self.name)
+        batch_promises = []
+        for i in range(0, len(self.data), self.batch_size):
+            batch = (
+                self.data[i : i + self.batch_size]
+                if isinstance(self.data, list)
+                else self.data.slice(i, i + self.batch_size)
+            )
+            batch_promises.append(self._evaluate_batch(batch))
+        try:
+            await asyncio.gather(*batch_promises)
+            L.update_evaluation_status(response.name, "Finished")
+            print(f"Evaluation {response.id} complete")
+        except Exception as e:
+            print(f"Error evaluating batch: {e}")
+    async def _evaluate_batch(self, batch: list[EvaluationDatapoint]):
+        results = []
+        for datapoint in batch:
+            output = (
+                await self.executor(datapoint.data)
+                if is_async(self.executor)
+                else self.executor(datapoint.data)
+            )
+            target = datapoint.target
+            # iterate in order of evaluators
+            scores = {}
+            for evaluator_name in self.evaluator_names:
+                evaluator = self.evaluators[evaluator_name]
+                value = (
+                    await evaluator(output, target)
+                    if is_async(evaluator)
+                    else evaluator(output, target)
+                )
+                # if the evaluator returns a single number,
+                # use the evaluator name as the key
+                if isinstance(value, Numeric):
+                    scores[evaluator_name] = value
+                else:
+                    # if the evaluator returns an object,
+                    # use the object keys as the keys
+                    scores.update(value)
+            results.append(
+                {
+                    "executorOutput": output,
+                    "data": datapoint.data,
+                    "target": target,
+                    "scores": scores,
+                }
+            )
+        return L.post_evaluation_results(self.name, results)

lmnr/sdk/laminar.py CHANGED Viewed

@@ -25,6 +25,8 @@ import uuid
 from .log import VerboseColorfulFormatter
 from .types import (
+    CreateEvaluationResponse,
+    EvaluationResultDatapoint,
     PipelineRunError,
     PipelineRunResponse,
     NodeInput,
@@ -372,6 +374,71 @@ class Laminar:
         props.pop("user_id", None)
         Traceloop.set_association_properties(props)
+    @classmethod
+    def create_evaluation(cls, name: str) -> CreateEvaluationResponse:
+        response = requests.post(
+            cls.__base_url + "/v1/evaluations",
+            data=json.dumps({"name": name}),
+            headers=cls._headers(),
+        )
+        if response.status_code != 200:
+            try:
+                resp_json = response.json()
+                raise ValueError(f"Error creating evaluation {json.dumps(resp_json)}")
+            except Exception:
+                raise ValueError(f"Error creating evaluation {response.text}")
+        return CreateEvaluationResponse.model_validate(response.json())
+    @classmethod
+    def post_evaluation_results(
+        cls, evaluation_name: str, data: list[EvaluationResultDatapoint]
+    ) -> requests.Response:
+        body = {
+            "name": evaluation_name,
+            "points": data,
+        }
+        response = requests.post(
+            cls.__base_url + "/v1/evaluation-datapoints",
+            data=json.dumps(body),
+            headers=cls._headers(),
+        )
+        if response.status_code != 200:
+            try:
+                resp_json = response.json()
+                raise ValueError(
+                    f"Failed to send evaluation results. Response: {json.dumps(resp_json)}"
+                )
+            except Exception:
+                raise ValueError(
+                    f"Failed to send evaluation results. Error: {response.text}"
+                )
+        return response
+    @classmethod
+    def update_evaluation_status(
+        cls, evaluation_name: str, status: str
+    ) -> requests.Response:
+        body = {
+            "name": evaluation_name,
+            "status": status,
+        }
+        response = requests.put(
+            cls.__base_url + "/v1/evaluations/",
+            data=json.dumps(body),
+            headers=cls._headers(),
+        )
+        if response.status_code != 200:
+            try:
+                resp_json = response.json()
+                raise ValueError(
+                    f"Failed to send evaluation status. Response: {json.dumps(resp_json)}"
+                )
+            except Exception:
+                raise ValueError(
+                    f"Failed to send evaluation status. Error: {response.text}"
+                )
+        return response
     @classmethod
     def _headers(cls):
         return {

lmnr/sdk/types.py CHANGED Viewed

@@ -1,7 +1,8 @@
+import datetime
 import requests
 import pydantic
 import uuid
-from typing import Optional, Union
+from typing import Any, Awaitable, Callable, Literal, Optional, TypeAlias, Union
 from .utils import to_dict
@@ -16,7 +17,9 @@ class ConditionedValue(pydantic.BaseModel):
     value: "NodeInput"
-NodeInput = Union[str, list[ChatMessage], ConditionedValue]  # TypeAlias
+Numeric: TypeAlias = Union[int, float]
+NodeInput: TypeAlias = Union[str, list[ChatMessage], ConditionedValue, Numeric, bool]
+PipelineOutput: TypeAlias = Union[NodeInput]
 class PipelineRunRequest(pydantic.BaseModel):
@@ -45,7 +48,7 @@ class PipelineRunRequest(pydantic.BaseModel):
 class PipelineRunResponse(pydantic.BaseModel):
-    outputs: dict[str, dict[str, NodeInput]]
+    outputs: dict[str, dict[str, PipelineOutput]]
     run_id: str
@@ -69,3 +72,52 @@ class PipelineRunError(Exception):
             )
         except Exception:
             return super().__str__()
+EvaluationDatapointData: TypeAlias = dict[str, Any]
+EvaluationDatapointTarget: TypeAlias = dict[str, Any]
+# EvaluationDatapoint is a single data point in the evaluation
+class EvaluationDatapoint(pydantic.BaseModel):
+    # input to the executor function. Must be a dict with string keys
+    data: EvaluationDatapointData
+    # input to the evaluator function (alongside the executor output).
+    # Must be a dict with string keys
+    target: EvaluationDatapointTarget
+ExecutorFunctionReturnType: TypeAlias = Any
+EvaluatorFunctionReturnType: TypeAlias = Union[Numeric, dict[str, Numeric]]
+ExecutorFunction: TypeAlias = Callable[
+    [EvaluationDatapointData, *tuple[Any, ...], dict[str, Any]],
+    Union[ExecutorFunctionReturnType, Awaitable[ExecutorFunctionReturnType]],
+]
+# EvaluatorFunction is a function that takes the output of the executor and the
+# target data, and returns a score. The score can be a single number or a
+# record of string keys and number values. The latter is useful for evaluating
+# multiple criteria in one go instead of running multiple evaluators.
+EvaluatorFunction: TypeAlias = Callable[
+    [ExecutorFunctionReturnType, *tuple[Any, ...], dict[str, Any]],
+    Union[EvaluatorFunctionReturnType, Awaitable[EvaluatorFunctionReturnType]],
+]
+EvaluationStatus: TypeAlias = Literal["Started", "Finished", "Error"]
+class CreateEvaluationResponse(pydantic.BaseModel):
+    id: uuid.UUID
+    createdAt: datetime.datetime
+    name: str
+    status: EvaluationStatus
+    projectId: uuid.UUID
+    metadata: Optional[dict[str, Any]] = None
+class EvaluationResultDatapoint(pydantic.BaseModel):
+    data: EvaluationDatapointData
+    target: EvaluationDatapointTarget
+    executor_output: ExecutorFunctionReturnType
+    scores: dict[str, Numeric]

{lmnr-0.4.0.dist-info → lmnr-0.4.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lmnr
-Version: 0.4.0
+Version: 0.4.1
 Summary: Python SDK for Laminar AI
 License: Apache-2.0
 Author: lmnr.ai
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Requires-Dist: asyncio (>=3.4.3,<4.0.0)
 Requires-Dist: backoff (>=2.2.1,<3.0.0)
 Requires-Dist: pydantic (>=2.7.4,<3.0.0)
 Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
@@ -149,3 +150,65 @@ PipelineRunResponse(
 )
 ```
+## Running offline evaluations on your data
+You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
+Evaluation takes in the following parameters:
+- `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
+- `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
+- `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
+- `evaluators` – evaluation logic. List of functions that take the output of the executor as the first argument and `target` as the second argument, and produce numeric scores. Each function can produce either a single number or a `dict[str, int|float]` of scores.
+\* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
+### Example
+```python
+from openai import AsyncOpenAI
+import asyncio
+import os
+openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
+async def get_capital(data):
+    country = data["country"]
+    response = await openai_client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": f"What is the capital of {country}? Just name the "
+                "city and nothing else",
+            },
+        ],
+    )
+    return response.choices[0].message.content.strip()
+# Evaluation data
+data = [
+    {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
+    {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
+    {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
+]
+def evaluator_A(output, target):
+    return 1 if output == target["capital"] else 0
+# Create an Evaluation instance
+e = Evaluation(
+    name="py-evaluation-async",
+    data=data,
+    executor=get_capital,
+    evaluators=[evaluator_A],
+    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
+)
+# Run the evaluation
+asyncio.run(e.run())
+```

lmnr-0.4.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+lmnr/__init__.py,sha256=wQwnHl662Xcz7GdSofFsEjmAK0nxioYA2Yq6Q78m4ps,194
+lmnr/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lmnr/sdk/decorators.py,sha256=Xs6n0TGX9LZ9i1hE_UZz4LEyd_ZAfpVGfNQh_rKwOuA,2493
+lmnr/sdk/evaluations.py,sha256=LkQApHAhR7y_rC2ovnJi8yHpdcl0-7yesdBqvOJ0BKg,6107
+lmnr/sdk/laminar.py,sha256=970fvaw969pBdBqrDRD8lQ82uPEn8V5n-4rIIe_5pqM,16552
+lmnr/sdk/log.py,sha256=EgAMY77Zn1bv1imCqrmflD3imoAJ2yveOkIcrIP3e98,1170
+lmnr/sdk/types.py,sha256=gDwRSWR9A1__FGtQhVaFc6PUYQuIhubo5tpfYAajTQQ,4055
+lmnr/sdk/utils.py,sha256=ZsGJ86tq8lIbvOhSb1gJWH5K3GylO_lgX68FN6rG2nM,3358
+lmnr-0.4.1.dist-info/LICENSE,sha256=67b_wJHVV1CBaWkrKFWU1wyqTPSdzH77Ls-59631COg,10411
+lmnr-0.4.1.dist-info/METADATA,sha256=_g6TaAlJuPxl_sbWYPjlg4380V0Ja34P8OXHAXofakI,7025
+lmnr-0.4.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+lmnr-0.4.1.dist-info/entry_points.txt,sha256=Qg7ZRax4k-rcQsZ26XRYQ8YFSBiyY2PNxYfq4a6PYXI,41
+lmnr-0.4.1.dist-info/RECORD,,

lmnr-0.4.0.dist-info/RECORD DELETED Viewed

@@ -1,12 +0,0 @@
-lmnr/__init__.py,sha256=LDr-OWinRQz-KjzXAotEzUNoi59QoZi3MMll-vcAE8Y,154
-lmnr/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lmnr/sdk/decorators.py,sha256=Xs6n0TGX9LZ9i1hE_UZz4LEyd_ZAfpVGfNQh_rKwOuA,2493
-lmnr/sdk/laminar.py,sha256=Ae5w6no2SqM6Zgp9aVJ1kvQUKbgeKX-1fBTovdfElZo,14197
-lmnr/sdk/log.py,sha256=EgAMY77Zn1bv1imCqrmflD3imoAJ2yveOkIcrIP3e98,1170
-lmnr/sdk/types.py,sha256=5-Ft-l35wtmn2xxE8BTqsM3nx1zD799tRv4qiOkED50,2121
-lmnr/sdk/utils.py,sha256=ZsGJ86tq8lIbvOhSb1gJWH5K3GylO_lgX68FN6rG2nM,3358
-lmnr-0.4.0.dist-info/LICENSE,sha256=67b_wJHVV1CBaWkrKFWU1wyqTPSdzH77Ls-59631COg,10411
-lmnr-0.4.0.dist-info/METADATA,sha256=cRoKKpLeNNk6E3yxNzLHvGmaStrmCaQXCUAEr-Ix7Dg,4548
-lmnr-0.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-lmnr-0.4.0.dist-info/entry_points.txt,sha256=Qg7ZRax4k-rcQsZ26XRYQ8YFSBiyY2PNxYfq4a6PYXI,41
-lmnr-0.4.0.dist-info/RECORD,,

{lmnr-0.4.0.dist-info → lmnr-0.4.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{lmnr-0.4.0.dist-info → lmnr-0.4.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{lmnr-0.4.0.dist-info → lmnr-0.4.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

lmnr 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

lmnr 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl