lmnr 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lmnr/__init__.py CHANGED
@@ -1,3 +1,4 @@
1
+ from .sdk.evaluations import Evaluation
1
2
  from .sdk.laminar import Laminar
2
3
  from .sdk.types import ChatMessage, PipelineRunError, PipelineRunResponse, NodeInput
3
4
  from .sdk.decorators import observe
@@ -0,0 +1,163 @@
1
+ from typing import Union
2
+
3
+ from .utils import is_async
4
+ from .types import EvaluatorFunction, ExecutorFunction, EvaluationDatapoint, Numeric
5
+ from .laminar import Laminar as L
6
+ import asyncio
7
+
8
+ from abc import ABC, abstractmethod
9
+
10
+ DEFAULT_BATCH_SIZE = 5
11
+
12
+
13
class EvaluationDataset(ABC):
    """Abstract, index-addressable collection of evaluation data points.

    Subclasses must implement `__init__`, `__len__` and `__getitem__`;
    `slice` is built on top of the latter two.
    """

    @abstractmethod
    def __init__(self, *args, **kwargs):
        """Set up the dataset; the accepted arguments are up to the subclass."""

    @abstractmethod
    def __len__(self) -> int:
        """Return the number of data points in the dataset."""

    @abstractmethod
    def __getitem__(self, idx) -> EvaluationDatapoint:
        """Return the data point stored at position `idx`."""

    def slice(self, start: int, end: int):
        """Return the data points at positions `[start, end)` as a list.

        `start` is clamped to 0 and `end` to `len(self)` before indexing.
        """
        lower = max(start, 0)
        upper = min(end, len(self))
        items = []
        for position in range(lower, upper):
            items.append(self[position])
        return items
28
+
29
+
30
class Evaluation:
    """Runs an executor over data points and scores its output with evaluators.

    Data points are processed in batches; the results of each batch are sent
    through the `Laminar` client (`L`).
    """

    def __init__(
        self,
        name,
        data: Union[EvaluationDataset, list[Union[EvaluationDatapoint, dict]]],
        executor: ExecutorFunction,
        evaluators: list[EvaluatorFunction],
        batch_size: int = DEFAULT_BATCH_SIZE,
        project_api_key: str = "",
        base_url: str = "https://api.lmnr.ai",
    ):
        """
        Initializes an instance of the Evaluation class.
        Parameters:
            name (str): The name of the evaluation.
            data (Union[List[Union[EvaluationDatapoint, dict]], EvaluationDataset]): List of data points to evaluate or an evaluation dataset.
                            `data` is the input to the executor function,
                            `target` is the input to the evaluator function.
            executor (Callable[..., Any]): The executor function.
                            Takes the data point and returns the output
                            to evaluate.
            evaluators (List[Callable[..., Any]]): List of evaluator functions.
                Each evaluator function takes the output of the executor _and_
                the target data, and returns a score. The score can be a
                single number or a record of string keys and number values.
                If the score is a single number, it will be named after the
                evaluator function. If the function is anonymous or has no
                `__name__`, it will be named `evaluator_{index}`, where index
                is the index of the evaluator function in the list starting
                from 1.
            batch_size (int, optional): The batch size for evaluation.
                            Defaults to DEFAULT_BATCH_SIZE.
            project_api_key (str, optional): The project API key.
                            Defaults to an empty string.
            base_url (str, optional): The base URL for the LMNR API.
                            Useful if self-hosted elsewhere.
                            Defaults to "https://api.lmnr.ai".
        """

        self.name = name
        self.executor = executor

        # Map evaluator name -> evaluator, keeping the order of the input list.
        # getattr() is used because not every callable has `__name__`
        # (e.g. functools.partial objects or callable class instances).
        # NOTE(review): evaluators sharing a name overwrite each other here,
        # exactly as before; consider rejecting duplicates.
        self.evaluators = {}
        for index, evaluator in enumerate(evaluators, start=1):
            evaluator_name = getattr(evaluator, "__name__", None)
            if not evaluator_name or evaluator_name == "<lambda>":
                evaluator_name = f"evaluator_{index}"
            self.evaluators[evaluator_name] = evaluator
        self.evaluator_names = list(self.evaluators.keys())

        if isinstance(data, list):
            # Plain dicts are validated into EvaluationDatapoint models;
            # anything else in the list is kept as passed.
            self.data = [
                (
                    EvaluationDatapoint.model_validate(point)
                    if isinstance(point, dict)
                    else point
                )
                for point in data
            ]
        else:
            self.data = data
        self.batch_size = batch_size
        L.initialize(project_api_key=project_api_key, base_url=base_url)

    async def run(self):
        """Runs the evaluation.

        Creates a new evaluation if no evaluation with such name exists, or
        adds data to an existing one otherwise. Evaluates data points in
        batches of `self.batch_size`. The executor
        function is called on each data point to get the output,
        and then evaluate it by each evaluator function.

        Errors raised while evaluating or reporting are printed, not
        re-raised; in that case the evaluation status is set to "Error"
        on a best-effort basis.
        """
        response = L.create_evaluation(self.name)
        batch_promises = []

        for i in range(0, len(self.data), self.batch_size):
            batch = (
                self.data[i : i + self.batch_size]
                if isinstance(self.data, list)
                else self.data.slice(i, i + self.batch_size)
            )
            batch_promises.append(self._evaluate_batch(batch))

        try:
            await asyncio.gather(*batch_promises)
            L.update_evaluation_status(response.name, "Finished")
            print(f"Evaluation {response.id} complete")
        except Exception as e:
            print(f"Error evaluating batch: {e}")
            # Record the failure instead of leaving the evaluation in its
            # initial state. The update itself must not turn this
            # best-effort error path into a crash.
            try:
                L.update_evaluation_status(response.name, "Error")
            except Exception as status_error:
                print(f"Error updating evaluation status: {status_error}")

    async def _evaluate_batch(self, batch: list[EvaluationDatapoint]):
        """Run the executor and all evaluators on `batch` and post the results.

        Returns whatever `L.post_evaluation_results` returns for the batch.
        """
        results = []
        for datapoint in batch:
            output = (
                await self.executor(datapoint.data)
                if is_async(self.executor)
                else self.executor(datapoint.data)
            )
            target = datapoint.target

            # iterate in order of evaluators
            scores = {}
            for evaluator_name in self.evaluator_names:
                evaluator = self.evaluators[evaluator_name]
                value = (
                    await evaluator(output, target)
                    if is_async(evaluator)
                    else evaluator(output, target)
                )

                # if the evaluator returns a single number,
                # use the evaluator name as the key.
                # A plain tuple of classes is used so the check does not
                # rely on isinstance() accepting a typing.Union alias.
                if isinstance(value, (int, float)):
                    scores[evaluator_name] = value
                else:
                    # if the evaluator returns an object,
                    # use the object keys as the keys
                    scores.update(value)

            results.append(
                {
                    "executorOutput": output,
                    "data": datapoint.data,
                    "target": target,
                    "scores": scores,
                }
            )

        return L.post_evaluation_results(self.name, results)
lmnr/sdk/laminar.py CHANGED
@@ -25,6 +25,8 @@ import uuid
25
25
  from .log import VerboseColorfulFormatter
26
26
 
27
27
  from .types import (
28
+ CreateEvaluationResponse,
29
+ EvaluationResultDatapoint,
28
30
  PipelineRunError,
29
31
  PipelineRunResponse,
30
32
  NodeInput,
@@ -372,6 +374,71 @@ class Laminar:
372
374
  props.pop("user_id", None)
373
375
  Traceloop.set_association_properties(props)
374
376
 
377
@classmethod
def create_evaluation(cls, name: str) -> CreateEvaluationResponse:
    """POST `{"name": name}` to `/v1/evaluations` and parse the response.

    Args:
        name: Name of the evaluation to send.

    Returns:
        The response body validated as a `CreateEvaluationResponse`.

    Raises:
        ValueError: If the response status code is not 200. The message
            carries the JSON body when it can be parsed, otherwise the
            raw response text.
    """
    response = requests.post(
        cls.__base_url + "/v1/evaluations",
        data=json.dumps({"name": name}),
        headers=cls._headers(),
    )
    if response.status_code != 200:
        # Build the detail first and raise outside the try block, so the
        # JSON-formatted error is not swallowed by the fallback handler.
        try:
            details = json.dumps(response.json())
        except ValueError:
            details = response.text
        raise ValueError(f"Error creating evaluation {details}")
    return CreateEvaluationResponse.model_validate(response.json())
391
+
392
@classmethod
def post_evaluation_results(
    cls, evaluation_name: str, data: list[EvaluationResultDatapoint]
) -> requests.Response:
    """POST evaluation result points to `/v1/evaluation-datapoints`.

    Args:
        evaluation_name: Sent as the `name` field of the request body.
        data: Sent as the `points` field; must be JSON-serializable,
            because the body is encoded with `json.dumps`.

    Returns:
        The `requests.Response` of the call when its status code is 200.

    Raises:
        ValueError: If the response status code is not 200. The message
            carries the JSON body when it can be parsed, otherwise the
            raw response text.
    """
    body = {
        "name": evaluation_name,
        "points": data,
    }
    response = requests.post(
        cls.__base_url + "/v1/evaluation-datapoints",
        data=json.dumps(body),
        headers=cls._headers(),
    )
    if response.status_code != 200:
        # Decide on the message inside the try block, raise outside it, so
        # the JSON variant is not caught by the fallback handler.
        try:
            message = (
                "Failed to send evaluation results. "
                f"Response: {json.dumps(response.json())}"
            )
        except ValueError:
            message = f"Failed to send evaluation results. Error: {response.text}"
        raise ValueError(message)
    return response
416
+
417
@classmethod
def update_evaluation_status(
    cls, evaluation_name: str, status: str
) -> requests.Response:
    """PUT a new status for an evaluation to `/v1/evaluations/`.

    Args:
        evaluation_name: Sent as the `name` field of the request body.
        status: Sent as the `status` field of the request body.

    Returns:
        The `requests.Response` of the call when its status code is 200.

    Raises:
        ValueError: If the response status code is not 200. The message
            carries the JSON body when it can be parsed, otherwise the
            raw response text.
    """
    body = {
        "name": evaluation_name,
        "status": status,
    }
    response = requests.put(
        cls.__base_url + "/v1/evaluations/",
        data=json.dumps(body),
        headers=cls._headers(),
    )
    if response.status_code != 200:
        # Decide on the message inside the try block, raise outside it, so
        # the JSON variant is not caught by the fallback handler.
        try:
            message = (
                "Failed to send evaluation status. "
                f"Response: {json.dumps(response.json())}"
            )
        except ValueError:
            message = f"Failed to send evaluation status. Error: {response.text}"
        raise ValueError(message)
    return response
441
+
375
442
  @classmethod
376
443
  def _headers(cls):
377
444
  return {
lmnr/sdk/types.py CHANGED
@@ -1,7 +1,8 @@
1
+ import datetime
1
2
  import requests
2
3
  import pydantic
3
4
  import uuid
4
- from typing import Optional, Union
5
+ from typing import Any, Awaitable, Callable, Literal, Optional, TypeAlias, Union
5
6
 
6
7
  from .utils import to_dict
7
8
 
@@ -16,7 +17,9 @@ class ConditionedValue(pydantic.BaseModel):
16
17
  value: "NodeInput"
17
18
 
18
19
 
19
- NodeInput = Union[str, list[ChatMessage], ConditionedValue] # TypeAlias
20
+ Numeric: TypeAlias = Union[int, float]
21
+ NodeInput: TypeAlias = Union[str, list[ChatMessage], ConditionedValue, Numeric, bool]
22
+ PipelineOutput: TypeAlias = Union[NodeInput]
20
23
 
21
24
 
22
25
  class PipelineRunRequest(pydantic.BaseModel):
@@ -45,7 +48,7 @@ class PipelineRunRequest(pydantic.BaseModel):
45
48
 
46
49
 
47
50
  class PipelineRunResponse(pydantic.BaseModel):
48
- outputs: dict[str, dict[str, NodeInput]]
51
+ outputs: dict[str, dict[str, PipelineOutput]]
49
52
  run_id: str
50
53
 
51
54
 
@@ -69,3 +72,52 @@ class PipelineRunError(Exception):
69
72
  )
70
73
  except Exception:
71
74
  return super().__str__()
75
+
76
+
77
# Both halves of a data point are free-form dicts with string keys.
EvaluationDatapointData: TypeAlias = dict[str, Any]
EvaluationDatapointTarget: TypeAlias = dict[str, Any]


# A single data point of an evaluation.
class EvaluationDatapoint(pydantic.BaseModel):
    # Input handed to the executor function.
    # Must be a dict with string keys.
    data: EvaluationDatapointData
    # Input handed to each evaluator function, next to the executor output.
    # Must be a dict with string keys.
    target: EvaluationDatapointTarget
88
+
89
+
90
ExecutorFunctionReturnType: TypeAlias = Any
EvaluatorFunctionReturnType: TypeAlias = Union[Numeric, dict[str, Numeric]]

# ExecutorFunction is the function under evaluation. It receives the `data`
# of a data point and returns the output to evaluate, either directly or as
# an awaitable.
# The parameter list is left open (`...`): the previous spelling
# `[data, *tuple[Any, ...], dict[str, Any]]` describes a required trailing
# positional dict rather than `**kwargs`, so a plain `def executor(data)` did
# not match it, and unpacking `tuple[...]` with `*` needs Python 3.11+.
ExecutorFunction: TypeAlias = Callable[
    ...,
    Union[ExecutorFunctionReturnType, Awaitable[ExecutorFunctionReturnType]],
]

# EvaluatorFunction is a function that takes the output of the executor and the
# target data, and returns a score. The score can be a single number or a
# record of string keys and number values. The latter is useful for evaluating
# multiple criteria in one go instead of running multiple evaluators.
# The parameter list is left open for the same reason as ExecutorFunction.
EvaluatorFunction: TypeAlias = Callable[
    ...,
    Union[EvaluatorFunctionReturnType, Awaitable[EvaluatorFunctionReturnType]],
]

EvaluationStatus: TypeAlias = Literal["Started", "Finished", "Error"]
108
+
109
+
110
# Shape that Laminar.create_evaluation validates the API's JSON reply against.
class CreateEvaluationResponse(pydantic.BaseModel):
    # Field names are camelCase where the model reads camelCase keys
    # (`createdAt`, `projectId`).
    id: uuid.UUID
    createdAt: datetime.datetime
    name: str
    # One of "Started", "Finished" or "Error".
    status: EvaluationStatus
    projectId: uuid.UUID
    # Optional; defaults to None when absent.
    metadata: Optional[dict[str, Any]] = None
117
+
118
+
119
# One evaluated data point: the inputs, the executor output and the scores.
# NOTE(review): Evaluation._evaluate_batch posts plain dicts keyed
# "executorOutput", while this model names the field `executor_output` —
# confirm which spelling the API expects.
class EvaluationResultDatapoint(pydantic.BaseModel):
    # Input that was given to the executor.
    data: EvaluationDatapointData
    # Target that was given to the evaluators.
    target: EvaluationDatapointTarget
    # Whatever the executor returned for `data`.
    executor_output: ExecutorFunctionReturnType
    # Score name -> numeric score.
    scores: dict[str, Numeric]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lmnr
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Python SDK for Laminar AI
5
5
  License: Apache-2.0
6
6
  Author: lmnr.ai
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
13
  Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: asyncio (>=3.4.3,<4.0.0)
14
15
  Requires-Dist: backoff (>=2.2.1,<3.0.0)
15
16
  Requires-Dist: pydantic (>=2.7.4,<3.0.0)
16
17
  Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
@@ -149,3 +150,65 @@ PipelineRunResponse(
149
150
  )
150
151
  ```
151
152
 
153
+ ## Running offline evaluations on your data
154
+
155
+ You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
156
+
157
+ Evaluation takes in the following parameters:
158
+ - `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
159
+ - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
160
+ - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
161
+ - `evaluators` – evaluation logic. A list of functions that take the output of the executor as the first argument and `target` as the second argument, and produce numeric scores. Each function can produce either a single number or a `dict[str, int|float]` of scores.
162
+
163
+ \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function that takes in `data` and returns only the needed value(s) from it.
164
+
165
+ ### Example
166
+
167
+ ```python
168
+ from lmnr import Evaluation
+ from openai import AsyncOpenAI
169
+ import asyncio
170
+ import os
171
+
172
+ openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
173
+
174
+ async def get_capital(data):
175
+ country = data["country"]
176
+ response = await openai_client.chat.completions.create(
177
+ model="gpt-4o-mini",
178
+ messages=[
179
+ {"role": "system", "content": "You are a helpful assistant."},
180
+ {
181
+ "role": "user",
182
+ "content": f"What is the capital of {country}? Just name the "
183
+ "city and nothing else",
184
+ },
185
+ ],
186
+ )
187
+ return response.choices[0].message.content.strip()
188
+
189
+
190
+ # Evaluation data
191
+ data = [
192
+ {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
193
+ {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
194
+ {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
195
+ ]
196
+
197
+
198
+ def evaluator_A(output, target):
199
+ return 1 if output == target["capital"] else 0
200
+
201
+
202
+ # Create an Evaluation instance
203
+ e = Evaluation(
204
+ name="py-evaluation-async",
205
+ data=data,
206
+ executor=get_capital,
207
+ evaluators=[evaluator_A],
208
+ project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
209
+ )
210
+
211
+ # Run the evaluation
212
+ asyncio.run(e.run())
213
+ ```
214
+
@@ -0,0 +1,13 @@
1
+ lmnr/__init__.py,sha256=wQwnHl662Xcz7GdSofFsEjmAK0nxioYA2Yq6Q78m4ps,194
2
+ lmnr/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ lmnr/sdk/decorators.py,sha256=Xs6n0TGX9LZ9i1hE_UZz4LEyd_ZAfpVGfNQh_rKwOuA,2493
4
+ lmnr/sdk/evaluations.py,sha256=LkQApHAhR7y_rC2ovnJi8yHpdcl0-7yesdBqvOJ0BKg,6107
5
+ lmnr/sdk/laminar.py,sha256=970fvaw969pBdBqrDRD8lQ82uPEn8V5n-4rIIe_5pqM,16552
6
+ lmnr/sdk/log.py,sha256=EgAMY77Zn1bv1imCqrmflD3imoAJ2yveOkIcrIP3e98,1170
7
+ lmnr/sdk/types.py,sha256=gDwRSWR9A1__FGtQhVaFc6PUYQuIhubo5tpfYAajTQQ,4055
8
+ lmnr/sdk/utils.py,sha256=ZsGJ86tq8lIbvOhSb1gJWH5K3GylO_lgX68FN6rG2nM,3358
9
+ lmnr-0.4.1.dist-info/LICENSE,sha256=67b_wJHVV1CBaWkrKFWU1wyqTPSdzH77Ls-59631COg,10411
10
+ lmnr-0.4.1.dist-info/METADATA,sha256=_g6TaAlJuPxl_sbWYPjlg4380V0Ja34P8OXHAXofakI,7025
11
+ lmnr-0.4.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
12
+ lmnr-0.4.1.dist-info/entry_points.txt,sha256=Qg7ZRax4k-rcQsZ26XRYQ8YFSBiyY2PNxYfq4a6PYXI,41
13
+ lmnr-0.4.1.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- lmnr/__init__.py,sha256=LDr-OWinRQz-KjzXAotEzUNoi59QoZi3MMll-vcAE8Y,154
2
- lmnr/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- lmnr/sdk/decorators.py,sha256=Xs6n0TGX9LZ9i1hE_UZz4LEyd_ZAfpVGfNQh_rKwOuA,2493
4
- lmnr/sdk/laminar.py,sha256=Ae5w6no2SqM6Zgp9aVJ1kvQUKbgeKX-1fBTovdfElZo,14197
5
- lmnr/sdk/log.py,sha256=EgAMY77Zn1bv1imCqrmflD3imoAJ2yveOkIcrIP3e98,1170
6
- lmnr/sdk/types.py,sha256=5-Ft-l35wtmn2xxE8BTqsM3nx1zD799tRv4qiOkED50,2121
7
- lmnr/sdk/utils.py,sha256=ZsGJ86tq8lIbvOhSb1gJWH5K3GylO_lgX68FN6rG2nM,3358
8
- lmnr-0.4.0.dist-info/LICENSE,sha256=67b_wJHVV1CBaWkrKFWU1wyqTPSdzH77Ls-59631COg,10411
9
- lmnr-0.4.0.dist-info/METADATA,sha256=cRoKKpLeNNk6E3yxNzLHvGmaStrmCaQXCUAEr-Ix7Dg,4548
10
- lmnr-0.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
11
- lmnr-0.4.0.dist-info/entry_points.txt,sha256=Qg7ZRax4k-rcQsZ26XRYQ8YFSBiyY2PNxYfq4a6PYXI,41
12
- lmnr-0.4.0.dist-info/RECORD,,
File without changes
File without changes