lmnr 0.3.7__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lmnr-0.4.1/PKG-INFO ADDED
@@ -0,0 +1,214 @@
1
+ Metadata-Version: 2.1
2
+ Name: lmnr
3
+ Version: 0.4.1
4
+ Summary: Python SDK for Laminar AI
5
+ License: Apache-2.0
6
+ Author: lmnr.ai
7
+ Requires-Python: >=3.9,<4.0
8
+ Classifier: License :: OSI Approved :: Apache Software License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.9
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: asyncio (>=3.4.3,<4.0.0)
15
+ Requires-Dist: backoff (>=2.2.1,<3.0.0)
16
+ Requires-Dist: pydantic (>=2.7.4,<3.0.0)
17
+ Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
18
+ Requires-Dist: requests (>=2.32.3,<3.0.0)
19
+ Requires-Dist: traceloop-sdk (>=0.29.2,<0.30.0)
20
+ Description-Content-Type: text/markdown
21
+
22
+ # Laminar Python
23
+
24
+ OpenTelemetry log sender for [Laminar](https://github.com/lmnr-ai/lmnr) for Python code.
25
+
26
+ <a href="https://pypi.org/project/lmnr/"> ![PyPI - Version](https://img.shields.io/pypi/v/lmnr?label=lmnr&logo=pypi&logoColor=3775A9) </a>
27
+ ![PyPI - Downloads](https://img.shields.io/pypi/dm/lmnr)
28
+ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/lmnr)
29
+
30
+
31
+
32
+ ## Quickstart
33
+ ```sh
34
+ python3 -m venv .myenv
35
+ source .myenv/bin/activate # or use your favorite env management tool
36
+
37
+ pip install lmnr
38
+ ```
39
+
40
+ And then in your main Python file:
41
+
42
+ ```python
43
+ from lmnr import Laminar as L
44
+
45
+ L.initialize(project_api_key="<LMNR_PROJECT_API_KEY>")
46
+ ```
47
+
48
+ This will automatically instrument most of the LLM, Vector DB, and related
49
+ calls with OpenTelemetry-compatible instrumentation.
50
+
51
+ We rely on the amazing [OpenLLMetry](https://github.com/traceloop/openllmetry), an open-source package
52
+ by Traceloop, to achieve that.
53
+
54
+ ### Project API key
55
+
56
+ Get the key from the settings page of your Laminar project ([Learn more](https://docs.lmnr.ai/api-reference/introduction#authentication)).
57
+ You can either pass it to `.initialize()` or set it in a `.env` file at the root of your package under the key `LMNR_PROJECT_API_KEY`.
58
+
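+ For example, a minimal sketch of the `.env` route, loading the key yourself with `python-dotenv` (already a dependency of this package) and passing it explicitly:
+
+ ```python
+ import os
+
+ from dotenv import load_dotenv
+ from lmnr import Laminar as L
+
+ load_dotenv()  # reads LMNR_PROJECT_API_KEY from .env into the environment
+ L.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"])
+ ```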
59
+ ## Instrumentation
60
+
61
+ In addition to automatic instrumentation, we provide a simple `@observe()` decorator in case you want more fine-grained tracing
62
+ or to trace other functions.
63
+
64
+ ### Example
65
+
66
+ ```python
67
+ import os
68
+ from openai import OpenAI
69
+
70
+
71
+ from lmnr import observe, Laminar as L
72
+ L.initialize(project_api_key="<LMNR_PROJECT_API_KEY>")
73
+
74
+ client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
75
+
76
+ @observe() # annotate all functions you want to trace
77
+ def poem_writer(topic="turbulence"):
78
+ prompt = f"write a poem about {topic}"
79
+ response = client.chat.completions.create(
80
+ model="gpt-4o",
81
+ messages=[
82
+ {"role": "system", "content": "You are a helpful assistant."},
83
+ {"role": "user", "content": prompt},
84
+ ],
85
+ )
86
+ poem = response.choices[0].message.content
87
+ return poem
88
+
89
+ print(poem_writer(topic="laminar flow"))
90
+ ```
91
+
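+ Based on the decorator's signature in this release, `observe()` also accepts optional `name`, `user_id`, and `session_id` keyword arguments; a minimal sketch (the IDs below are placeholder values):
+
+ ```python
+ from lmnr import observe, Laminar as L
+
+ L.initialize(project_api_key="<LMNR_PROJECT_API_KEY>")
+
+ @observe(name="poem_writer", user_id="user-123", session_id="session-456")
+ def poem_writer(topic="turbulence"):
+     return f"a poem about {topic}"
+ ```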
92
+
93
+ ## Sending events
94
+
95
+ You can send events in two ways:
96
+ - `.event(name, value)` – for a pre-defined event with one of the possible values.
97
+ - `.evaluate_event(name, evaluator, data)` – for an event that is evaluated by an evaluator pipeline based on the data.
98
+
99
+ Note that to use `.evaluate_event()`, you need to create an evaluator pipeline and create a target version for it.
100
+
101
+ Read our [docs](https://docs.lmnr.ai) to learn more about event types and how they are created and evaluated.
102
+
103
+ ### Example
104
+
105
+ ```python
106
+ from lmnr import Laminar as L
107
+ # ...
108
+ poem = response.choices[0].message.content
109
+
110
+ # this will register True or False value with Laminar
111
+ L.event("topic alignment", topic in poem)
112
+
113
+ # this will run the pipeline `check_wordy` with `poem` set as the value
114
+ # of `text_input` node, and write the result as an event with name
115
+ # "excessive_wordiness"
116
+ L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
117
+ ```
118
+
119
+ ## Laminar pipelines as prompt chain managers
120
+
121
+ You can create Laminar pipelines in the UI and manage chains of LLM calls there.
122
+
123
+ When you are ready to use your pipeline in your code, deploy it in Laminar by selecting the target version for the pipeline.
124
+
125
+ Once your pipeline target is set, you can call it from Python in just a few lines.
126
+
127
+ Example use:
128
+
129
+ ```python
130
+ from lmnr import Laminar as L
131
+
132
+ L.initialize('<YOUR_PROJECT_API_KEY>')
133
+
134
+ result = L.run(
135
+ pipeline = 'my_pipeline_name',
136
+ inputs = {'input_node_name': 'some_value'},
137
+ # all environment variables
138
+ env = {'OPENAI_API_KEY': 'sk-some-key'},
139
+ )
140
+ ```
141
+
142
+ Resulting in:
143
+
144
+ ```python
145
+ >>> result
146
+ PipelineRunResponse(
147
+ outputs={'output': {'value': [ChatMessage(role='user', content='hello')]}},
148
+ # useful to locate your trace
149
+ run_id='53b012d5-5759-48a6-a9c5-0011610e3669'
150
+ )
151
+ ```
152
+
153
+ ## Running offline evaluations on your data
154
+
155
+ You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
156
+
157
+ Evaluation takes in the following parameters:
158
+ - `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
159
+ - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
160
+ - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
161
+ - `evaluators` – evaluation logic. List of functions that take the output of the executor as the first argument and `target` as the second argument, and produce numeric scores. Each function can produce either a single number or a `dict[str, int|float]` of scores.
162
+
163
+ \* If you already have the outputs of the executors you want to evaluate, you can specify the executor as an identity function that takes in `data` and returns only the needed value(s) from it (see the sketch below).
164
+
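+ As a quick sketch of both the identity-executor pattern and a multi-score evaluator (the `output` and `expected` keys below are hypothetical; use whatever keys your datapoints contain):
+
+ ```python
+ # Executor that simply forwards a precomputed output stored in the datapoint.
+ def identity_executor(data):
+     return data["output"]
+
+
+ # Evaluator that returns several named scores at once as a dict.
+ def multi_score_evaluator(output, target):
+     return {
+         "exact_match": 1 if output == target["expected"] else 0,
+         "output_length": len(output),
+     }
+ ```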
165
+ ### Example
166
+
167
+ ```python
168
+ from lmnr import Evaluation
+ from openai import AsyncOpenAI
169
+ import asyncio
170
+ import os
171
+
172
+ openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
173
+
174
+ async def get_capital(data):
175
+ country = data["country"]
176
+ response = await openai_client.chat.completions.create(
177
+ model="gpt-4o-mini",
178
+ messages=[
179
+ {"role": "system", "content": "You are a helpful assistant."},
180
+ {
181
+ "role": "user",
182
+ "content": f"What is the capital of {country}? Just name the "
183
+ "city and nothing else",
184
+ },
185
+ ],
186
+ )
187
+ return response.choices[0].message.content.strip()
188
+
189
+
190
+ # Evaluation data
191
+ data = [
192
+ {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
193
+ {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
194
+ {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
195
+ ]
196
+
197
+
198
+ def evaluator_A(output, target):
199
+ return 1 if output == target["capital"] else 0
200
+
201
+
202
+ # Create an Evaluation instance
203
+ e = Evaluation(
204
+ name="py-evaluation-async",
205
+ data=data,
206
+ executor=get_capital,
207
+ evaluators=[evaluator_A],
208
+ project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
209
+ )
210
+
211
+ # Run the evaluation
212
+ asyncio.run(e.run())
213
+ ```
214
+
lmnr-0.4.1/README.md ADDED
@@ -0,0 +1,192 @@
1
+ # Laminar Python
2
+
3
+ OpenTelemetry log sender for [Laminar](https://github.com/lmnr-ai/lmnr) for Python code.
4
+
5
+ <a href="https://pypi.org/project/lmnr/"> ![PyPI - Version](https://img.shields.io/pypi/v/lmnr?label=lmnr&logo=pypi&logoColor=3775A9) </a>
6
+ ![PyPI - Downloads](https://img.shields.io/pypi/dm/lmnr)
7
+ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/lmnr)
8
+
9
+
10
+
11
+ ## Quickstart
12
+ ```sh
13
+ python3 -m venv .myenv
14
+ source .myenv/bin/activate # or use your favorite env management tool
15
+
16
+ pip install lmnr
17
+ ```
18
+
19
+ And then in your main Python file:
20
+
21
+ ```python
22
+ from lmnr import Laminar as L
23
+
24
+ L.initialize(project_api_key="<LMNR_PROJECT_API_KEY>")
25
+ ```
26
+
27
+ This will automatically instrument most of the LLM, Vector DB, and related
28
+ calls with OpenTelemetry-compatible instrumentation.
29
+
30
+ We rely on the amazing [OpenLLMetry](https://github.com/traceloop/openllmetry), an open-source package
31
+ by Traceloop, to achieve that.
32
+
33
+ ### Project API key
34
+
35
+ Get the key from the settings page of your Laminar project ([Learn more](https://docs.lmnr.ai/api-reference/introduction#authentication)).
36
+ You can either pass it to `.initialize()` or set it in a `.env` file at the root of your package under the key `LMNR_PROJECT_API_KEY`.
37
+
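+ For example, a minimal sketch of the `.env` route, loading the key yourself with `python-dotenv` (already a dependency of this package) and passing it explicitly:
+
+ ```python
+ import os
+
+ from dotenv import load_dotenv
+ from lmnr import Laminar as L
+
+ load_dotenv()  # reads LMNR_PROJECT_API_KEY from .env into the environment
+ L.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"])
+ ```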
38
+ ## Instrumentation
39
+
40
+ In addition to automatic instrumentation, we provide a simple `@observe()` decorator in case you want more fine-grained tracing
41
+ or to trace other functions.
42
+
43
+ ### Example
44
+
45
+ ```python
46
+ import os
47
+ from openai import OpenAI
48
+
49
+
50
+ from lmnr import observe, Laminar as L
51
+ L.initialize(project_api_key="<LMNR_PROJECT_API_KEY>")
52
+
53
+ client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
54
+
55
+ @observe() # annotate all functions you want to trace
56
+ def poem_writer(topic="turbulence"):
57
+ prompt = f"write a poem about {topic}"
58
+ response = client.chat.completions.create(
59
+ model="gpt-4o",
60
+ messages=[
61
+ {"role": "system", "content": "You are a helpful assistant."},
62
+ {"role": "user", "content": prompt},
63
+ ],
64
+ )
65
+ poem = response.choices[0].message.content
66
+ return poem
67
+
68
+ print(poem_writer(topic="laminar flow"))
69
+ ```
70
+
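+ Based on the decorator's signature in this release, `observe()` also accepts optional `name`, `user_id`, and `session_id` keyword arguments; a minimal sketch (the IDs below are placeholder values):
+
+ ```python
+ from lmnr import observe, Laminar as L
+
+ L.initialize(project_api_key="<LMNR_PROJECT_API_KEY>")
+
+ @observe(name="poem_writer", user_id="user-123", session_id="session-456")
+ def poem_writer(topic="turbulence"):
+     return f"a poem about {topic}"
+ ```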
71
+
72
+ ## Sending events
73
+
74
+ You can send events in two ways:
75
+ - `.event(name, value)` – for a pre-defined event with one of the possible values.
76
+ - `.evaluate_event(name, evaluator, data)` – for an event that is evaluated by an evaluator pipeline based on the data.
77
+
78
+ Note that to use `.evaluate_event()`, you need to create an evaluator pipeline and create a target version for it.
79
+
80
+ Read our [docs](https://docs.lmnr.ai) to learn more about event types and how they are created and evaluated.
81
+
82
+ ### Example
83
+
84
+ ```python
85
+ from lmnr import Laminar as L
86
+ # ...
87
+ poem = response.choices[0].message.content
88
+
89
+ # this will register True or False value with Laminar
90
+ L.event("topic alignment", topic in poem)
91
+
92
+ # this will run the pipeline `check_wordy` with `poem` set as the value
93
+ # of `text_input` node, and write the result as an event with name
94
+ # "excessive_wordiness"
95
+ L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
96
+ ```
97
+
98
+ ## Laminar pipelines as prompt chain managers
99
+
100
+ You can create Laminar pipelines in the UI and manage chains of LLM calls there.
101
+
102
+ When you are ready to use your pipeline in your code, deploy it in Laminar by selecting the target version for the pipeline.
103
+
104
+ Once your pipeline target is set, you can call it from Python in just a few lines.
105
+
106
+ Example use:
107
+
108
+ ```python
109
+ from lmnr import Laminar as L
110
+
111
+ L.initialize('<YOUR_PROJECT_API_KEY>')
112
+
113
+ result = L.run(
114
+ pipeline = 'my_pipeline_name',
115
+ inputs = {'input_node_name': 'some_value'},
116
+ # all environment variables
117
+ env = {'OPENAI_API_KEY': 'sk-some-key'},
118
+ )
119
+ ```
120
+
121
+ Resulting in:
122
+
123
+ ```python
124
+ >>> result
125
+ PipelineRunResponse(
126
+ outputs={'output': {'value': [ChatMessage(role='user', content='hello')]}},
127
+ # useful to locate your trace
128
+ run_id='53b012d5-5759-48a6-a9c5-0011610e3669'
129
+ )
130
+ ```
131
+
132
+ ## Running offline evaluations on your data
133
+
134
+ You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
135
+
136
+ Evaluation takes in the following parameters:
137
+ - `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
138
+ - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
139
+ - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
140
+ - `evaluators` – evaluation logic. List of functions that take the output of the executor as the first argument and `target` as the second argument, and produce numeric scores. Each function can produce either a single number or a `dict[str, int|float]` of scores.
141
+
142
+ \* If you already have the outputs of the executors you want to evaluate, you can specify the executor as an identity function that takes in `data` and returns only the needed value(s) from it (see the sketch below).
143
+
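+ As a quick sketch of both the identity-executor pattern and a multi-score evaluator (the `output` and `expected` keys below are hypothetical; use whatever keys your datapoints contain):
+
+ ```python
+ # Executor that simply forwards a precomputed output stored in the datapoint.
+ def identity_executor(data):
+     return data["output"]
+
+
+ # Evaluator that returns several named scores at once as a dict.
+ def multi_score_evaluator(output, target):
+     return {
+         "exact_match": 1 if output == target["expected"] else 0,
+         "output_length": len(output),
+     }
+ ```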
144
+ ### Example
145
+
146
+ ```python
147
+ from lmnr import Evaluation
+ from openai import AsyncOpenAI
148
+ import asyncio
149
+ import os
150
+
151
+ openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
152
+
153
+ async def get_capital(data):
154
+ country = data["country"]
155
+ response = await openai_client.chat.completions.create(
156
+ model="gpt-4o-mini",
157
+ messages=[
158
+ {"role": "system", "content": "You are a helpful assistant."},
159
+ {
160
+ "role": "user",
161
+ "content": f"What is the capital of {country}? Just name the "
162
+ "city and nothing else",
163
+ },
164
+ ],
165
+ )
166
+ return response.choices[0].message.content.strip()
167
+
168
+
169
+ # Evaluation data
170
+ data = [
171
+ {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
172
+ {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
173
+ {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
174
+ ]
175
+
176
+
177
+ def evaluator_A(output, target):
178
+ return 1 if output == target["capital"] else 0
179
+
180
+
181
+ # Create an Evaluation instance
182
+ e = Evaluation(
183
+ name="py-evaluation-async",
184
+ data=data,
185
+ executor=get_capital,
186
+ evaluators=[evaluator_A],
187
+ project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
188
+ )
189
+
190
+ # Run the evaluation
191
+ asyncio.run(e.run())
192
+ ```
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "lmnr"
3
- version = "0.3.7"
3
+ version = "0.4.1"
4
4
  description = "Python SDK for Laminar AI"
5
5
  authors = [
6
6
  { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"
11
11
 
12
12
  [tool.poetry]
13
13
  name = "lmnr"
14
- version = "0.3.7"
14
+ version = "0.4.1"
15
15
  description = "Python SDK for Laminar AI"
16
16
  authors = ["lmnr.ai"]
17
17
  readme = "README.md"
@@ -22,9 +22,9 @@ python = "^3.9"
22
22
  pydantic = "^2.7.4"
23
23
  requests = "^2.32.3"
24
24
  python-dotenv = "^1.0.1"
25
-
26
- openai = "^1.41.1"
27
25
  backoff = "^2.2.1"
26
+ traceloop-sdk = "^0.29.2"
27
+ asyncio = "^3.4.3"
28
28
 
29
29
  [tool.poetry.group.dev.dependencies]
30
30
  black = "^24.8.0"
@@ -0,0 +1,4 @@
1
+ from .sdk.evaluations import Evaluation
2
+ from .sdk.laminar import Laminar
3
+ from .sdk.types import ChatMessage, PipelineRunError, PipelineRunResponse, NodeInput
4
+ from .sdk.decorators import observe
@@ -0,0 +1,72 @@
1
+ from traceloop.sdk.decorators.base import (
2
+ entity_method,
3
+ aentity_method,
4
+ )
5
+ from opentelemetry.trace import INVALID_SPAN, get_current_span
6
+ from traceloop.sdk import Traceloop
7
+
8
+ from typing import Callable, Optional, ParamSpec, TypeVar, cast
9
+
10
+ from .laminar import Laminar as L
11
+ from .utils import is_async
12
+
13
+ P = ParamSpec("P")
14
+ R = TypeVar("R")
15
+
16
+
17
+ def observe(
18
+ *,
19
+ name: Optional[str] = None,
20
+ user_id: Optional[str] = None,
21
+ session_id: Optional[str] = None,
22
+ ) -> Callable[[Callable[P, R]], Callable[P, R]]:
23
+ """The main decorator entrypoint for Laminar. This is used to wrap
24
+ functions and methods to create spans.
25
+
26
+ Args:
27
+ name (Optional[str], optional): Name of the span. Function
28
+ name is used if not specified.
29
+ Defaults to None.
30
+ user_id (Optional[str], optional): User ID to associate
31
+ with the span and the following context.
32
+ Defaults to None.
33
+ session_id (Optional[str], optional): Session ID to associate with the
34
+ span and the following context. Defaults to None.
35
+
36
+ Raises:
37
+ Exception: re-raises the exception if the wrapped function raises
38
+ an exception
39
+
40
+ Returns:
41
+ R: Returns the result of the wrapped function
42
+ """
43
+
44
+ def decorator(func: Callable[P, R]) -> Callable[P, R]:
45
+ if not L.is_initialized():
46
+ raise Exception(
47
+ "Laminar is not initialized. Please "
48
+ + "call Laminar.initialize() first."
49
+ )
50
+ current_span = get_current_span()
51
+ if current_span != INVALID_SPAN:
52
+ if session_id is not None:
53
+ current_span.set_attribute(
54
+ "traceloop.association.properties.session_id", session_id
55
+ )
56
+ if user_id is not None:
57
+ current_span.set_attribute(
58
+ "traceloop.association.properties.user_id", user_id
59
+ )
60
+ association_properties = {}
61
+ if session_id is not None:
62
+ association_properties["session_id"] = session_id
63
+ if user_id is not None:
64
+ association_properties["user_id"] = user_id
65
+ Traceloop.set_association_properties(association_properties)
66
+ return (
67
+ aentity_method(name=name)(func)
68
+ if is_async(func)
69
+ else entity_method(name=name)(func)
70
+ )
71
+
72
+ return cast(Callable[P, R], decorator)
@@ -0,0 +1,163 @@
1
+ from typing import Union
2
+
3
+ from .utils import is_async
4
+ from .types import EvaluatorFunction, ExecutorFunction, EvaluationDatapoint, Numeric
5
+ from .laminar import Laminar as L
6
+ import asyncio
7
+
8
+ from abc import ABC, abstractmethod
9
+
10
+ DEFAULT_BATCH_SIZE = 5
11
+
12
+
13
+ class EvaluationDataset(ABC):
14
+ @abstractmethod
15
+ def __init__(self, *args, **kwargs):
16
+ pass
17
+
18
+ @abstractmethod
19
+ def __len__(self) -> int:
20
+ pass
21
+
22
+ @abstractmethod
23
+ def __getitem__(self, idx) -> EvaluationDatapoint:
24
+ pass
25
+
26
+ def slice(self, start: int, end: int):
27
+ return [self[i] for i in range(max(start, 0), min(end, len(self)))]
28
+
29
+
30
+ class Evaluation:
31
+ def __init__(
32
+ self,
33
+ name,
34
+ data: Union[EvaluationDataset, list[Union[EvaluationDatapoint, dict]]],
35
+ executor: ExecutorFunction,
36
+ evaluators: list[EvaluatorFunction],
37
+ batch_size: int = DEFAULT_BATCH_SIZE,
38
+ project_api_key: str = "",
39
+ base_url: str = "https://api.lmnr.ai",
40
+ ):
41
+ """
42
+ Initializes an instance of the Evaluation class.
43
+ Parameters:
44
+ name (str): The name of the evaluation.
45
+ data (Union[List[Union[EvaluationDatapoint, dict]], EvaluationDataset]): List of data points to evaluate or an evaluation dataset.
46
+ `data` is the input to the executor function,
47
+ `target` is the input to the evaluator function.
48
+ executor (Callable[..., Any]): The executor function.
49
+ Takes the data point + any additional arguments
50
+ and returns the output to evaluate.
51
+ evaluators (List[Callable[..., Any]]): List of evaluator functions.
52
+ Each evaluator function takes the output of the executor _and_
53
+ the target data, and returns a score. The score can be a
54
+ single number or a record of string keys and number values.
55
+ If the score is a single number, it will be named after the
56
+ evaluator function. If the function is anonymous, it will be
57
+ named `evaluator_${index}`, where index is the index of the
58
+ evaluator function in the list starting from 1.
59
+ batch_size (int, optional): The batch size for evaluation.
60
+ Defaults to DEFAULT_BATCH_SIZE.
61
+ project_api_key (str, optional): The project API key.
62
+ Defaults to an empty string.
63
+ base_url (str, optional): The base URL for the LMNR API.
64
+ Useful if self-hosted elsewhere.
65
+ Defaults to "https://api.lmnr.ai".
66
+ """
67
+
68
+ self.name = name
69
+ self.executor = executor
70
+ self.evaluators = dict(
71
+ zip(
72
+ [
73
+ (
74
+ e.__name__
75
+ if e.__name__ and e.__name__ != "<lambda>"
76
+ else f"evaluator_{i+1}"
77
+ )
78
+ for i, e in enumerate(evaluators)
79
+ ],
80
+ evaluators,
81
+ )
82
+ )
83
+ self.evaluator_names = list(self.evaluators.keys())
84
+ if isinstance(data, list):
85
+ self.data = [
86
+ (
87
+ EvaluationDatapoint.model_validate(point)
88
+ if isinstance(point, dict)
89
+ else point
90
+ )
91
+ for point in data
92
+ ]
93
+ else:
94
+ self.data = data
95
+ self.batch_size = batch_size
96
+ L.initialize(project_api_key=project_api_key, base_url=base_url)
97
+
98
+ async def run(self):
99
+ """Runs the evaluation.
100
+
101
+ Creates a new evaluation if no evaluation with such name exists, or
102
+ adds data to an existing one otherwise. Evaluates data points in
103
+ batches of `self.batch_size`. The executor
104
+ function is called on each data point to get the output,
105
+ and then evaluate it by each evaluator function.
106
+ """
107
+ response = L.create_evaluation(self.name)
108
+ batch_promises = []
109
+
110
+ for i in range(0, len(self.data), self.batch_size):
111
+ batch = (
112
+ self.data[i : i + self.batch_size]
113
+ if isinstance(self.data, list)
114
+ else self.data.slice(i, i + self.batch_size)
115
+ )
116
+ batch_promises.append(self._evaluate_batch(batch))
117
+
118
+ try:
119
+ await asyncio.gather(*batch_promises)
120
+ L.update_evaluation_status(response.name, "Finished")
121
+ print(f"Evaluation {response.id} complete")
122
+ except Exception as e:
123
+ print(f"Error evaluating batch: {e}")
124
+
125
+ async def _evaluate_batch(self, batch: list[EvaluationDatapoint]):
126
+ results = []
127
+ for datapoint in batch:
128
+ output = (
129
+ await self.executor(datapoint.data)
130
+ if is_async(self.executor)
131
+ else self.executor(datapoint.data)
132
+ )
133
+ target = datapoint.target
134
+
135
+ # iterate in order of evaluators
136
+ scores = {}
137
+ for evaluator_name in self.evaluator_names:
138
+ evaluator = self.evaluators[evaluator_name]
139
+ value = (
140
+ await evaluator(output, target)
141
+ if is_async(evaluator)
142
+ else evaluator(output, target)
143
+ )
144
+
145
+ # if the evaluator returns a single number,
146
+ # use the evaluator name as the key
147
+ if isinstance(value, Numeric):
148
+ scores[evaluator_name] = value
149
+ else:
150
+ # if the evaluator returns an object,
151
+ # use the object keys as the keys
152
+ scores.update(value)
153
+
154
+ results.append(
155
+ {
156
+ "executorOutput": output,
157
+ "data": datapoint.data,
158
+ "target": target,
159
+ "scores": scores,
160
+ }
161
+ )
162
+
163
+ return L.post_evaluation_results(self.name, results)