lmnr 0.3.7__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lmnr-0.4.1/PKG-INFO +214 -0
- lmnr-0.4.1/README.md +192 -0
- {lmnr-0.3.7 → lmnr-0.4.1}/pyproject.toml +4 -4
- lmnr-0.4.1/src/lmnr/__init__.py +4 -0
- lmnr-0.4.1/src/lmnr/sdk/decorators.py +72 -0
- lmnr-0.4.1/src/lmnr/sdk/evaluations.py +163 -0
- lmnr-0.4.1/src/lmnr/sdk/laminar.py +447 -0
- lmnr-0.4.1/src/lmnr/sdk/log.py +39 -0
- lmnr-0.4.1/src/lmnr/sdk/types.py +123 -0
- {lmnr-0.3.7 → lmnr-0.4.1}/src/lmnr/sdk/utils.py +10 -11
- lmnr-0.3.7/PKG-INFO +0 -266
- lmnr-0.3.7/README.md +0 -245
- lmnr-0.3.7/src/lmnr/__init__.py +0 -7
- lmnr-0.3.7/src/lmnr/sdk/client.py +0 -161
- lmnr-0.3.7/src/lmnr/sdk/collector.py +0 -177
- lmnr-0.3.7/src/lmnr/sdk/constants.py +0 -1
- lmnr-0.3.7/src/lmnr/sdk/context.py +0 -483
- lmnr-0.3.7/src/lmnr/sdk/decorators.py +0 -284
- lmnr-0.3.7/src/lmnr/sdk/interface.py +0 -316
- lmnr-0.3.7/src/lmnr/sdk/providers/__init__.py +0 -2
- lmnr-0.3.7/src/lmnr/sdk/providers/base.py +0 -28
- lmnr-0.3.7/src/lmnr/sdk/providers/fallback.py +0 -154
- lmnr-0.3.7/src/lmnr/sdk/providers/openai.py +0 -159
- lmnr-0.3.7/src/lmnr/sdk/providers/utils.py +0 -33
- lmnr-0.3.7/src/lmnr/sdk/tracing_types.py +0 -210
- lmnr-0.3.7/src/lmnr/sdk/types.py +0 -71
- lmnr-0.3.7/src/lmnr/semantic_conventions/__init__.py +0 -0
- lmnr-0.3.7/src/lmnr/semantic_conventions/gen_ai_spans.py +0 -48
- {lmnr-0.3.7 → lmnr-0.4.1}/LICENSE +0 -0
- {lmnr-0.3.7 → lmnr-0.4.1}/src/lmnr/sdk/__init__.py +0 -0
lmnr-0.4.1/PKG-INFO
ADDED
@@ -0,0 +1,214 @@
Metadata-Version: 2.1
Name: lmnr
Version: 0.4.1
Summary: Python SDK for Laminar AI
License: Apache-2.0
Author: lmnr.ai
Requires-Python: >=3.9,<4.0
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Requires-Dist: asyncio (>=3.4.3,<4.0.0)
Requires-Dist: backoff (>=2.2.1,<3.0.0)
Requires-Dist: pydantic (>=2.7.4,<3.0.0)
Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
Requires-Dist: requests (>=2.32.3,<3.0.0)
Requires-Dist: traceloop-sdk (>=0.29.2,<0.30.0)
Description-Content-Type: text/markdown

# Laminar Python

OpenTelemetry log sender for [Laminar](https://github.com/lmnr-ai/lmnr) for Python code.

<a href="https://pypi.org/project/lmnr/">PyPI</a>

## Quickstart

```sh
python3 -m venv .myenv
source .myenv/bin/activate  # or use your favorite env management tool

pip install lmnr
```

And then, in your main Python file:

```python
from lmnr import Laminar as L

L.initialize(project_api_key="<LMNR_PROJECT_API_KEY>")
```

This will automatically instrument most of the LLM, Vector DB, and related
calls with OpenTelemetry-compatible instrumentation.

We rely on the amazing open-source [OpenLLMetry](https://github.com/traceloop/openllmetry) package
by Traceloop to achieve this.

### Project API key

Get the key from the settings page of your Laminar project ([Learn more](https://docs.lmnr.ai/api-reference/introduction#authentication)).
You can either pass it to `.initialize()` or set it in a `.env` file at the root of your package under the key `LMNR_PROJECT_API_KEY`.
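
For example, here is a minimal sketch of the `.env` route (assuming you load the file yourself with `python-dotenv`, which this package already depends on):

```python
# .env at the package root (hypothetical value):
# LMNR_PROJECT_API_KEY=your-project-api-key

import os

from dotenv import load_dotenv
from lmnr import Laminar as L

load_dotenv()  # reads .env and puts LMNR_PROJECT_API_KEY into os.environ
L.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"])
```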

## Instrumentation

In addition to automatic instrumentation, we provide a simple `@observe()` decorator for cases where you want more fine-grained tracing
or want to trace other functions.

### Example

```python
import os
from openai import OpenAI


from lmnr import observe, Laminar as L

L.initialize(project_api_key="<LMNR_PROJECT_API_KEY>")

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])


@observe()  # annotate all functions you want to trace
def poem_writer(topic="turbulence"):
    prompt = f"write a poem about {topic}"
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    poem = response.choices[0].message.content
    return poem


print(poem_writer(topic="laminar flow"))
```

## Sending events

You can send events in two ways:
- `.event(name, value)` – for a pre-defined event with one of the possible values.
- `.evaluate_event(name, evaluator, data)` – for an event that is evaluated by an evaluator pipeline based on the data.

Note that to run an evaluate event, you need to create an evaluator pipeline and a target version for it.

Read our [docs](https://docs.lmnr.ai) to learn more about event types and how they are created and evaluated.

### Example

```python
from lmnr import Laminar as L

# ...
poem = response.choices[0].message.content

# this will register a True or False value with Laminar
L.event("topic alignment", topic in poem)

# this will run the pipeline `check_wordy` with `poem` set as the value
# of the `text_input` node, and write the result as an event with the name
# "excessive_wordiness"
L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
```

## Laminar pipelines as prompt chain managers

You can create Laminar pipelines in the UI and manage chains of LLM calls there.

After you are ready to use your pipeline in your code, deploy it in Laminar by selecting a target version for the pipeline.

Once your pipeline target is set, you can call it from Python in just a few lines.

Example use:

```python
from lmnr import Laminar as L

L.initialize('<YOUR_PROJECT_API_KEY>')

result = L.run(
    pipeline='my_pipeline_name',
    inputs={'input_node_name': 'some_value'},
    # all environment variables
    env={'OPENAI_API_KEY': 'sk-some-key'},
)
```

Resulting in:

```python
>>> result
PipelineRunResponse(
    outputs={'output': {'value': [ChatMessage(role='user', content='hello')]}},
    # useful to locate your trace
    run_id='53b012d5-5759-48a6-a9c5-0011610e3669'
)
```

## Running offline evaluations on your data

You can evaluate your code with your own data and send the results to Laminar using the `Evaluation` class.

Evaluation takes in the following parameters:
- `name` – the name of your evaluation. If no evaluation with this name exists in the project, it will be created; otherwise, data will be pushed to the existing evaluation.
- `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible.
- `executor` – the logic you want to evaluate. This function must take `data` as its first argument and may produce any output. *
- `evaluators` – evaluation logic. A list of functions that take the output of the executor as the first argument and `target` as the second argument, and produce numeric scores. Each function can return either a single number or a `dict[str, int | float]` of scores.

\* If you already have the outputs of the executor you want to evaluate, you can specify the executor as an identity function that takes in `data` and returns only the needed value(s) from it, as sketched below.
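
A minimal sketch of that identity-style executor, together with an evaluator that returns a `dict` of scores (the `"output"` key and the score names here are hypothetical):

```python
# Hypothetical datapoint shape: {"data": {"output": ...}, "target": {"capital": ...}}
def identity_executor(data: dict):
    # pass the precomputed output straight through to the evaluators
    return data["output"]


def multi_score_evaluator(output, target) -> dict[str, float]:
    # return several named scores at once instead of a single number
    return {
        "exact_match": 1.0 if output == target["capital"] else 0.0,
        "non_empty": 1.0 if output else 0.0,
    }
```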

### Example

```python
from openai import AsyncOpenAI
import asyncio
import os

from lmnr import Evaluation

openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])


async def get_capital(data):
    country = data["country"]
    response = await openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": f"What is the capital of {country}? Just name the "
                "city and nothing else",
            },
        ],
    )
    return response.choices[0].message.content.strip()


# Evaluation data
data = [
    {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
    {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
    {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
]


def evaluator_A(output, target):
    return 1 if output == target["capital"] else 0


# Create an Evaluation instance
e = Evaluation(
    name="py-evaluation-async",
    data=data,
    executor=get_capital,
    evaluators=[evaluator_A],
    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
)

# Run the evaluation
asyncio.run(e.run())
```

lmnr-0.4.1/README.md
ADDED
@@ -0,0 +1,192 @@
(The file body is a verbatim copy of the Markdown description embedded in PKG-INFO above: 192 lines, from the "# Laminar Python" heading through the closing evaluation example. It is not repeated here.)

{lmnr-0.3.7 → lmnr-0.4.1}/pyproject.toml
CHANGED
@@ -1,6 +1,6 @@
 [project]
 name = "lmnr"
-version = "0.3.7"
+version = "0.4.1"
 description = "Python SDK for Laminar AI"
 authors = [
     { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"
 
 [tool.poetry]
 name = "lmnr"
-version = "0.3.7"
+version = "0.4.1"
 description = "Python SDK for Laminar AI"
 authors = ["lmnr.ai"]
 readme = "README.md"
@@ -22,9 +22,9 @@ python = "^3.9"
 pydantic = "^2.7.4"
 requests = "^2.32.3"
 python-dotenv = "^1.0.1"
-
-openai = "^1.41.1"
 backoff = "^2.2.1"
+traceloop-sdk = "^0.29.2"
+asyncio = "^3.4.3"
 
 [tool.poetry.group.dev.dependencies]
 black = "^24.8.0"

lmnr-0.4.1/src/lmnr/sdk/decorators.py
ADDED
@@ -0,0 +1,72 @@
from traceloop.sdk.decorators.base import (
    entity_method,
    aentity_method,
)
from opentelemetry.trace import INVALID_SPAN, get_current_span
from traceloop.sdk import Traceloop

from typing import Callable, Optional, ParamSpec, TypeVar, cast

from .laminar import Laminar as L
from .utils import is_async

P = ParamSpec("P")
R = TypeVar("R")


def observe(
    *,
    name: Optional[str] = None,
    user_id: Optional[str] = None,
    session_id: Optional[str] = None,
) -> Callable[[Callable[P, R]], Callable[P, R]]:
    """The main decorator entrypoint for Laminar. This is used to wrap
    functions and methods to create spans.

    Args:
        name (Optional[str], optional): Name of the span. Function
            name is used if not specified. Defaults to None.
        user_id (Optional[str], optional): User ID to associate
            with the span and the following context. Defaults to None.
        session_id (Optional[str], optional): Session ID to associate with the
            span and the following context. Defaults to None.

    Raises:
        Exception: re-raises the exception if the wrapped function raises
            an exception

    Returns:
        R: Returns the result of the wrapped function
    """

    def decorator(func: Callable[P, R]) -> Callable[P, R]:
        if not L.is_initialized():
            raise Exception(
                "Laminar is not initialized. Please "
                + "call Laminar.initialize() first."
            )
        # if we are already inside a span, attach the IDs to it directly
        current_span = get_current_span()
        if current_span != INVALID_SPAN:
            if session_id is not None:
                current_span.set_attribute(
                    "traceloop.association.properties.session_id", session_id
                )
            if user_id is not None:
                current_span.set_attribute(
                    "traceloop.association.properties.user_id", user_id
                )
        # propagate the IDs to spans created in the following context
        association_properties = {}
        if session_id is not None:
            association_properties["session_id"] = session_id
        if user_id is not None:
            association_properties["user_id"] = user_id
        Traceloop.set_association_properties(association_properties)
        # wrap with the async or sync tracing decorator as appropriate
        return (
            aentity_method(name=name)(func)
            if is_async(func)
            else entity_method(name=name)(func)
        )

    return cast(Callable[P, R], decorator)
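
# NOTE: illustration only, not part of the diff. Typical usage of the
# @observe decorator above (hypothetical names; assumes Laminar.initialize()
# has already been called):
from lmnr import observe


@observe(name="summarize", session_id="sess-42", user_id="user-7")
def summarize(text: str) -> str:
    # traced as a span named "summarize", associated with the given
    # session and user IDs
    return text[:100]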

lmnr-0.4.1/src/lmnr/sdk/evaluations.py
ADDED
@@ -0,0 +1,163 @@
from typing import Union

from .utils import is_async
from .types import EvaluatorFunction, ExecutorFunction, EvaluationDatapoint, Numeric
from .laminar import Laminar as L
import asyncio

from abc import ABC, abstractmethod

DEFAULT_BATCH_SIZE = 5


class EvaluationDataset(ABC):
    @abstractmethod
    def __init__(self, *args, **kwargs):
        pass

    @abstractmethod
    def __len__(self) -> int:
        pass

    @abstractmethod
    def __getitem__(self, idx) -> EvaluationDatapoint:
        pass

    def slice(self, start: int, end: int):
        # clamp the requested range to the dataset bounds
        return [self[i] for i in range(max(start, 0), min(end, len(self)))]
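
# NOTE: illustration only, not part of the diff. A minimal concrete dataset
# built on the ABC above (the class name is hypothetical):
class InMemoryDataset(EvaluationDataset):
    def __init__(self, points: list[EvaluationDatapoint]):
        self.points = points

    def __len__(self) -> int:
        return len(self.points)

    def __getitem__(self, idx) -> EvaluationDatapoint:
        return self.points[idx]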


class Evaluation:
    def __init__(
        self,
        name,
        data: Union[EvaluationDataset, list[Union[EvaluationDatapoint, dict]]],
        executor: ExecutorFunction,
        evaluators: list[EvaluatorFunction],
        batch_size: int = DEFAULT_BATCH_SIZE,
        project_api_key: str = "",
        base_url: str = "https://api.lmnr.ai",
    ):
        """
        Initializes an instance of the Evaluation class.

        Parameters:
            name (str): The name of the evaluation.
            data (Union[EvaluationDataset, list[Union[EvaluationDatapoint, dict]]]):
                List of data points to evaluate or an evaluation dataset.
                `data` is the input to the executor function,
                `target` is the input to the evaluator function.
            executor (Callable[..., Any]): The executor function.
                Takes the data point plus any additional arguments
                and returns the output to evaluate.
            evaluators (list[Callable[..., Any]]): List of evaluator functions.
                Each evaluator function takes the output of the executor _and_
                the target data, and returns a score. The score can be a
                single number or a record of string keys and number values.
                If the score is a single number, it will be named after the
                evaluator function. If the function is anonymous, it will be
                named `evaluator_${index}`, where index is the index of the
                evaluator function in the list, starting from 1.
            batch_size (int, optional): The batch size for evaluation.
                Defaults to DEFAULT_BATCH_SIZE.
            project_api_key (str, optional): The project API key.
                Defaults to an empty string.
            base_url (str, optional): The base URL for the LMNR API.
                Useful if self-hosted elsewhere.
                Defaults to "https://api.lmnr.ai".
        """

        self.name = name
        self.executor = executor
        # name evaluators after their functions; anonymous (lambda) evaluators
        # are named evaluator_1, evaluator_2, ... by position
        self.evaluators = dict(
            zip(
                [
                    (
                        e.__name__
                        if e.__name__ and e.__name__ != "<lambda>"
                        else f"evaluator_{i+1}"
                    )
                    for i, e in enumerate(evaluators)
                ],
                evaluators,
            )
        )
        self.evaluator_names = list(self.evaluators.keys())
        if isinstance(data, list):
            self.data = [
                (
                    EvaluationDatapoint.model_validate(point)
                    if isinstance(point, dict)
                    else point
                )
                for point in data
            ]
        else:
            self.data = data
        self.batch_size = batch_size
        L.initialize(project_api_key=project_api_key, base_url=base_url)

    async def run(self):
        """Runs the evaluation.

        Creates a new evaluation if no evaluation with such a name exists, or
        adds data to an existing one otherwise. Evaluates data points in
        batches of `self.batch_size`. The executor function is called on each
        data point to get the output, which is then scored by each evaluator
        function.
        """
        response = L.create_evaluation(self.name)
        batch_promises = []

        for i in range(0, len(self.data), self.batch_size):
            batch = (
                self.data[i : i + self.batch_size]
                if isinstance(self.data, list)
                else self.data.slice(i, i + self.batch_size)
            )
            batch_promises.append(self._evaluate_batch(batch))

        try:
            await asyncio.gather(*batch_promises)
            L.update_evaluation_status(response.name, "Finished")
            print(f"Evaluation {response.id} complete")
        except Exception as e:
            print(f"Error evaluating batch: {e}")

    async def _evaluate_batch(self, batch: list[EvaluationDatapoint]):
        results = []
        for datapoint in batch:
            output = (
                await self.executor(datapoint.data)
                if is_async(self.executor)
                else self.executor(datapoint.data)
            )
            target = datapoint.target

            # iterate in order of evaluators
            scores = {}
            for evaluator_name in self.evaluator_names:
                evaluator = self.evaluators[evaluator_name]
                value = (
                    await evaluator(output, target)
                    if is_async(evaluator)
                    else evaluator(output, target)
                )

                # if the evaluator returns a single number,
                # use the evaluator name as the key
                if isinstance(value, Numeric):
                    scores[evaluator_name] = value
                else:
                    # if the evaluator returns an object,
                    # use the object keys as the keys
                    scores.update(value)

            results.append(
                {
                    "executorOutput": output,
                    "data": datapoint.data,
                    "target": target,
                    "scores": scores,
                }
            )

        return L.post_evaluation_results(self.name, results)
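
# NOTE: illustration only, not part of the diff. Running an evaluation over
# a dataset object instead of a plain list (assumes the hypothetical
# InMemoryDataset sketch above; EvaluationDatapoint is a pydantic model with
# `data` and `target` fields, constructed here by keyword):
import asyncio
import os

from lmnr.sdk.evaluations import Evaluation
from lmnr.sdk.types import EvaluationDatapoint

dataset = InMemoryDataset(
    [EvaluationDatapoint(data={"country": "France"}, target={"capital": "Paris"})]
)

e = Evaluation(
    name="dataset-eval",
    data=dataset,
    executor=lambda d: d["country"],  # placeholder executor
    evaluators=[lambda output, target: 1],  # placeholder score, named evaluator_1
    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
)
asyncio.run(e.run())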