lmnr 0.4.12b4__tar.gz → 0.4.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lmnr-0.4.12b4 → lmnr-0.4.14}/PKG-INFO +73 -105
- {lmnr-0.4.12b4 → lmnr-0.4.14}/README.md +72 -105
- {lmnr-0.4.12b4 → lmnr-0.4.14}/pyproject.toml +2 -2
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/evaluations.py +56 -49
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/laminar.py +20 -96
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/types.py +2 -9
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/decorators/base.py +14 -4
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tracing/attributes.py +1 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tracing/tracing.py +15 -1
- {lmnr-0.4.12b4 → lmnr-0.4.14}/LICENSE +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/__init__.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/cli.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/__init__.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/decorators.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/log.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/utils.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/.flake8 +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/.python-version +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/__init__.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/config/__init__.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/instruments.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
- {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/version.py +0 -0
--- lmnr-0.4.12b4/PKG-INFO
+++ lmnr-0.4.14/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lmnr
-Version: 0.4.12b4
+Version: 0.4.14
 Summary: Python SDK for Laminar AI
 License: Apache-2.0
 Author: lmnr.ai
@@ -59,63 +59,37 @@ Description-Content-Type: text/markdown
 
 # Laminar Python
 
-
+Python SDK for [Laminar](https://www.lmnr.ai).
+
+[Laminar](https://www.lmnr.ai) is an open-source platform for engineering LLM products. Trace, evaluate, annotate, and analyze LLM data. Bring LLM applications to production with confidence.
+
+Check our [open-source repo](https://github.com/lmnr-ai/lmnr) and don't forget to star it ⭐
 
 <a href="https://pypi.org/project/lmnr/">  </a>
 
 
 
 
-
 ## Quickstart
 
 First, install the package:
 
 ```sh
-python3 -m venv .myenv
-source .myenv/bin/activate # or use your favorite env management tool
-
 pip install lmnr
 ```
 
-
+And then in the code
 
 ```python
-import os
-from openai import OpenAI
 from lmnr import Laminar as L
 
-L.initialize(
-    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
-)
-
-client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
-def poem_writer(topic: str):
-    prompt = f"write a poem about {topic}"
-
-    # OpenAI calls are automatically instrumented
-    response = client.chat.completions.create(
-        model="gpt-4o",
-        messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": prompt},
-        ],
-    )
-    poem = response.choices[0].message.content
-    return poem
-
-if __name__ == "__main__":
-    print(poem_writer("laminar flow"))
-
+L.initialize(project_api_key="<PROJECT_API_KEY>")
 ```
 
-
-
-### Project API key
+This will automatically instrument most of the LLM, Vector DB, and related
+calls with OpenTelemetry-compatible instrumentation.
 
-
-You can either pass it to `.initialize()` or set it to `.env` at the root of your package with the key `LMNR_PROJECT_API_KEY`.
+Note that you need to only initialize Laminar once in your application.
 
 ## Instrumentation
 
@@ -224,6 +198,68 @@ L.event("topic alignment", topic in poem)
 L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
 ```
 
+## Evaluations
+
+### Quickstart
+
+Install the package:
+
+```sh
+pip install lmnr
+```
+
+Create a file named `my_first_eval.py` with the following code:
+
+```python
+from lmnr import evaluate
+
+def write_poem(data):
+    return f"This is a good poem about {data['topic']}"
+
+def contains_poem(output, target):
+    return 1 if output in target['poem'] else 0
+
+# Evaluation data
+data = [
+    {"data": {"topic": "flowers"}, "target": {"poem": "This is a good poem about flowers"}},
+    {"data": {"topic": "cars"}, "target": {"poem": "I like cars"}},
+]
+
+evaluate(
+    data=data,
+    executor=write_poem,
+    evaluators={
+        "containsPoem": contains_poem
+    },
+    group_id="my_first_feature"
+)
+```
+
+Run the following commands:
+
+```sh
+export LMNR_PROJECT_API_KEY=<YOUR_PROJECT_API_KEY> # get from Laminar project settings
+lmnr eval my_first_eval.py # run in the virtual environment where lmnr is installed
+```
+
+Visit the URL printed in the console to see the results.
+
+### Overview
+
+Bring rigor to the development of your LLM applications with evaluations.
+
+You can run evaluations locally by providing executor (part of the logic used in your application) and evaluators (numeric scoring functions) to `evaluate` function.
+
+`evaluate` takes in the following parameters:
+- `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
+- `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be both a function or an `async` function.
+- `evaluators` – Dictionary which maps evaluator names to evaluators. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores. Each evaluator can be both a function or an `async` function.
+- `name` – optional name for the evaluation. Automatically generated if not provided.
+
+\* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
+
+[Read docs](https://docs.lmnr.ai/evaluations/introduction) to learn more about evaluations.
+
 ## Laminar pipelines as prompt chain managers
 
 You can create Laminar pipelines in the UI and manage chains of LLM calls there.
@@ -258,71 +294,3 @@ PipelineRunResponse(
 )
 ```
 
-
-## Running offline evaluations on your data
-
-You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
-
-Evaluation takes in the following parameters:
-- `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
-- `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
-- `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
-- `evaluators` – evaluaton logic. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Pass a dict from evaluator name to a function. Each function can produce either a single number or `dict[str, int|float]` of scores.
-
-\* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
-
-### Example code
-
-```python
-from lmnr import evaluate
-from openai import AsyncOpenAI
-import asyncio
-import os
-
-openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
-async def get_capital(data):
-    country = data["country"]
-    response = await openai_client.chat.completions.create(
-        model="gpt-4o-mini",
-        messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": f"What is the capital of {country}? Just name the "
-                "city and nothing else",
-            },
-        ],
-    )
-    return response.choices[0].message.content.strip()
-
-
-# Evaluation data
-data = [
-    {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
-    {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
-    {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
-]
-
-
-def correctness(output, target):
-    return 1 if output == target["capital"] else 0
-
-# Create an Evaluation instance
-e = evaluate(
-    name="my-evaluation",
-    data=data,
-    executor=get_capital,
-    evaluators={"correctness": correctness},
-    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
-)
-```
-
-### Running from CLI.
-
-1. Make sure `lmnr` is installed in a venv. CLI does not work with a global env
-1. Run `lmnr path/to/my/eval.py`
-
-### Running from code
-
-Simply execute the function, e.g. `python3 path/to/my/eval.py`
--- lmnr-0.4.12b4/README.md
+++ lmnr-0.4.14/README.md
@@ -1,62 +1,36 @@
 # Laminar Python
 
-
+Python SDK for [Laminar](https://www.lmnr.ai).
+
+[Laminar](https://www.lmnr.ai) is an open-source platform for engineering LLM products. Trace, evaluate, annotate, and analyze LLM data. Bring LLM applications to production with confidence.
+
+Check our [open-source repo](https://github.com/lmnr-ai/lmnr) and don't forget to star it ⭐
 
 <a href="https://pypi.org/project/lmnr/">  </a>
 
 
 
 
-
 ## Quickstart
 
 First, install the package:
 
 ```sh
-python3 -m venv .myenv
-source .myenv/bin/activate # or use your favorite env management tool
-
 pip install lmnr
 ```
 
-
+And then in the code
 
 ```python
-import os
-from openai import OpenAI
 from lmnr import Laminar as L
 
-L.initialize(
-    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
-)
-
-client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
-def poem_writer(topic: str):
-    prompt = f"write a poem about {topic}"
-
-    # OpenAI calls are automatically instrumented
-    response = client.chat.completions.create(
-        model="gpt-4o",
-        messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": prompt},
-        ],
-    )
-    poem = response.choices[0].message.content
-    return poem
-
-if __name__ == "__main__":
-    print(poem_writer("laminar flow"))
-
+L.initialize(project_api_key="<PROJECT_API_KEY>")
 ```
 
-
-
-### Project API key
+This will automatically instrument most of the LLM, Vector DB, and related
+calls with OpenTelemetry-compatible instrumentation.
 
-
-You can either pass it to `.initialize()` or set it to `.env` at the root of your package with the key `LMNR_PROJECT_API_KEY`.
+Note that you need to only initialize Laminar once in your application.
 
 ## Instrumentation
 
@@ -165,6 +139,68 @@ L.event("topic alignment", topic in poem)
 L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
 ```
 
+## Evaluations
+
+### Quickstart
+
+Install the package:
+
+```sh
+pip install lmnr
+```
+
+Create a file named `my_first_eval.py` with the following code:
+
+```python
+from lmnr import evaluate
+
+def write_poem(data):
+    return f"This is a good poem about {data['topic']}"
+
+def contains_poem(output, target):
+    return 1 if output in target['poem'] else 0
+
+# Evaluation data
+data = [
+    {"data": {"topic": "flowers"}, "target": {"poem": "This is a good poem about flowers"}},
+    {"data": {"topic": "cars"}, "target": {"poem": "I like cars"}},
+]
+
+evaluate(
+    data=data,
+    executor=write_poem,
+    evaluators={
+        "containsPoem": contains_poem
+    },
+    group_id="my_first_feature"
+)
+```
+
+Run the following commands:
+
+```sh
+export LMNR_PROJECT_API_KEY=<YOUR_PROJECT_API_KEY> # get from Laminar project settings
+lmnr eval my_first_eval.py # run in the virtual environment where lmnr is installed
+```
+
+Visit the URL printed in the console to see the results.
+
+### Overview
+
+Bring rigor to the development of your LLM applications with evaluations.
+
+You can run evaluations locally by providing executor (part of the logic used in your application) and evaluators (numeric scoring functions) to `evaluate` function.
+
+`evaluate` takes in the following parameters:
+- `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
+- `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be both a function or an `async` function.
+- `evaluators` – Dictionary which maps evaluator names to evaluators. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores. Each evaluator can be both a function or an `async` function.
+- `name` – optional name for the evaluation. Automatically generated if not provided.
+
+\* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
+
+[Read docs](https://docs.lmnr.ai/evaluations/introduction) to learn more about evaluations.
+
 ## Laminar pipelines as prompt chain managers
 
 You can create Laminar pipelines in the UI and manage chains of LLM calls there.
@@ -198,72 +234,3 @@ PipelineRunResponse(
     run_id='53b012d5-5759-48a6-a9c5-0011610e3669'
 )
 ```
-
-
-## Running offline evaluations on your data
-
-You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
-
-Evaluation takes in the following parameters:
-- `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
-- `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
-- `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
-- `evaluators` – evaluaton logic. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Pass a dict from evaluator name to a function. Each function can produce either a single number or `dict[str, int|float]` of scores.
-
-\* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
-
-### Example code
-
-```python
-from lmnr import evaluate
-from openai import AsyncOpenAI
-import asyncio
-import os
-
-openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
-async def get_capital(data):
-    country = data["country"]
-    response = await openai_client.chat.completions.create(
-        model="gpt-4o-mini",
-        messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": f"What is the capital of {country}? Just name the "
-                "city and nothing else",
-            },
-        ],
-    )
-    return response.choices[0].message.content.strip()
-
-
-# Evaluation data
-data = [
-    {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
-    {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
-    {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
-]
-
-
-def correctness(output, target):
-    return 1 if output == target["capital"] else 0
-
-# Create an Evaluation instance
-e = evaluate(
-    name="my-evaluation",
-    data=data,
-    executor=get_capital,
-    evaluators={"correctness": correctness},
-    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
-)
-```
-
-### Running from CLI.
-
-1. Make sure `lmnr` is installed in a venv. CLI does not work with a global env
-1. Run `lmnr path/to/my/eval.py`
-
-### Running from code
-
-Simply execute the function, e.g. `python3 path/to/my/eval.py`
-
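The README above documents the `evaluate` parameters only as a bullet list, so here is a hedged sketch of how they combine: an `async` executor together with an evaluator that returns a `dict[str, int|float]` of scores, both of which the README explicitly allows. The executor body and the data are illustrative placeholders, not SDK code, and `LMNR_PROJECT_API_KEY` is assumed to be set in the environment as in the quickstart.

```python
from lmnr import evaluate

async def summarize(data):
    # Placeholder executor: in a real project this would call your LLM pipeline.
    return f"Summary of: {data['text']}"

def quality(output, target):
    # Evaluators may return a single number or a dict of named scores.
    return {
        "contains_keyword": 1 if target["keyword"] in output else 0,
        "length_ok": 1 if len(output) < 200 else 0,
    }

data = [
    {"data": {"text": "Laminar flow stays smooth."}, "target": {"keyword": "smooth"}},
    {"data": {"text": "Turbulent flow is chaotic."}, "target": {"keyword": "chaotic"}},
]

evaluate(
    data=data,
    executor=summarize,
    evaluators={"quality": quality},
    group_id="summarization",  # groups runs of the same feature, per the README above
    name="readme-example",     # optional; auto-generated when omitted
)
```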
--- lmnr-0.4.12b4/pyproject.toml
+++ lmnr-0.4.14/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "lmnr"
-version = "0.4.12b4"
+version = "0.4.14"
 description = "Python SDK for Laminar AI"
 authors = [
   { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"
 
 [tool.poetry]
 name = "lmnr"
-version = "0.4.12b4"
+version = "0.4.14"
 description = "Python SDK for Laminar AI"
 authors = ["lmnr.ai"]
 readme = "README.md"
--- lmnr-0.4.12b4/src/lmnr/sdk/evaluations.py
+++ lmnr-0.4.14/src/lmnr/sdk/evaluations.py
@@ -1,4 +1,5 @@
 import asyncio
+import re
 import sys
 from abc import ABC, abstractmethod
 from contextlib import contextmanager
@@ -12,7 +13,6 @@ from ..traceloop_sdk.tracing.attributes import SPAN_TYPE
 
 from .laminar import Laminar as L
 from .types import (
-    CreateEvaluationResponse,
     Datapoint,
     EvaluationResultDatapoint,
     EvaluatorFunction,
@@ -46,13 +46,26 @@ def get_evaluation_url(project_id: str, evaluation_id: str):
     return f"https://www.lmnr.ai/project/{project_id}/evaluations/{evaluation_id}"
 
 
+def get_average_scores(results: list[EvaluationResultDatapoint]) -> dict[str, Numeric]:
+    per_score_values = {}
+    for result in results:
+        for key, value in result.scores.items():
+            if key not in per_score_values:
+                per_score_values[key] = []
+            per_score_values[key].append(value)
+
+    average_scores = {}
+    for key, values in per_score_values.items():
+        average_scores[key] = sum(values) / len(values)
+
+    return average_scores
+
+
 class EvaluationReporter:
     def __init__(self):
         pass
 
-    def start(self,
-        print(f"Running evaluation {name}...\n")
-        print(f"Check progress and results at {get_evaluation_url(project_id, id)}\n")
+    def start(self, length: int):
         self.cli_progress = tqdm(
             total=length,
             bar_format="{bar} {percentage:3.0f}% | ETA: {remaining}s | {n_fmt}/{total_fmt}",
@@ -66,9 +79,10 @@ class EvaluationReporter:
         self.cli_progress.close()
         sys.stderr.write(f"\nError: {error}\n")
 
-    def stop(self, average_scores: dict[str, Numeric]):
+    def stop(self, average_scores: dict[str, Numeric], project_id: str, evaluation_id: str):
         self.cli_progress.close()
-        print("\
+        print(f"\nCheck progress and results at {get_evaluation_url(project_id, evaluation_id)}\n")
+        print("Average scores:")
         for name, score in average_scores.items():
             print(f"{name}: {score}")
         print("\n")
@@ -97,6 +111,7 @@ class Evaluation:
         data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
         executor: Any,
         evaluators: dict[str, EvaluatorFunction],
+        group_id: Optional[str] = None,
         name: Optional[str] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,
         project_api_key: Optional[str] = None,
@@ -123,6 +138,8 @@ class Evaluation:
                 evaluator function. If the function is anonymous, it will be
                 named `evaluator_${index}`, where index is the index of the
                 evaluator function in the list starting from 1.
+            group_id (Optional[str], optional): Group id of the evaluation.
+                Defaults to "default".
             name (Optional[str], optional): The name of the evaluation.
                 It will be auto-generated if not provided.
             batch_size (int, optional): The batch size for evaluation.
@@ -138,11 +155,16 @@ class Evaluation:
                 Defaults to None. If None, all available instruments will be used.
         """
 
+        if not evaluators:
+            raise ValueError("No evaluators provided")
+
+        # TODO: Compile regex once and then reuse it
+        for evaluator_name in evaluators:
+            if not re.match(r'^[\w\s-]+$', evaluator_name):
+                raise ValueError(f'Invalid evaluator key: "{evaluator_name}". Keys must only contain letters, digits, hyphens, underscores, or spaces.')
+
         self.is_finished = False
-        self.name = name
         self.reporter = EvaluationReporter()
-        self.executor = executor
-        self.evaluators = evaluators
         if isinstance(data, list):
             self.data = [
                 (Datapoint.model_validate(point) if isinstance(point, dict) else point)
@@ -150,6 +172,10 @@ class Evaluation:
             ]
         else:
             self.data = data
+        self.executor = executor
+        self.evaluators = evaluators
+        self.group_id = group_id
+        self.name = name
         self.batch_size = batch_size
         L.initialize(
             project_api_key=project_api_key,
@@ -160,23 +186,6 @@ class Evaluation:
         )
 
     def run(self) -> Union[None, Awaitable[None]]:
-        """Runs the evaluation.
-
-        Creates a new evaluation if no evaluation with such name exists, or
-        adds data to an existing one otherwise. Evaluates data points in
-        batches of `self.batch_size`. The executor
-        function is called on each data point to get the output,
-        and then evaluate it by each evaluator function.
-
-        Usage:
-        ```python
-        # in a synchronous context:
-        e.run()
-        # in an asynchronous context:
-        await e.run()
-        ```
-
-        """
         if self.is_finished:
             raise Exception("Evaluation is already finished")
 
@@ -187,41 +196,34 @@ class Evaluation:
         return loop.run_until_complete(self._run())
 
     async def _run(self) -> None:
-        evaluation = L.create_evaluation(self.name)
        self.reporter.start(
-            evaluation.name,
-            evaluation.projectId,
-            evaluation.id,
            len(self.data),
        )
 
        try:
-            await self.evaluate_in_batches(
+            result_datapoints = await self.evaluate_in_batches()
        except Exception as e:
-            L.update_evaluation_status(evaluation.id, "Error")
            self.reporter.stopWithError(e)
            self.is_finished = True
            return
+        else:
+            evaluation = L.create_evaluation(data=result_datapoints, group_id=self.group_id, name=self.name)
+            average_scores = get_average_scores(result_datapoints)
+            self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
+            self.is_finished = True
 
-
-
-        self.reporter.stop(updated_evaluation.averageScores)
-        self.is_finished = True
-
-    async def evaluate_in_batches(self, evaluation: CreateEvaluationResponse):
+    async def evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
+        result_datapoints = []
        for i in range(0, len(self.data), self.batch_size):
            batch = (
-                self.data[i
+                self.data[i: i + self.batch_size]
                if isinstance(self.data, list)
                else self.data.slice(i, i + self.batch_size)
            )
-
-
-
-
-                print(f"Error evaluating batch: {e}")
-            finally:
-                self.reporter.update(len(batch))
+            batch_datapoints = await self._evaluate_batch(batch)
+            result_datapoints.extend(batch_datapoints)
+            self.reporter.update(len(batch))
+        return result_datapoints
 
     async def _evaluate_batch(
         self, batch: list[Datapoint]
@@ -252,7 +254,7 @@ class Evaluation:
         scores: dict[str, Numeric] = {}
         for evaluator_name, evaluator in self.evaluators.items():
             with L.start_as_current_span(
-
+                evaluator_name, input={"output": output, "target": target}
             ) as evaluator_span:
                 evaluator_span.set_attribute(SPAN_TYPE, SpanType.EVALUATOR.value)
                 value = (
@@ -282,6 +284,7 @@ def evaluate(
     data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
     executor: ExecutorFunction,
     evaluators: dict[str, EvaluatorFunction],
+    group_id: Optional[str] = None,
     name: Optional[str] = None,
     batch_size: int = DEFAULT_BATCH_SIZE,
     project_api_key: Optional[str] = None,
@@ -310,8 +313,11 @@ def evaluate(
             evaluator function. If the function is anonymous, it will be
             named `evaluator_${index}`, where index is the index of the
             evaluator function in the list starting from 1.
-
-
+        group_id (Optional[str], optional): Group name which is same
+            as the feature you are evaluating in your project or application.
+            Defaults to "default".
+        name (Optional[str], optional): Optional name of the evaluation. Used to easily
+            identify the evaluation in the group.
         batch_size (int, optional): The batch size for evaluation.
             Defaults to DEFAULT_BATCH_SIZE.
         project_api_key (Optional[str], optional): The project API key.
@@ -331,6 +337,7 @@ def evaluate(
         data=data,
         executor=executor,
         evaluators=evaluators,
+        group_id=group_id,
         name=name,
         batch_size=batch_size,
         project_api_key=project_api_key,
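Two behavioral changes in `evaluations.py` are easy to miss in the hunks above: evaluator keys are now validated against `^[\w\s-]+$`, and average scores are computed client-side from the collected result datapoints. The following self-contained sketch mirrors that logic (it reimplements the two helpers with plain dicts rather than importing the SDK's internals):

```python
import re
from statistics import mean

EVALUATOR_KEY_RE = re.compile(r"^[\w\s-]+$")  # same pattern the SDK now applies

def check_evaluator_keys(evaluators: dict) -> None:
    # Mirrors the new __init__ validation: empty dicts and odd characters are rejected.
    if not evaluators:
        raise ValueError("No evaluators provided")
    for key in evaluators:
        if not EVALUATOR_KEY_RE.match(key):
            raise ValueError(f'Invalid evaluator key: "{key}"')

def average_scores(results: list[dict]) -> dict[str, float]:
    # Mirrors get_average_scores: mean of each score key across all datapoints.
    per_key: dict[str, list[float]] = {}
    for scores in results:
        for key, value in scores.items():
            per_key.setdefault(key, []).append(value)
    return {key: mean(values) for key, values in per_key.items()}

check_evaluator_keys({"containsPoem": lambda output, target: 1})  # passes
# check_evaluator_keys({"contains/poem": ...}) would raise: "/" is not allowed
print(average_scores([{"accuracy": 1, "length": 0}, {"accuracy": 0, "length": 1}]))
# -> {'accuracy': 0.5, 'length': 0.5}
```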
--- lmnr-0.4.12b4/src/lmnr/sdk/laminar.py
+++ lmnr-0.4.14/src/lmnr/sdk/laminar.py
@@ -3,11 +3,9 @@ from opentelemetry import context
 from opentelemetry.trace import (
     INVALID_SPAN,
     get_current_span,
-    SpanKind,
 )
 from opentelemetry.util.types import AttributeValue
-from opentelemetry.context
-from opentelemetry.util import types
+from opentelemetry.context import set_value, attach, detach
 from lmnr.traceloop_sdk import Traceloop
 from lmnr.traceloop_sdk.tracing import get_tracer
 from contextlib import contextmanager
@@ -29,10 +27,12 @@ from lmnr.traceloop_sdk.tracing.attributes import (
     SESSION_ID,
     SPAN_INPUT,
     SPAN_OUTPUT,
+    SPAN_PATH,
     TRACE_TYPE,
     USER_ID,
 )
 from lmnr.traceloop_sdk.tracing.tracing import (
+    get_span_path,
     set_association_properties,
     update_association_properties,
 )
@@ -47,7 +47,6 @@ from .types import (
     NodeInput,
     PipelineRunRequest,
     TraceType,
-    UpdateEvaluationResponse,
 )
 
 
@@ -315,14 +314,6 @@ class Laminar:
         cls,
         name: str,
         input: Any = None,
-        context: Optional[Context] = None,
-        kind: SpanKind = SpanKind.INTERNAL,
-        attributes: types.Attributes = None,
-        links=None,
-        start_time: Optional[int] = None,
-        record_exception: bool = True,
-        set_status_on_exception: bool = True,
-        end_on_exit: bool = True,
     ):
         """Start a new span as the current span. Useful for manual instrumentation.
         This is the preferred and more stable way to use manual instrumentation.
@@ -337,32 +328,15 @@ class Laminar:
             name (str): name of the span
             input (Any, optional): input to the span. Will be sent as an
                 attribute, so must be json serializable. Defaults to None.
-            context (Optional[Context], optional): context to start the span in.
-                Defaults to None.
-            kind (SpanKind, optional): kind of the span. Defaults to SpanKind.INTERNAL.
-            attributes (types.Attributes, optional): attributes to set on the span.
-                Defaults to None.
-            links ([type], optional): links to set on the span. Defaults to None.
-            start_time (Optional[int], optional): start time of the span.
-                Defaults to None.
-            record_exception (bool, optional): whether to record exceptions.
-                Defaults to True.
-            set_status_on_exception (bool, optional): whether to set status on exception.
-                Defaults to True.
-            end_on_exit (bool, optional): whether to end the span on exit.
-                Defaults to True.
         """
         with get_tracer() as tracer:
+            span_path = get_span_path(name)
+            ctx = set_value("span_path", span_path)
+            ctx_token = attach(set_value("span_path", span_path))
             with tracer.start_as_current_span(
                 name,
-                context=
-
-                attributes=attributes,
-                links=links,
-                start_time=start_time,
-                record_exception=record_exception,
-                set_status_on_exception=set_status_on_exception,
-                end_on_exit=end_on_exit,
+                context=ctx,
+                attributes={SPAN_PATH: span_path},
             ) as span:
                 if input is not None:
                     span.set_attribute(
@@ -371,6 +345,12 @@ class Laminar:
                     )
                 yield span
 
+            # TODO: Figure out if this is necessary
+            try:
+                detach(ctx_token)
+            except Exception:
+                pass
+
     @classmethod
     def set_span_output(cls, output: Any = None):
         """Set the output of the current span. Useful for manual instrumentation.
@@ -432,10 +412,14 @@ class Laminar:
         set_association_properties(props)
 
     @classmethod
-    def create_evaluation(cls, name: Optional[str]) -> CreateEvaluationResponse:
+    def create_evaluation(cls, data: list[EvaluationResultDatapoint], group_id: Optional[str] = None, name: Optional[str] = None) -> CreateEvaluationResponse:
         response = requests.post(
             cls.__base_http_url + "/v1/evaluations",
-            data=json.dumps({
+            data=json.dumps({
+                "groupId": group_id,
+                "name": name,
+                "points": [datapoint.to_dict() for datapoint in data]
+            }),
             headers=cls._headers(),
         )
         if response.status_code != 200:
@@ -446,66 +430,6 @@ class Laminar:
             raise ValueError(f"Error creating evaluation {response.text}")
         return CreateEvaluationResponse.model_validate(response.json())
 
-    @classmethod
-    def post_evaluation_results(
-        cls, evaluation_id: uuid.UUID, data: list[EvaluationResultDatapoint]
-    ) -> requests.Response:
-        body = {
-            "evaluationId": str(evaluation_id),
-            "points": [datapoint.to_dict() for datapoint in data],
-        }
-        response = requests.post(
-            cls.__base_http_url + "/v1/evaluation-datapoints",
-            data=json.dumps(body),
-            headers=cls._headers(),
-        )
-        if response.status_code != 200:
-            try:
-                resp_json = response.json()
-                raise ValueError(
-                    f"Failed to send evaluation results. Response: {json.dumps(resp_json)}"
-                )
-            except Exception:
-                raise ValueError(
-                    f"Failed to send evaluation results. Error: {response.text}"
-                )
-        return response
-
-    @classmethod
-    def update_evaluation_status(
-        cls, evaluation_id: str, status: str
-    ) -> UpdateEvaluationResponse:
-        """
-        Updates the status of an evaluation. Returns the updated evaluation object.
-
-        Args:
-            evaluation_id (str): The ID of the evaluation to update.
-            status (str): The status to set for the evaluation.
-
-        Returns:
-            UpdateEvaluationResponse: The updated evaluation response.
-
-        Raises:
-            ValueError: If the request fails.
-        """
-        body = {
-            "status": status,
-        }
-        url = f"{cls.__base_http_url}/v1/evaluations/{evaluation_id}"
-
-        response = requests.post(
-            url,
-            data=json.dumps(body),
-            headers=cls._headers(),
-        )
-        if response.status_code != 200:
-            raise ValueError(
-                f"Failed to update evaluation status {evaluation_id}. "
-                f"Response: {response.text}"
-            )
-
-        return UpdateEvaluationResponse.model_validate(response.json())
-
     @classmethod
     def _headers(cls):
         assert cls.__project_api_key is not None, "Project API key is not set"
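With the extra OpenTelemetry keyword arguments removed, `Laminar.start_as_current_span` is now driven only by a span name and an optional JSON-serializable input, and it records a dotted span path automatically. A hedged usage sketch follows; the `retrieve_docs` body is a stand-in for application logic, not SDK code, and `<PROJECT_API_KEY>` is a placeholder.

```python
from lmnr import Laminar as L

L.initialize(project_api_key="<PROJECT_API_KEY>")

def retrieve_docs(query: str) -> list[str]:
    # Everything inside the `with` block is recorded under this span; the new
    # SPAN_PATH attribute prefixes the name with any enclosing span names.
    with L.start_as_current_span("retrieve_docs", input={"query": query}):
        docs = [f"doc about {query}"]  # stand-in for real retrieval logic
        L.set_span_output(docs)        # set_span_output is part of the same class
        return docs

retrieve_docs("laminar flow")
```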
--- lmnr-0.4.12b4/src/lmnr/sdk/types.py
+++ lmnr-0.4.14/src/lmnr/sdk/types.py
@@ -2,7 +2,7 @@ import datetime
 from enum import Enum
 import pydantic
 import requests
-from typing import Any, Awaitable, Callable,
+from typing import Any, Awaitable, Callable, Optional, Union
 import uuid
 
 from .utils import serialize
@@ -107,20 +107,13 @@ EvaluatorFunction = Callable[
     Union[EvaluatorFunctionReturnType, Awaitable[EvaluatorFunctionReturnType]],
 ]
 
-EvaluationStatus = Literal["Started", "Finished", "Error"]
-
 
 class CreateEvaluationResponse(pydantic.BaseModel):
     id: uuid.UUID
     createdAt: datetime.datetime
+    groupId: str
     name: str
-    status: EvaluationStatus
     projectId: uuid.UUID
-    metadata: Optional[dict[str, Any]] = None
-    averageScores: Optional[dict[str, Numeric]] = None
-
-
-UpdateEvaluationResponse = CreateEvaluationResponse
 
 
 class EvaluationResultDatapoint(pydantic.BaseModel):
--- lmnr-0.4.12b4/src/lmnr/traceloop_sdk/decorators/base.py
+++ lmnr-0.4.14/src/lmnr/traceloop_sdk/decorators/base.py
@@ -10,8 +10,8 @@ from opentelemetry import context as context_api
 
 from lmnr.sdk.utils import get_input_from_func_args, is_method
 from lmnr.traceloop_sdk.tracing import get_tracer
-from lmnr.traceloop_sdk.tracing.attributes import SPAN_INPUT, SPAN_OUTPUT
-from lmnr.traceloop_sdk.tracing.tracing import TracerWrapper
+from lmnr.traceloop_sdk.tracing.attributes import SPAN_INPUT, SPAN_OUTPUT, SPAN_PATH
+from lmnr.traceloop_sdk.tracing.tracing import TracerWrapper, get_span_path
 from lmnr.traceloop_sdk.utils.json_encoder import JSONEncoder
 
 
@@ -47,7 +47,12 @@ def entity_method(
 
         with get_tracer() as tracer:
             span = tracer.start_span(span_name)
-
+
+            span_path = get_span_path(span_name)
+            span.set_attribute(SPAN_PATH, span_path)
+            ctx = context_api.set_value("span_path", span_path)
+
+            ctx = trace.set_span_in_context(span, ctx)
             ctx_token = context_api.attach(ctx)
 
             try:
@@ -104,7 +109,12 @@ def aentity_method(
 
         with get_tracer() as tracer:
             span = tracer.start_span(span_name)
-
+
+            span_path = get_span_path(span_name)
+            span.set_attribute(SPAN_PATH, span_path)
+            ctx = context_api.set_value("span_path", span_path)
+
+            ctx = trace.set_span_in_context(span, ctx)
             ctx_token = context_api.attach(ctx)
 
             try:
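The decorator wrappers above now stamp every decorated span with a `SPAN_PATH` attribute and push the path into the OpenTelemetry context, so nested decorated calls should accumulate dotted paths. A hedged sketch, assuming the public `observe` decorator exported from `lmnr` (defined in the unchanged `sdk/decorators.py`) routes through these `entity_method`/`aentity_method` wrappers:

```python
from lmnr import Laminar as L, observe

L.initialize(project_api_key="<PROJECT_API_KEY>")  # placeholder key

@observe()  # wraps the function via the decorator helpers shown above
def fetch_context(question: str) -> str:
    return f"context for {question}"

@observe()
def answer(question: str) -> str:
    # Called inside another observed function, so its span is expected to
    # carry the dotted path "answer.fetch_context" via SPAN_PATH.
    context = fetch_context(question)
    return f"answer built from {context}"

answer("what is laminar flow?")
```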
--- lmnr-0.4.12b4/src/lmnr/traceloop_sdk/tracing/tracing.py
+++ lmnr-0.4.14/src/lmnr/traceloop_sdk/tracing/tracing.py
@@ -25,7 +25,7 @@ from opentelemetry.instrumentation.threading import ThreadingInstrumentor
 
 # from lmnr.traceloop_sdk import Telemetry
 from lmnr.traceloop_sdk.instruments import Instruments
-from lmnr.traceloop_sdk.tracing.attributes import ASSOCIATION_PROPERTIES
+from lmnr.traceloop_sdk.tracing.attributes import ASSOCIATION_PROPERTIES, SPAN_PATH
 from lmnr.traceloop_sdk.tracing.content_allow_list import ContentAllowList
 from lmnr.traceloop_sdk.utils import is_notebook
 from lmnr.traceloop_sdk.utils.package_check import is_package_installed
@@ -245,6 +245,14 @@ class TracerWrapper(object):
         self.flush()
 
     def _span_processor_on_start(self, span, parent_context):
+        span_path = get_value("span_path")
+        if span_path is not None:
+            # This is done redundantly here for most decorated functions
+            # However, need to do this for auto-instrumented libraries.
+            # Then, for auto-instrumented ones, they'll attach
+            # the final part of the name to the span on the backend.
+            span.set_attribute(SPAN_PATH, span_path)
+
         association_properties = get_value("association_properties")
         if association_properties is not None:
             _set_association_properties_attributes(span, association_properties)
@@ -318,6 +326,12 @@ def _set_association_properties_attributes(span, properties: dict) -> None:
         )
 
 
+def get_span_path(span_name: str) -> str:
+    current_span_path = get_value("span_path")
+    span_path = f"{current_span_path}.{span_name}" if current_span_path else span_name
+    return span_path
+
+
 def set_managed_prompt_tracing_context(
     key: str,
     version: int,
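The new `get_span_path` helper simply appends the current span's name to whatever path is already stored under the `"span_path"` context key. A tiny standalone mirror of that composition rule, using a plain dict in place of the OpenTelemetry context, shows how the dotted paths build up:

```python
def get_span_path(context: dict, span_name: str) -> str:
    # Same composition rule as the SDK helper; the dict stands in for the
    # OpenTelemetry context that normally stores "span_path".
    current = context.get("span_path")
    return f"{current}.{span_name}" if current else span_name

ctx: dict = {}
outer = get_span_path(ctx, "outer_task")   # -> "outer_task"
ctx["span_path"] = outer
inner = get_span_path(ctx, "llm_call")     # -> "outer_task.llm_call"
print(outer, inner)
```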