lmnr 0.4.12b3__tar.gz → 0.4.13__tar.gz
This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- {lmnr-0.4.12b3 → lmnr-0.4.13}/PKG-INFO +73 -101
- {lmnr-0.4.12b3 → lmnr-0.4.13}/README.md +72 -99
- {lmnr-0.4.12b3 → lmnr-0.4.13}/pyproject.toml +2 -3
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/sdk/decorators.py +3 -2
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/sdk/evaluations.py +94 -63
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/sdk/laminar.py +46 -43
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/sdk/types.py +44 -7
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/sdk/utils.py +4 -5
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/__init__.py +3 -29
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/config/__init__.py +0 -4
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/decorators/base.py +29 -12
- lmnr-0.4.13/src/lmnr/traceloop_sdk/tracing/attributes.py +9 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tracing/tracing.py +45 -142
- lmnr-0.4.12b3/src/lmnr/traceloop_sdk/metrics/__init__.py +0 -0
- lmnr-0.4.12b3/src/lmnr/traceloop_sdk/metrics/metrics.py +0 -176
- lmnr-0.4.12b3/src/lmnr/traceloop_sdk/tracing/manual.py +0 -57
- {lmnr-0.4.12b3 → lmnr-0.4.13}/LICENSE +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/__init__.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/cli.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/sdk/__init__.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/sdk/log.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/.flake8 +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/.python-version +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/instruments.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
- {lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/traceloop_sdk/version.py +0 -0
{lmnr-0.4.12b3 → lmnr-0.4.13}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lmnr
-Version: 0.4.12b3
+Version: 0.4.13
 Summary: Python SDK for Laminar AI
 License: Apache-2.0
 Author: lmnr.ai
@@ -14,7 +14,6 @@ Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: argparse (>=1.0,<2.0)
 Requires-Dist: asyncio (>=3.0,<4.0)
 Requires-Dist: backoff (>=2.0,<3.0)
-Requires-Dist: colorama (>=0.4,<0.5)
 Requires-Dist: deprecated (>=1.0,<2.0)
 Requires-Dist: jinja2 (>=3.0,<4.0)
 Requires-Dist: opentelemetry-api (>=1.27.0,<2.0.0)
@@ -60,63 +59,37 @@ Description-Content-Type: text/markdown
 
 # Laminar Python
 
-
+Python SDK for [Laminar](https://www.lmnr.ai).
+
+[Laminar](https://www.lmnr.ai) is an open-source platform for engineering LLM products. Trace, evaluate, annotate, and analyze LLM data. Bring LLM applications to production with confidence.
+
+Check our [open-source repo](https://github.com/lmnr-ai/lmnr) and don't forget to star it ⭐
 
 <a href="https://pypi.org/project/lmnr/">  </a>
 
 
 
 
-
 ## Quickstart
 
 First, install the package:
 
 ```sh
-python3 -m venv .myenv
-source .myenv/bin/activate # or use your favorite env management tool
-
 pip install lmnr
 ```
 
-
+And then in the code
 
 ```python
-import os
-from openai import OpenAI
 from lmnr import Laminar as L
 
-L.initialize(
-    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
-)
-
-client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
-def poem_writer(topic: str):
-    prompt = f"write a poem about {topic}"
-
-    # OpenAI calls are automatically instrumented
-    response = client.chat.completions.create(
-        model="gpt-4o",
-        messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": prompt},
-        ],
-    )
-    poem = response.choices[0].message.content
-    return poem
-
-if __name__ == "__main__":
-    print(poem_writer("laminar flow"))
-
+L.initialize(project_api_key="<PROJECT_API_KEY>")
 ```
 
-
+This will automatically instrument most of the LLM, Vector DB, and related
+calls with OpenTelemetry-compatible instrumentation.
 
-
-
-Get the key from the settings page of your Laminar project ([Learn more](https://docs.lmnr.ai/api-reference/introduction#authentication)).
-You can either pass it to `.initialize()` or set it to `.env` at the root of your package with the key `LMNR_PROJECT_API_KEY`.
+Note that you need to only initialize Laminar once in your application.
 
 ## Instrumentation
 
@@ -197,7 +170,7 @@ L.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"], instruments={In
 
 If you want to fully disable any kind of autoinstrumentation, pass an empty set as `instruments=set()` to `.initialize()`.
 
-
+Autoinstrumentations are provided by Traceloop's [OpenLLMetry](https://github.com/traceloop/openllmetry).
 
 ## Sending events
 
@@ -225,6 +198,67 @@ L.event("topic alignment", topic in poem)
 L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
 ```
 
+## Evaluations
+
+### Quickstart
+
+Install the package:
+
+```sh
+pip install lmnr
+```
+
+Create a file named `my_first_eval.py` with the following code:
+
+```python
+from lmnr import evaluate
+
+def write_poem(data):
+    return f"This is a good poem about {data['topic']}"
+
+def contains_poem(output, target):
+    return 1 if output in target['poem'] else 0
+
+# Evaluation data
+data = [
+    {"data": {"topic": "flowers"}, "target": {"poem": "This is a good poem about flowers"}},
+    {"data": {"topic": "cars"}, "target": {"poem": "I like cars"}},
+]
+
+evaluate(
+    data=data,
+    executor=write_poem,
+    evaluators={
+        "containsPoem": contains_poem
+    }
+)
+```
+
+Run the following commands:
+
+```sh
+export LMNR_PROJECT_API_KEY=<YOUR_PROJECT_API_KEY> # get from Laminar project settings
+lmnr eval my_first_eval.py # run in the virtual environment where lmnr is installed
+```
+
+Visit the URL printed in the console to see the results.
+
+### Overview
+
+Bring rigor to the development of your LLM applications with evaluations.
+
+You can run evaluations locally by providing executor (part of the logic used in your application) and evaluators (numeric scoring functions) to `evaluate` function.
+
+`evaluate` takes in the following parameters:
+- `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
+- `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be both a function or an `async` function.
+- `evaluators` – Dictionary which maps evaluator names to evaluators. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores. Each evaluator can be both a function or an `async` function.
+- `name` – optional name for the evaluation. Automatically generated if not provided.
+
+\* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
+
+[Read docs](https://docs.lmnr.ai/evaluations/introduction) to learn more about evaluations.
+
 ## Laminar pipelines as prompt chain managers
 
 You can create Laminar pipelines in the UI and manage chains of LLM calls there.
@@ -259,65 +293,3 @@ PipelineRunResponse(
 )
 ```
 
-
-## Running offline evaluations on your data
-
-You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
-
-Evaluation takes in the following parameters:
-- `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
-- `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
-- `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
-- `evaluators` – evaluaton logic. List of functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores.
-
-\* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
-
-### Example
-
-```python
-from openai import AsyncOpenAI
-import asyncio
-import os
-
-openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
-async def get_capital(data):
-    country = data["country"]
-    response = await openai_client.chat.completions.create(
-        model="gpt-4o-mini",
-        messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": f"What is the capital of {country}? Just name the "
-                "city and nothing else",
-            },
-        ],
-    )
-    return response.choices[0].message.content.strip()
-
-
-# Evaluation data
-data = [
-    {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
-    {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
-    {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
-]
-
-
-def evaluator_A(output, target):
-    return 1 if output == target["capital"] else 0
-
-
-# Create an Evaluation instance
-e = Evaluation(
-    name="py-evaluation-async",
-    data=data,
-    executor=get_capital,
-    evaluators=[evaluator_A],
-    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
-)
-
-# Run the evaluation
-asyncio.run(e.run())
-```
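Taken together, the removed `Evaluation`-class example and the added `evaluate` quickstart describe the same workflow, with async executors and evaluators still supported. The following is a migration sketch, not code from the package: it assumes `evaluate` is importable from `lmnr` as in the new quickstart, that it accepts the async executor unchanged, and that `LMNR_PROJECT_API_KEY` is read from the environment; the optional `name` argument follows the added parameter list.

```python
# Migration sketch only (not from the package): the removed Evaluation-class
# example rewritten against the evaluate() API added in 0.4.13.
# Assumes LMNR_PROJECT_API_KEY and OPENAI_API_KEY are set in the environment.
import os

from openai import AsyncOpenAI
from lmnr import evaluate

openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])


async def get_capital(data):
    # Async executor: takes `data`, returns the model's answer.
    response = await openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": f"What is the capital of {data['country']}? "
                "Just name the city and nothing else",
            },
        ],
    )
    return response.choices[0].message.content.strip()


def correct_capital(output, target):
    # Evaluator: executor output first, `target` second, numeric score out.
    return 1 if output == target["capital"] else 0


data = [
    {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
    {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
]

evaluate(
    data=data,
    executor=get_capital,
    evaluators={"correctCapital": correct_capital},
    name="capitals",  # optional; auto-generated when omitted
)
```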
{lmnr-0.4.12b3 → lmnr-0.4.13}/README.md

@@ -1,62 +1,36 @@
 # Laminar Python
 
-
+Python SDK for [Laminar](https://www.lmnr.ai).
+
+[Laminar](https://www.lmnr.ai) is an open-source platform for engineering LLM products. Trace, evaluate, annotate, and analyze LLM data. Bring LLM applications to production with confidence.
+
+Check our [open-source repo](https://github.com/lmnr-ai/lmnr) and don't forget to star it ⭐
 
 <a href="https://pypi.org/project/lmnr/">  </a>
 
 
 
 
-
 ## Quickstart
 
 First, install the package:
 
 ```sh
-python3 -m venv .myenv
-source .myenv/bin/activate # or use your favorite env management tool
-
 pip install lmnr
 ```
 
-
+And then in the code
 
 ```python
-import os
-from openai import OpenAI
 from lmnr import Laminar as L
 
-L.initialize(
-    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
-)
-
-client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
-def poem_writer(topic: str):
-    prompt = f"write a poem about {topic}"
-
-    # OpenAI calls are automatically instrumented
-    response = client.chat.completions.create(
-        model="gpt-4o",
-        messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": prompt},
-        ],
-    )
-    poem = response.choices[0].message.content
-    return poem
-
-if __name__ == "__main__":
-    print(poem_writer("laminar flow"))
-
+L.initialize(project_api_key="<PROJECT_API_KEY>")
 ```
 
-
+This will automatically instrument most of the LLM, Vector DB, and related
+calls with OpenTelemetry-compatible instrumentation.
 
-
-
-Get the key from the settings page of your Laminar project ([Learn more](https://docs.lmnr.ai/api-reference/introduction#authentication)).
-You can either pass it to `.initialize()` or set it to `.env` at the root of your package with the key `LMNR_PROJECT_API_KEY`.
+Note that you need to only initialize Laminar once in your application.
 
 ## Instrumentation
 
@@ -137,7 +111,7 @@ L.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"], instruments={In
 
 If you want to fully disable any kind of autoinstrumentation, pass an empty set as `instruments=set()` to `.initialize()`.
 
-
+Autoinstrumentations are provided by Traceloop's [OpenLLMetry](https://github.com/traceloop/openllmetry).
 
 ## Sending events
 
@@ -165,6 +139,67 @@ L.event("topic alignment", topic in poem)
 L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
 ```
 
+## Evaluations
+
+### Quickstart
+
+Install the package:
+
+```sh
+pip install lmnr
+```
+
+Create a file named `my_first_eval.py` with the following code:
+
+```python
+from lmnr import evaluate
+
+def write_poem(data):
+    return f"This is a good poem about {data['topic']}"
+
+def contains_poem(output, target):
+    return 1 if output in target['poem'] else 0
+
+# Evaluation data
+data = [
+    {"data": {"topic": "flowers"}, "target": {"poem": "This is a good poem about flowers"}},
+    {"data": {"topic": "cars"}, "target": {"poem": "I like cars"}},
+]
+
+evaluate(
+    data=data,
+    executor=write_poem,
+    evaluators={
+        "containsPoem": contains_poem
+    }
+)
+```
+
+Run the following commands:
+
+```sh
+export LMNR_PROJECT_API_KEY=<YOUR_PROJECT_API_KEY> # get from Laminar project settings
+lmnr eval my_first_eval.py # run in the virtual environment where lmnr is installed
+```
+
+Visit the URL printed in the console to see the results.
+
+### Overview
+
+Bring rigor to the development of your LLM applications with evaluations.
+
+You can run evaluations locally by providing executor (part of the logic used in your application) and evaluators (numeric scoring functions) to `evaluate` function.
+
+`evaluate` takes in the following parameters:
+- `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
+- `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be both a function or an `async` function.
+- `evaluators` – Dictionary which maps evaluator names to evaluators. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores. Each evaluator can be both a function or an `async` function.
+- `name` – optional name for the evaluation. Automatically generated if not provided.
+
+\* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
+
+[Read docs](https://docs.lmnr.ai/evaluations/introduction) to learn more about evaluations.
+
 ## Laminar pipelines as prompt chain managers
 
 You can create Laminar pipelines in the UI and manage chains of LLM calls there.
@@ -198,65 +233,3 @@ PipelineRunResponse(
 run_id='53b012d5-5759-48a6-a9c5-0011610e3669'
 )
 ```
-
-## Running offline evaluations on your data
-
-You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
-
-Evaluation takes in the following parameters:
-- `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
-- `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
-- `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
-- `evaluators` – evaluaton logic. List of functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores.
-
-\* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
-
-### Example
-
-```python
-from openai import AsyncOpenAI
-import asyncio
-import os
-
-openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
-async def get_capital(data):
-    country = data["country"]
-    response = await openai_client.chat.completions.create(
-        model="gpt-4o-mini",
-        messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": f"What is the capital of {country}? Just name the "
-                "city and nothing else",
-            },
-        ],
-    )
-    return response.choices[0].message.content.strip()
-
-
-# Evaluation data
-data = [
-    {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
-    {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
-    {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
-]
-
-
-def evaluator_A(output, target):
-    return 1 if output == target["capital"] else 0
-
-
-# Create an Evaluation instance
-e = Evaluation(
-    name="py-evaluation-async",
-    data=data,
-    executor=get_capital,
-    evaluators=[evaluator_A],
-    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
-)
-
-# Run the evaluation
-asyncio.run(e.run())
-```
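Both README hunk headers above reference `L.initialize(project_api_key=..., instruments={In...`, and the unchanged README text says that passing `instruments=set()` disables all autoinstrumentation. Below is a small sketch of that control surface, under the assumption that an `Instruments` enum (see `src/lmnr/traceloop_sdk/instruments.py` in the file list) is importable from `lmnr` and has a member such as `OPENAI`; the exact member names are truncated in this diff and are an assumption.

```python
# Sketch only: limiting autoinstrumentation via the instruments argument.
# Assumes an Instruments enum exported by lmnr with an OPENAI member; the
# member names are an assumption, truncated in the hunk header above.
import os

from lmnr import Laminar as L, Instruments

# Instrument only OpenAI calls instead of every supported library.
L.initialize(
    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
    instruments={Instruments.OPENAI},
)

# Or, to disable autoinstrumentation entirely (per the README text):
# L.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"], instruments=set())
```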
{lmnr-0.4.12b3 → lmnr-0.4.13}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "lmnr"
-version = "0.4.12b3"
+version = "0.4.13"
 description = "Python SDK for Laminar AI"
 authors = [
     { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"
 
 [tool.poetry]
 name = "lmnr"
-version = "0.4.12b3"
+version = "0.4.13"
 description = "Python SDK for Laminar AI"
 authors = ["lmnr.ai"]
 readme = "README.md"
@@ -33,7 +33,6 @@ opentelemetry-instrumentation-sqlalchemy = "^0.48b0"
 opentelemetry-instrumentation-urllib3 = "^0.48b0"
 opentelemetry-instrumentation-threading = "^0.48b0"
 opentelemetry-semantic-conventions-ai = "0.4.1"
-colorama = "^0.4"
 tenacity = "~=8.0"
 jinja2 = "~=3.0"
 deprecated = "~=1.0"
{lmnr-0.4.12b3 → lmnr-0.4.13}/src/lmnr/sdk/decorators.py

@@ -6,6 +6,7 @@ from opentelemetry.trace import INVALID_SPAN, get_current_span
 
 from typing import Callable, Optional, cast
 
+from lmnr.traceloop_sdk.tracing.attributes import SESSION_ID, USER_ID
 from lmnr.traceloop_sdk.tracing.tracing import update_association_properties
 
 from .utils import is_async
@@ -43,11 +44,11 @@ def observe(
     if current_span != INVALID_SPAN:
         if session_id is not None:
             current_span.set_attribute(
-
+                SESSION_ID, session_id
             )
         if user_id is not None:
             current_span.set_attribute(
-
+                USER_ID, user_id
            )
     association_properties = {}
    if session_id is not None:
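The decorators.py hunk has `observe` set the new `SESSION_ID` and `USER_ID` span attributes (constants from the new `tracing/attributes.py` module in the file list) when `session_id` or `user_id` is passed. A usage sketch follows, assuming `observe` is re-exported from the top-level `lmnr` package and accepts `name`, `session_id`, and `user_id` keyword arguments, as the hunk context around `def observe(` suggests.

```python
# Usage sketch only: how the observe decorator from src/lmnr/sdk/decorators.py
# might be applied. Assumes observe is importable from the top-level lmnr
# package and takes name/session_id/user_id keyword arguments; these names are
# taken from the hunk context, not from documented API in this diff.
from lmnr import Laminar as L, observe

L.initialize(project_api_key="<PROJECT_API_KEY>")


@observe(name="poem_writer", session_id="session-123", user_id="user-456")
def poem_writer(topic: str) -> str:
    # The span created for this call carries the session/user association
    # properties that the decorator sets on the current span.
    return f"a poem about {topic}"


print(poem_writer("laminar flow"))
```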