lmnr 0.4.12b4__tar.gz → 0.4.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {lmnr-0.4.12b4 → lmnr-0.4.14}/PKG-INFO +73 -105
  2. {lmnr-0.4.12b4 → lmnr-0.4.14}/README.md +72 -105
  3. {lmnr-0.4.12b4 → lmnr-0.4.14}/pyproject.toml +2 -2
  4. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/evaluations.py +56 -49
  5. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/laminar.py +20 -96
  6. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/types.py +2 -9
  7. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/decorators/base.py +14 -4
  8. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tracing/attributes.py +1 -0
  9. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tracing/tracing.py +15 -1
  10. {lmnr-0.4.12b4 → lmnr-0.4.14}/LICENSE +0 -0
  11. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/__init__.py +0 -0
  12. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/cli.py +0 -0
  13. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/__init__.py +0 -0
  14. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/decorators.py +0 -0
  15. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/log.py +0 -0
  16. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/utils.py +0 -0
  17. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/.flake8 +0 -0
  18. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/.python-version +0 -0
  19. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/__init__.py +0 -0
  20. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/config/__init__.py +0 -0
  21. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
  22. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/instruments.py +0 -0
  23. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
  24. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
  25. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
  26. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
  27. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
  28. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
  29. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
  30. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
  31. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
  32. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
  33. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
  34. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
  35. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
  36. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
  37. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
  38. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
  39. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
  40. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
  41. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
  42. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
  43. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
  44. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
  45. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
  46. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
  47. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
  48. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
  49. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
  50. {lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/version.py +0 -0

{lmnr-0.4.12b4 → lmnr-0.4.14}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: lmnr
- Version: 0.4.12b4
+ Version: 0.4.14
  Summary: Python SDK for Laminar AI
  License: Apache-2.0
  Author: lmnr.ai
@@ -59,63 +59,37 @@ Description-Content-Type: text/markdown

  # Laminar Python

- OpenTelemetry log sender for [Laminar](https://github.com/lmnr-ai/lmnr) for Python code.
+ Python SDK for [Laminar](https://www.lmnr.ai).
+
+ [Laminar](https://www.lmnr.ai) is an open-source platform for engineering LLM products. Trace, evaluate, annotate, and analyze LLM data. Bring LLM applications to production with confidence.
+
+ Check our [open-source repo](https://github.com/lmnr-ai/lmnr) and don't forget to star it ⭐

  <a href="https://pypi.org/project/lmnr/"> ![PyPI - Version](https://img.shields.io/pypi/v/lmnr?label=lmnr&logo=pypi&logoColor=3775A9) </a>
  ![PyPI - Downloads](https://img.shields.io/pypi/dm/lmnr)
  ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/lmnr)


-
  ## Quickstart

  First, install the package:

  ```sh
- python3 -m venv .myenv
- source .myenv/bin/activate # or use your favorite env management tool
-
  pip install lmnr
  ```

- Then, you can initialize Laminar in your main file and instrument your code.
+ And then in the code

  ```python
- import os
- from openai import OpenAI
  from lmnr import Laminar as L

- L.initialize(
-     project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
- )
-
- client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
- def poem_writer(topic: str):
-     prompt = f"write a poem about {topic}"
-
-     # OpenAI calls are automatically instrumented
-     response = client.chat.completions.create(
-         model="gpt-4o",
-         messages=[
-             {"role": "system", "content": "You are a helpful assistant."},
-             {"role": "user", "content": prompt},
-         ],
-     )
-     poem = response.choices[0].message.content
-     return poem
-
- if __name__ == "__main__":
-     print(poem_writer("laminar flow"))
-
+ L.initialize(project_api_key="<PROJECT_API_KEY>")
  ```

- Note that you need to only initialize Laminar once in your application.
-
- ### Project API key
+ This will automatically instrument most of the LLM, Vector DB, and related
+ calls with OpenTelemetry-compatible instrumentation.

- Get the key from the settings page of your Laminar project ([Learn more](https://docs.lmnr.ai/api-reference/introduction#authentication)).
- You can either pass it to `.initialize()` or set it to `.env` at the root of your package with the key `LMNR_PROJECT_API_KEY`.
+ Note that you need to only initialize Laminar once in your application.

  ## Instrumentation

@@ -224,6 +198,68 @@ L.event("topic alignment", topic in poem)
  L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
  ```

+ ## Evaluations
+
+ ### Quickstart
+
+ Install the package:
+
+ ```sh
+ pip install lmnr
+ ```
+
+ Create a file named `my_first_eval.py` with the following code:
+
+ ```python
+ from lmnr import evaluate
+
+ def write_poem(data):
+     return f"This is a good poem about {data['topic']}"
+
+ def contains_poem(output, target):
+     return 1 if output in target['poem'] else 0
+
+ # Evaluation data
+ data = [
+     {"data": {"topic": "flowers"}, "target": {"poem": "This is a good poem about flowers"}},
+     {"data": {"topic": "cars"}, "target": {"poem": "I like cars"}},
+ ]
+
+ evaluate(
+     data=data,
+     executor=write_poem,
+     evaluators={
+         "containsPoem": contains_poem
+     },
+     group_id="my_first_feature"
+ )
+ ```
+
+ Run the following commands:
+
+ ```sh
+ export LMNR_PROJECT_API_KEY=<YOUR_PROJECT_API_KEY> # get from Laminar project settings
+ lmnr eval my_first_eval.py # run in the virtual environment where lmnr is installed
+ ```
+
+ Visit the URL printed in the console to see the results.
+
+ ### Overview
+
+ Bring rigor to the development of your LLM applications with evaluations.
+
+ You can run evaluations locally by providing executor (part of the logic used in your application) and evaluators (numeric scoring functions) to `evaluate` function.
+
+ `evaluate` takes in the following parameters:
+ - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
+ - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be both a function or an `async` function.
+ - `evaluators` – Dictionary which maps evaluator names to evaluators. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores. Each evaluator can be both a function or an `async` function.
+ - `name` – optional name for the evaluation. Automatically generated if not provided.
+
+ \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
+
+ [Read docs](https://docs.lmnr.ai/evaluations/introduction) to learn more about evaluations.
+
  ## Laminar pipelines as prompt chain managers

  You can create Laminar pipelines in the UI and manage chains of LLM calls there.
@@ -258,71 +294,3 @@ PipelineRunResponse(
  )
  ```

- ## Running offline evaluations on your data
-
- You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
-
- Evaluation takes in the following parameters:
- - `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
- - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
- - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
- - `evaluators` – evaluaton logic. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Pass a dict from evaluator name to a function. Each function can produce either a single number or `dict[str, int|float]` of scores.
-
- \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
-
- ### Example code
-
- ```python
- from lmnr import evaluate
- from openai import AsyncOpenAI
- import asyncio
- import os
-
- openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
- async def get_capital(data):
-     country = data["country"]
-     response = await openai_client.chat.completions.create(
-         model="gpt-4o-mini",
-         messages=[
-             {"role": "system", "content": "You are a helpful assistant."},
-             {
-                 "role": "user",
-                 "content": f"What is the capital of {country}? Just name the "
-                 "city and nothing else",
-             },
-         ],
-     )
-     return response.choices[0].message.content.strip()
-
-
- # Evaluation data
- data = [
-     {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
-     {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
-     {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
- ]
-
-
- def correctness(output, target):
-     return 1 if output == target["capital"] else 0
-
-
- # Create an Evaluation instance
- e = evaluate(
-     name="my-evaluation",
-     data=data,
-     executor=get_capital,
-     evaluators={"correctness": correctness},
-     project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
- )
- ```
-
- ### Running from CLI.
-
- 1. Make sure `lmnr` is installed in a venv. CLI does not work with a global env
- 1. Run `lmnr path/to/my/eval.py`
-
- ### Running from code
-
- Simply execute the function, e.g. `python3 path/to/my/eval.py`

{lmnr-0.4.12b4 → lmnr-0.4.14}/README.md

@@ -1,62 +1,36 @@
  # Laminar Python

- OpenTelemetry log sender for [Laminar](https://github.com/lmnr-ai/lmnr) for Python code.
+ Python SDK for [Laminar](https://www.lmnr.ai).
+
+ [Laminar](https://www.lmnr.ai) is an open-source platform for engineering LLM products. Trace, evaluate, annotate, and analyze LLM data. Bring LLM applications to production with confidence.
+
+ Check our [open-source repo](https://github.com/lmnr-ai/lmnr) and don't forget to star it ⭐

  <a href="https://pypi.org/project/lmnr/"> ![PyPI - Version](https://img.shields.io/pypi/v/lmnr?label=lmnr&logo=pypi&logoColor=3775A9) </a>
  ![PyPI - Downloads](https://img.shields.io/pypi/dm/lmnr)
  ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/lmnr)


-
  ## Quickstart

  First, install the package:

  ```sh
- python3 -m venv .myenv
- source .myenv/bin/activate # or use your favorite env management tool
-
  pip install lmnr
  ```

- Then, you can initialize Laminar in your main file and instrument your code.
+ And then in the code

  ```python
- import os
- from openai import OpenAI
  from lmnr import Laminar as L

- L.initialize(
-     project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
- )
-
- client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
- def poem_writer(topic: str):
-     prompt = f"write a poem about {topic}"
-
-     # OpenAI calls are automatically instrumented
-     response = client.chat.completions.create(
-         model="gpt-4o",
-         messages=[
-             {"role": "system", "content": "You are a helpful assistant."},
-             {"role": "user", "content": prompt},
-         ],
-     )
-     poem = response.choices[0].message.content
-     return poem
-
- if __name__ == "__main__":
-     print(poem_writer("laminar flow"))
-
+ L.initialize(project_api_key="<PROJECT_API_KEY>")
  ```

- Note that you need to only initialize Laminar once in your application.
-
- ### Project API key
+ This will automatically instrument most of the LLM, Vector DB, and related
+ calls with OpenTelemetry-compatible instrumentation.

- Get the key from the settings page of your Laminar project ([Learn more](https://docs.lmnr.ai/api-reference/introduction#authentication)).
- You can either pass it to `.initialize()` or set it to `.env` at the root of your package with the key `LMNR_PROJECT_API_KEY`.
+ Note that you need to only initialize Laminar once in your application.

  ## Instrumentation

@@ -165,6 +139,68 @@ L.event("topic alignment", topic in poem)
  L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
  ```

+ ## Evaluations
+
+ ### Quickstart
+
+ Install the package:
+
+ ```sh
+ pip install lmnr
+ ```
+
+ Create a file named `my_first_eval.py` with the following code:
+
+ ```python
+ from lmnr import evaluate
+
+ def write_poem(data):
+     return f"This is a good poem about {data['topic']}"
+
+ def contains_poem(output, target):
+     return 1 if output in target['poem'] else 0
+
+ # Evaluation data
+ data = [
+     {"data": {"topic": "flowers"}, "target": {"poem": "This is a good poem about flowers"}},
+     {"data": {"topic": "cars"}, "target": {"poem": "I like cars"}},
+ ]
+
+ evaluate(
+     data=data,
+     executor=write_poem,
+     evaluators={
+         "containsPoem": contains_poem
+     },
+     group_id="my_first_feature"
+ )
+ ```
+
+ Run the following commands:
+
+ ```sh
+ export LMNR_PROJECT_API_KEY=<YOUR_PROJECT_API_KEY> # get from Laminar project settings
+ lmnr eval my_first_eval.py # run in the virtual environment where lmnr is installed
+ ```
+
+ Visit the URL printed in the console to see the results.
+
+ ### Overview
+
+ Bring rigor to the development of your LLM applications with evaluations.
+
+ You can run evaluations locally by providing executor (part of the logic used in your application) and evaluators (numeric scoring functions) to `evaluate` function.
+
+ `evaluate` takes in the following parameters:
+ - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
+ - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be both a function or an `async` function.
+ - `evaluators` – Dictionary which maps evaluator names to evaluators. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores. Each evaluator can be both a function or an `async` function.
+ - `name` – optional name for the evaluation. Automatically generated if not provided.
+
+ \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
+
+ [Read docs](https://docs.lmnr.ai/evaluations/introduction) to learn more about evaluations.
+
  ## Laminar pipelines as prompt chain managers

  You can create Laminar pipelines in the UI and manage chains of LLM calls there.
@@ -198,72 +234,3 @@ PipelineRunResponse(
      run_id='53b012d5-5759-48a6-a9c5-0011610e3669'
  )
  ```
-
- ## Running offline evaluations on your data
-
- You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
-
- Evaluation takes in the following parameters:
- - `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
- - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
- - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
- - `evaluators` – evaluaton logic. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Pass a dict from evaluator name to a function. Each function can produce either a single number or `dict[str, int|float]` of scores.
-
- \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
-
- ### Example code
-
- ```python
- from lmnr import evaluate
- from openai import AsyncOpenAI
- import asyncio
- import os
-
- openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
- async def get_capital(data):
-     country = data["country"]
-     response = await openai_client.chat.completions.create(
-         model="gpt-4o-mini",
-         messages=[
-             {"role": "system", "content": "You are a helpful assistant."},
-             {
-                 "role": "user",
-                 "content": f"What is the capital of {country}? Just name the "
-                 "city and nothing else",
-             },
-         ],
-     )
-     return response.choices[0].message.content.strip()
-
-
- # Evaluation data
- data = [
-     {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
-     {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
-     {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
- ]
-
-
- def correctness(output, target):
-     return 1 if output == target["capital"] else 0
-
-
- # Create an Evaluation instance
- e = evaluate(
-     name="my-evaluation",
-     data=data,
-     executor=get_capital,
-     evaluators={"correctness": correctness},
-     project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
- )
- ```
-
- ### Running from CLI.
-
- 1. Make sure `lmnr` is installed in a venv. CLI does not work with a global env
- 1. Run `lmnr path/to/my/eval.py`
-
- ### Running from code
-
- Simply execute the function, e.g. `python3 path/to/my/eval.py`

{lmnr-0.4.12b4 → lmnr-0.4.14}/pyproject.toml

@@ -1,6 +1,6 @@
  [project]
  name = "lmnr"
- version = "0.4.12b4"
+ version = "0.4.14"
  description = "Python SDK for Laminar AI"
  authors = [
      { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"

  [tool.poetry]
  name = "lmnr"
- version = "0.4.12b4"
+ version = "0.4.14"
  description = "Python SDK for Laminar AI"
  authors = ["lmnr.ai"]
  readme = "README.md"

{lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/evaluations.py

@@ -1,4 +1,5 @@
  import asyncio
+ import re
  import sys
  from abc import ABC, abstractmethod
  from contextlib import contextmanager
@@ -12,7 +13,6 @@ from ..traceloop_sdk.tracing.attributes import SPAN_TYPE

  from .laminar import Laminar as L
  from .types import (
-     CreateEvaluationResponse,
      Datapoint,
      EvaluationResultDatapoint,
      EvaluatorFunction,
@@ -46,13 +46,26 @@ def get_evaluation_url(project_id: str, evaluation_id: str):
      return f"https://www.lmnr.ai/project/{project_id}/evaluations/{evaluation_id}"


+ def get_average_scores(results: list[EvaluationResultDatapoint]) -> dict[str, Numeric]:
+     per_score_values = {}
+     for result in results:
+         for key, value in result.scores.items():
+             if key not in per_score_values:
+                 per_score_values[key] = []
+             per_score_values[key].append(value)
+
+     average_scores = {}
+     for key, values in per_score_values.items():
+         average_scores[key] = sum(values) / len(values)
+
+     return average_scores
+
+
  class EvaluationReporter:
      def __init__(self):
          pass

-     def start(self, name: str, project_id: str, id: str, length: int):
-         print(f"Running evaluation {name}...\n")
-         print(f"Check progress and results at {get_evaluation_url(project_id, id)}\n")
+     def start(self, length: int):
          self.cli_progress = tqdm(
              total=length,
              bar_format="{bar} {percentage:3.0f}% | ETA: {remaining}s | {n_fmt}/{total_fmt}",
@@ -66,9 +79,10 @@ class EvaluationReporter:
          self.cli_progress.close()
          sys.stderr.write(f"\nError: {error}\n")

-     def stop(self, average_scores: dict[str, Numeric]):
+     def stop(self, average_scores: dict[str, Numeric], project_id: str, evaluation_id: str):
          self.cli_progress.close()
-         print("\nAverage scores:")
+         print(f"\nCheck progress and results at {get_evaluation_url(project_id, evaluation_id)}\n")
+         print("Average scores:")
          for name, score in average_scores.items():
              print(f"{name}: {score}")
          print("\n")
@@ -97,6 +111,7 @@ class Evaluation:
          data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
          executor: Any,
          evaluators: dict[str, EvaluatorFunction],
+         group_id: Optional[str] = None,
          name: Optional[str] = None,
          batch_size: int = DEFAULT_BATCH_SIZE,
          project_api_key: Optional[str] = None,
@@ -123,6 +138,8 @@ class Evaluation:
                  evaluator function. If the function is anonymous, it will be
                  named `evaluator_${index}`, where index is the index of the
                  evaluator function in the list starting from 1.
+             group_id (Optional[str], optional): Group id of the evaluation.
+                 Defaults to "default".
              name (Optional[str], optional): The name of the evaluation.
                  It will be auto-generated if not provided.
              batch_size (int, optional): The batch size for evaluation.
@@ -138,11 +155,16 @@ class Evaluation:
                  Defaults to None. If None, all available instruments will be used.
          """

+         if not evaluators:
+             raise ValueError("No evaluators provided")
+
+         # TODO: Compile regex once and then reuse it
+         for evaluator_name in evaluators:
+             if not re.match(r'^[\w\s-]+$', evaluator_name):
+                 raise ValueError(f'Invalid evaluator key: "{evaluator_name}". Keys must only contain letters, digits, hyphens, underscores, or spaces.')
+
          self.is_finished = False
-         self.name = name
          self.reporter = EvaluationReporter()
-         self.executor = executor
-         self.evaluators = evaluators
          if isinstance(data, list):
              self.data = [
                  (Datapoint.model_validate(point) if isinstance(point, dict) else point)
@@ -150,6 +172,10 @@
              ]
          else:
              self.data = data
+         self.executor = executor
+         self.evaluators = evaluators
+         self.group_id = group_id
+         self.name = name
          self.batch_size = batch_size
          L.initialize(
              project_api_key=project_api_key,
@@ -160,23 +186,6 @@
          )

      def run(self) -> Union[None, Awaitable[None]]:
-         """Runs the evaluation.
-
-         Creates a new evaluation if no evaluation with such name exists, or
-         adds data to an existing one otherwise. Evaluates data points in
-         batches of `self.batch_size`. The executor
-         function is called on each data point to get the output,
-         and then evaluate it by each evaluator function.
-
-         Usage:
-         ```python
-         # in a synchronous context:
-         e.run()
-         # in an asynchronous context:
-         await e.run()
-         ```
-
-         """
          if self.is_finished:
              raise Exception("Evaluation is already finished")

@@ -187,41 +196,34 @@
          return loop.run_until_complete(self._run())

      async def _run(self) -> None:
-         evaluation = L.create_evaluation(self.name)
          self.reporter.start(
-             evaluation.name,
-             evaluation.projectId,
-             evaluation.id,
              len(self.data),
          )

          try:
-             await self.evaluate_in_batches(evaluation)
+             result_datapoints = await self.evaluate_in_batches()
          except Exception as e:
-             L.update_evaluation_status(evaluation.id, "Error")
              self.reporter.stopWithError(e)
              self.is_finished = True
              return
+         else:
+             evaluation = L.create_evaluation(data=result_datapoints, group_id=self.group_id, name=self.name)
+             average_scores = get_average_scores(result_datapoints)
+             self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
+             self.is_finished = True

-         # If we update with status "Finished", we expect averageScores to be not empty
-         updated_evaluation = L.update_evaluation_status(evaluation.id, "Finished")
-         self.reporter.stop(updated_evaluation.averageScores)
-         self.is_finished = True
-
-     async def evaluate_in_batches(self, evaluation: CreateEvaluationResponse):
+     async def evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
+         result_datapoints = []
          for i in range(0, len(self.data), self.batch_size):
              batch = (
-                 self.data[i : i + self.batch_size]
+                 self.data[i: i + self.batch_size]
                  if isinstance(self.data, list)
                  else self.data.slice(i, i + self.batch_size)
              )
-             try:
-                 results = await self._evaluate_batch(batch)
-                 L.post_evaluation_results(evaluation.id, results)
-             except Exception as e:
-                 print(f"Error evaluating batch: {e}")
-             finally:
-                 self.reporter.update(len(batch))
+             batch_datapoints = await self._evaluate_batch(batch)
+             result_datapoints.extend(batch_datapoints)
+             self.reporter.update(len(batch))
+         return result_datapoints

      async def _evaluate_batch(
          self, batch: list[Datapoint]
@@ -252,7 +254,7 @@ class Evaluation:
          scores: dict[str, Numeric] = {}
          for evaluator_name, evaluator in self.evaluators.items():
              with L.start_as_current_span(
-                 "evaluator", input={"output": output, "target": target}
+                 evaluator_name, input={"output": output, "target": target}
              ) as evaluator_span:
                  evaluator_span.set_attribute(SPAN_TYPE, SpanType.EVALUATOR.value)
                  value = (
@@ -282,6 +284,7 @@ def evaluate(
      data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
      executor: ExecutorFunction,
      evaluators: dict[str, EvaluatorFunction],
+     group_id: Optional[str] = None,
      name: Optional[str] = None,
      batch_size: int = DEFAULT_BATCH_SIZE,
      project_api_key: Optional[str] = None,
@@ -310,8 +313,11 @@ def evaluate(
              evaluator function. If the function is anonymous, it will be
              named `evaluator_${index}`, where index is the index of the
              evaluator function in the list starting from 1.
-         name (Optional[str], optional): The name of the evaluation.
-             It will be auto-generated if not provided.
+         group_id (Optional[str], optional): Group name which is same
+             as the feature you are evaluating in your project or application.
+             Defaults to "default".
+         name (Optional[str], optional): Optional name of the evaluation. Used to easily
+             identify the evaluation in the group.
          batch_size (int, optional): The batch size for evaluation.
              Defaults to DEFAULT_BATCH_SIZE.
          project_api_key (Optional[str], optional): The project API key.
@@ -331,6 +337,7 @@
          data=data,
          executor=executor,
          evaluators=evaluators,
+         group_id=group_id,
          name=name,
          batch_size=batch_size,
          project_api_key=project_api_key,
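
Taken together, these `evaluations.py` hunks change the lifecycle: result datapoints are accumulated locally across batches, averaged with the new `get_average_scores`, and only then reported to Laminar in a single `create_evaluation` call. A minimal standalone sketch of that flow (not the SDK itself; the datapoints, the hypothetical `run_batches` helper, and the constant score are made up for illustration):

```python
# Sketch of the new "evaluate locally, report once" flow, assuming plain dicts
# in place of EvaluationResultDatapoint objects.

def get_average_scores(results: list[dict]) -> dict[str, float]:
    # Same aggregation as the added get_average_scores(): collect every value
    # per score name, then average.
    per_score_values: dict[str, list[float]] = {}
    for result in results:
        for key, value in result["scores"].items():
            per_score_values.setdefault(key, []).append(value)
    return {key: sum(vals) / len(vals) for key, vals in per_score_values.items()}


def run_batches(data: list[dict], batch_size: int = 5) -> list[dict]:
    # Mirrors evaluate_in_batches(): accumulate result datapoints instead of
    # posting each batch, so a single create-evaluation call can follow.
    results = []
    for i in range(0, len(data), batch_size):
        batch = data[i: i + batch_size]
        # Stand-in for executor + evaluators; real scores come from evaluator functions.
        results.extend({"data": point, "scores": {"correctness": 1.0}} for point in batch)
    return results


if __name__ == "__main__":
    results = run_batches([{"topic": "flowers"}, {"topic": "cars"}])
    print(get_average_scores(results))  # {'correctness': 1.0}
```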

{lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/laminar.py

@@ -3,11 +3,9 @@ from opentelemetry import context
  from opentelemetry.trace import (
      INVALID_SPAN,
      get_current_span,
-     SpanKind,
  )
  from opentelemetry.util.types import AttributeValue
- from opentelemetry.context.context import Context
- from opentelemetry.util import types
+ from opentelemetry.context import set_value, attach, detach
  from lmnr.traceloop_sdk import Traceloop
  from lmnr.traceloop_sdk.tracing import get_tracer
  from contextlib import contextmanager
@@ -29,10 +27,12 @@ from lmnr.traceloop_sdk.tracing.attributes import (
      SESSION_ID,
      SPAN_INPUT,
      SPAN_OUTPUT,
+     SPAN_PATH,
      TRACE_TYPE,
      USER_ID,
  )
  from lmnr.traceloop_sdk.tracing.tracing import (
+     get_span_path,
      set_association_properties,
      update_association_properties,
  )
@@ -47,7 +47,6 @@ from .types import (
      NodeInput,
      PipelineRunRequest,
      TraceType,
-     UpdateEvaluationResponse,
  )


@@ -315,14 +314,6 @@ class Laminar:
          cls,
          name: str,
          input: Any = None,
-         context: Optional[Context] = None,
-         kind: SpanKind = SpanKind.INTERNAL,
-         attributes: types.Attributes = None,
-         links=None,
-         start_time: Optional[int] = None,
-         record_exception: bool = True,
-         set_status_on_exception: bool = True,
-         end_on_exit: bool = True,
      ):
          """Start a new span as the current span. Useful for manual instrumentation.
          This is the preferred and more stable way to use manual instrumentation.
@@ -337,32 +328,15 @@
              name (str): name of the span
              input (Any, optional): input to the span. Will be sent as an
                  attribute, so must be json serializable. Defaults to None.
-             context (Optional[Context], optional): context to start the span in.
-                 Defaults to None.
-             kind (SpanKind, optional): kind of the span. Defaults to SpanKind.INTERNAL.
-             attributes (types.Attributes, optional): attributes to set on the span.
-                 Defaults to None.
-             links ([type], optional): links to set on the span. Defaults to None.
-             start_time (Optional[int], optional): start time of the span.
-                 Defaults to None.
-             record_exception (bool, optional): whether to record exceptions.
-                 Defaults to True.
-             set_status_on_exception (bool, optional): whether to set status on exception.
-                 Defaults to True.
-             end_on_exit (bool, optional): whether to end the span on exit.
-                 Defaults to True.
          """
          with get_tracer() as tracer:
+             span_path = get_span_path(name)
+             ctx = set_value("span_path", span_path)
+             ctx_token = attach(set_value("span_path", span_path))
              with tracer.start_as_current_span(
                  name,
-                 context=context,
-                 kind=kind,
-                 attributes=attributes,
-                 links=links,
-                 start_time=start_time,
-                 record_exception=record_exception,
-                 set_status_on_exception=set_status_on_exception,
-                 end_on_exit=end_on_exit,
+                 context=ctx,
+                 attributes={SPAN_PATH: span_path},
              ) as span:
                  if input is not None:
                      span.set_attribute(
@@ -371,6 +345,12 @@ class Laminar:
                      )
                  yield span

+             # TODO: Figure out if this is necessary
+             try:
+                 detach(ctx_token)
+             except Exception:
+                 pass
+
      @classmethod
      def set_span_output(cls, output: Any = None):
          """Set the output of the current span. Useful for manual instrumentation.
@@ -432,10 +412,14 @@ class Laminar:
          set_association_properties(props)

      @classmethod
-     def create_evaluation(cls, name: Optional[str]) -> CreateEvaluationResponse:
+     def create_evaluation(cls, data: list[EvaluationResultDatapoint], group_id: Optional[str] = None, name: Optional[str] = None) -> CreateEvaluationResponse:
          response = requests.post(
              cls.__base_http_url + "/v1/evaluations",
-             data=json.dumps({"name": name}),
+             data=json.dumps({
+                 "groupId": group_id,
+                 "name": name,
+                 "points": [datapoint.to_dict() for datapoint in data]
+             }),
              headers=cls._headers(),
          )
          if response.status_code != 200:
@@ -446,66 +430,6 @@ class Laminar:
              raise ValueError(f"Error creating evaluation {response.text}")
          return CreateEvaluationResponse.model_validate(response.json())

-     @classmethod
-     def post_evaluation_results(
-         cls, evaluation_id: uuid.UUID, data: list[EvaluationResultDatapoint]
-     ) -> requests.Response:
-         body = {
-             "evaluationId": str(evaluation_id),
-             "points": [datapoint.to_dict() for datapoint in data],
-         }
-         response = requests.post(
-             cls.__base_http_url + "/v1/evaluation-datapoints",
-             data=json.dumps(body),
-             headers=cls._headers(),
-         )
-         if response.status_code != 200:
-             try:
-                 resp_json = response.json()
-                 raise ValueError(
-                     f"Failed to send evaluation results. Response: {json.dumps(resp_json)}"
-                 )
-             except Exception:
-                 raise ValueError(
-                     f"Failed to send evaluation results. Error: {response.text}"
-                 )
-         return response
-
-     @classmethod
-     def update_evaluation_status(
-         cls, evaluation_id: str, status: str
-     ) -> UpdateEvaluationResponse:
-         """
-         Updates the status of an evaluation. Returns the updated evaluation object.
-
-         Args:
-             evaluation_id (str): The ID of the evaluation to update.
-             status (str): The status to set for the evaluation.
-
-         Returns:
-             UpdateEvaluationResponse: The updated evaluation response.
-
-         Raises:
-             ValueError: If the request fails.
-         """
-         body = {
-             "status": status,
-         }
-         url = f"{cls.__base_http_url}/v1/evaluations/{evaluation_id}"
-
-         response = requests.post(
-             url,
-             data=json.dumps(body),
-             headers=cls._headers(),
-         )
-         if response.status_code != 200:
-             raise ValueError(
-                 f"Failed to update evaluation status {evaluation_id}. "
-                 f"Response: {response.text}"
-             )
-
-         return UpdateEvaluationResponse.model_validate(response.json())
-
      @classmethod
      def _headers(cls):
          assert cls.__project_api_key is not None, "Project API key is not set"
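
These `laminar.py` hunks replace the three-step HTTP flow (create evaluation, post datapoints, update status) with a single POST to `/v1/evaluations` whose body carries `groupId`, `name`, and the serialized `points`. A rough sketch of that request follows; the base URL and header shape are assumptions for illustration (the SDK builds both internally via `cls.__base_http_url` and `cls._headers()`), and the empty `points` list stands in for serialized `EvaluationResultDatapoint` values.

```python
import json

import requests

payload = {
    "groupId": "my_first_feature",  # optional evaluation group
    "name": None,                   # optional; generated server-side if omitted
    "points": [],                   # EvaluationResultDatapoint.to_dict() values in the real SDK
}
response = requests.post(
    "https://api.lmnr.ai/v1/evaluations",  # assumed base URL for illustration
    data=json.dumps(payload),
    headers={
        # Assumed header shape; the SDK derives headers from the project API key.
        "Authorization": "Bearer <LMNR_PROJECT_API_KEY>",
        "Content-Type": "application/json",
    },
)
if response.status_code != 200:
    raise ValueError(f"Error creating evaluation {response.text}")
```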

{lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/sdk/types.py

@@ -2,7 +2,7 @@ import datetime
  from enum import Enum
  import pydantic
  import requests
- from typing import Any, Awaitable, Callable, Literal, Optional, Union
+ from typing import Any, Awaitable, Callable, Optional, Union
  import uuid

  from .utils import serialize
@@ -107,20 +107,13 @@ EvaluatorFunction = Callable[
      Union[EvaluatorFunctionReturnType, Awaitable[EvaluatorFunctionReturnType]],
  ]

- EvaluationStatus = Literal["Started", "Finished", "Error"]
-

  class CreateEvaluationResponse(pydantic.BaseModel):
      id: uuid.UUID
      createdAt: datetime.datetime
+     groupId: str
      name: str
-     status: EvaluationStatus
      projectId: uuid.UUID
-     metadata: Optional[dict[str, Any]] = None
-     averageScores: Optional[dict[str, Numeric]] = None
-
-
- UpdateEvaluationResponse = CreateEvaluationResponse


  class EvaluationResultDatapoint(pydantic.BaseModel):

{lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/decorators/base.py

@@ -10,8 +10,8 @@ from opentelemetry import context as context_api

  from lmnr.sdk.utils import get_input_from_func_args, is_method
  from lmnr.traceloop_sdk.tracing import get_tracer
- from lmnr.traceloop_sdk.tracing.attributes import SPAN_INPUT, SPAN_OUTPUT
- from lmnr.traceloop_sdk.tracing.tracing import TracerWrapper
+ from lmnr.traceloop_sdk.tracing.attributes import SPAN_INPUT, SPAN_OUTPUT, SPAN_PATH
+ from lmnr.traceloop_sdk.tracing.tracing import TracerWrapper, get_span_path
  from lmnr.traceloop_sdk.utils.json_encoder import JSONEncoder


@@ -47,7 +47,12 @@ def entity_method(

              with get_tracer() as tracer:
                  span = tracer.start_span(span_name)
-                 ctx = trace.set_span_in_context(span)
+
+                 span_path = get_span_path(span_name)
+                 span.set_attribute(SPAN_PATH, span_path)
+                 ctx = context_api.set_value("span_path", span_path)
+
+                 ctx = trace.set_span_in_context(span, ctx)
                  ctx_token = context_api.attach(ctx)

                  try:
@@ -104,7 +109,12 @@ def aentity_method(

              with get_tracer() as tracer:
                  span = tracer.start_span(span_name)
-                 ctx = trace.set_span_in_context(span)
+
+                 span_path = get_span_path(span_name)
+                 span.set_attribute(SPAN_PATH, span_path)
+                 ctx = context_api.set_value("span_path", span_path)
+
+                 ctx = trace.set_span_in_context(span, ctx)
                  ctx_token = context_api.attach(ctx)

                  try:

{lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tracing/attributes.py

@@ -1,6 +1,7 @@
  SPAN_INPUT = "lmnr.span.input"
  SPAN_OUTPUT = "lmnr.span.output"
  SPAN_TYPE = "lmnr.span.type"
+ SPAN_PATH = "lmnr.span.path"

  ASSOCIATION_PROPERTIES = "lmnr.association.properties"
  SESSION_ID = "session_id"

{lmnr-0.4.12b4 → lmnr-0.4.14}/src/lmnr/traceloop_sdk/tracing/tracing.py

@@ -25,7 +25,7 @@ from opentelemetry.instrumentation.threading import ThreadingInstrumentor

  # from lmnr.traceloop_sdk import Telemetry
  from lmnr.traceloop_sdk.instruments import Instruments
- from lmnr.traceloop_sdk.tracing.attributes import ASSOCIATION_PROPERTIES
+ from lmnr.traceloop_sdk.tracing.attributes import ASSOCIATION_PROPERTIES, SPAN_PATH
  from lmnr.traceloop_sdk.tracing.content_allow_list import ContentAllowList
  from lmnr.traceloop_sdk.utils import is_notebook
  from lmnr.traceloop_sdk.utils.package_check import is_package_installed
@@ -245,6 +245,14 @@ class TracerWrapper(object):
          self.flush()

      def _span_processor_on_start(self, span, parent_context):
+         span_path = get_value("span_path")
+         if span_path is not None:
+             # This is done redundantly here for most decorated functions
+             # However, need to do this for auto-instrumented libraries.
+             # Then, for auto-instrumented ones, they'll attach
+             # the final part of the name to the span on the backend.
+             span.set_attribute(SPAN_PATH, span_path)
+
          association_properties = get_value("association_properties")
          if association_properties is not None:
              _set_association_properties_attributes(span, association_properties)
@@ -318,6 +326,12 @@ def _set_association_properties_attributes(span, properties: dict) -> None:
          )


+ def get_span_path(span_name: str) -> str:
+     current_span_path = get_value("span_path")
+     span_path = f"{current_span_path}.{span_name}" if current_span_path else span_name
+     return span_path
+
+
  def set_managed_prompt_tracing_context(
      key: str,
      version: int,
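
The new `SPAN_PATH` attribute and `get_span_path` build a dotted path from whatever `span_path` value is already stored in the OpenTelemetry context, so nested spans end up with paths like `outer.inner`. A toy illustration of that composition, reusing `get_span_path` exactly as added above but driving the context directly instead of a real tracer (the span names are made up):

```python
# Requires opentelemetry-api; no tracer or exporter is involved here.
from opentelemetry.context import attach, detach, get_value, set_value


def get_span_path(span_name: str) -> str:
    # Same logic as the function added in tracing.py.
    current_span_path = get_value("span_path")
    return f"{current_span_path}.{span_name}" if current_span_path else span_name


# Entering an outer scope stores "outer"; the nested scope extends it.
token_outer = attach(set_value("span_path", get_span_path("outer")))
token_inner = attach(set_value("span_path", get_span_path("inner")))
print(get_value("span_path"))  # outer.inner
detach(token_inner)
detach(token_outer)
```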