lmnr 0.4.11__tar.gz → 0.4.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {lmnr-0.4.11 → lmnr-0.4.12}/PKG-INFO +75 -101
  2. {lmnr-0.4.11 → lmnr-0.4.12}/README.md +72 -99
  3. {lmnr-0.4.11 → lmnr-0.4.12}/pyproject.toml +5 -7
  4. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/__init__.py +1 -1
  5. lmnr-0.4.12/src/lmnr/cli.py +39 -0
  6. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/sdk/decorators.py +3 -2
  7. lmnr-0.4.12/src/lmnr/sdk/evaluations.py +347 -0
  8. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/sdk/laminar.py +81 -44
  9. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/sdk/types.py +44 -5
  10. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/sdk/utils.py +4 -5
  11. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/__init__.py +3 -42
  12. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/config/__init__.py +0 -4
  13. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/decorators/base.py +16 -9
  14. lmnr-0.4.12/src/lmnr/traceloop_sdk/tracing/attributes.py +8 -0
  15. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tracing/tracing.py +31 -201
  16. lmnr-0.4.11/src/lmnr/sdk/evaluations.py +0 -178
  17. lmnr-0.4.11/src/lmnr/traceloop_sdk/metrics/__init__.py +0 -0
  18. lmnr-0.4.11/src/lmnr/traceloop_sdk/metrics/metrics.py +0 -176
  19. lmnr-0.4.11/src/lmnr/traceloop_sdk/tracing/manual.py +0 -57
  20. {lmnr-0.4.11 → lmnr-0.4.12}/LICENSE +0 -0
  21. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/sdk/__init__.py +0 -0
  22. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/sdk/log.py +0 -0
  23. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/.flake8 +0 -0
  24. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/.python-version +0 -0
  25. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
  26. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/instruments.py +0 -0
  27. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
  28. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
  29. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
  30. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
  31. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
  32. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
  33. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
  34. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
  35. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
  36. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
  37. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
  38. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
  39. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
  40. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
  41. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
  42. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
  43. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
  44. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
  45. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
  46. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
  47. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
  48. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
  49. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
  50. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
  51. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
  52. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
  53. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
  54. {lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/traceloop_sdk/version.py +0 -0
{lmnr-0.4.11 → lmnr-0.4.12}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: lmnr
- Version: 0.4.11
+ Version: 0.4.12
  Summary: Python SDK for Laminar AI
  License: Apache-2.0
  Author: lmnr.ai
@@ -11,9 +11,9 @@ Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
+ Requires-Dist: argparse (>=1.0,<2.0)
  Requires-Dist: asyncio (>=3.0,<4.0)
  Requires-Dist: backoff (>=2.0,<3.0)
- Requires-Dist: colorama (>=0.4,<0.5)
  Requires-Dist: deprecated (>=1.0,<2.0)
  Requires-Dist: jinja2 (>=3.0,<4.0)
  Requires-Dist: opentelemetry-api (>=1.27.0,<2.0.0)
@@ -54,67 +54,42 @@ Requires-Dist: pydantic (>=2.7,<3.0)
  Requires-Dist: python-dotenv (>=1.0,<2.0)
  Requires-Dist: requests (>=2.0,<3.0)
  Requires-Dist: tenacity (>=8.0,<9.0)
+ Requires-Dist: tqdm (>=4.0,<5.0)
  Description-Content-Type: text/markdown
  
  # Laminar Python
  
- OpenTelemetry log sender for [Laminar](https://github.com/lmnr-ai/lmnr) for Python code.
+ Python SDK for [Laminar](https://www.lmnr.ai).
+
+ [Laminar](https://www.lmnr.ai) is an open-source platform for engineering LLM products. Trace, evaluate, annotate, and analyze LLM data. Bring LLM applications to production with confidence.
+
+ Check our [open-source repo](https://github.com/lmnr-ai/lmnr) and don't forget to star it ⭐
  
  <a href="https://pypi.org/project/lmnr/"> ![PyPI - Version](https://img.shields.io/pypi/v/lmnr?label=lmnr&logo=pypi&logoColor=3775A9) </a>
  ![PyPI - Downloads](https://img.shields.io/pypi/dm/lmnr)
  ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/lmnr)
  
  
-
  ## Quickstart
  
  First, install the package:
  
  ```sh
- python3 -m venv .myenv
- source .myenv/bin/activate # or use your favorite env management tool
-
  pip install lmnr
  ```
  
- Then, you can initialize Laminar in your main file and instrument your code.
+ And then in the code
  
  ```python
- import os
- from openai import OpenAI
  from lmnr import Laminar as L
  
- L.initialize(
-     project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
- )
-
- client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
- def poem_writer(topic: str):
-     prompt = f"write a poem about {topic}"
-
-     # OpenAI calls are automatically instrumented
-     response = client.chat.completions.create(
-         model="gpt-4o",
-         messages=[
-             {"role": "system", "content": "You are a helpful assistant."},
-             {"role": "user", "content": prompt},
-         ],
-     )
-     poem = response.choices[0].message.content
-     return poem
-
- if __name__ == "__main__":
-     print(poem_writer("laminar flow"))
-
+ L.initialize(project_api_key="<PROJECT_API_KEY>")
  ```
  
- Note that you need to only initialize Laminar once in your application.
+ This will automatically instrument most of the LLM, Vector DB, and related
+ calls with OpenTelemetry-compatible instrumentation.
  
- ### Project API key
-
- Get the key from the settings page of your Laminar project ([Learn more](https://docs.lmnr.ai/api-reference/introduction#authentication)).
- You can either pass it to `.initialize()` or set it to `.env` at the root of your package with the key `LMNR_PROJECT_API_KEY`.
+ Note that you need to only initialize Laminar once in your application.
  
  ## Instrumentation
  
@@ -195,7 +170,7 @@ L.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"], instruments={In
  
  If you want to fully disable any kind of autoinstrumentation, pass an empty set as `instruments=set()` to `.initialize()`.
  
- Majority of the autoinstrumentations are provided by Traceloop's [OpenLLMetry](https://github.com/traceloop/openllmetry).
+ Autoinstrumentations are provided by Traceloop's [OpenLLMetry](https://github.com/traceloop/openllmetry).
  
  ## Sending events
  
@@ -223,6 +198,67 @@ L.event("topic alignment", topic in poem)
  L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
  ```
  
+ ## Evaluations
+
+ ### Quickstart
+
+ Install the package:
+
+ ```sh
+ pip install lmnr
+ ```
+
+ Create a file named `my_first_eval.py` with the following code:
+
+ ```python
+ from lmnr import evaluate
+
+ def write_poem(data):
+     return f"This is a good poem about {data['topic']}"
+
+ def contains_poem(output, target):
+     return 1 if output in target['poem'] else 0
+
+ # Evaluation data
+ data = [
+     {"data": {"topic": "flowers"}, "target": {"poem": "This is a good poem about flowers"}},
+     {"data": {"topic": "cars"}, "target": {"poem": "I like cars"}},
+ ]
+
+ evaluate(
+     data=data,
+     executor=write_poem,
+     evaluators={
+         "containsPoem": contains_poem
+     }
+ )
+ ```
+
+ Run the following commands:
+
+ ```sh
+ export LMNR_PROJECT_API_KEY=<YOUR_PROJECT_API_KEY> # get from Laminar project settings
+ lmnr eval my_first_eval.py # run in the virtual environment where lmnr is installed
+ ```
+
+ Visit the URL printed in the console to see the results.
+
+ ### Overview
+
+ Bring rigor to the development of your LLM applications with evaluations.
+
+ You can run evaluations locally by providing executor (part of the logic used in your application) and evaluators (numeric scoring functions) to `evaluate` function.
+
+ `evaluate` takes in the following parameters:
+ - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
+ - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be both a function or an `async` function.
+ - `evaluators` – Dictionary which maps evaluator names to evaluators. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores. Each evaluator can be both a function or an `async` function.
+ - `name` – optional name for the evaluation. Automatically generated if not provided.
+
+ \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
+
+ [Read docs](https://docs.lmnr.ai/evaluations/introduction) to learn more about evaluations.
+
  ## Laminar pipelines as prompt chain managers
  
  You can create Laminar pipelines in the UI and manage chains of LLM calls there.
@@ -257,65 +293,3 @@ PipelineRunResponse(
  )
  ```
  
- ## Running offline evaluations on your data
-
- You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
-
- Evaluation takes in the following parameters:
- - `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
- - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
- - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
- - `evaluators` – evaluaton logic. List of functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores.
-
- \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
-
- ### Example
-
- ```python
- from openai import AsyncOpenAI
- import asyncio
- import os
-
- openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
- async def get_capital(data):
-     country = data["country"]
-     response = await openai_client.chat.completions.create(
-         model="gpt-4o-mini",
-         messages=[
-             {"role": "system", "content": "You are a helpful assistant."},
-             {
-                 "role": "user",
-                 "content": f"What is the capital of {country}? Just name the "
-                 "city and nothing else",
-             },
-         ],
-     )
-     return response.choices[0].message.content.strip()
-
-
- # Evaluation data
- data = [
-     {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
-     {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
-     {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
- ]
-
-
- def evaluator_A(output, target):
-     return 1 if output == target["capital"] else 0
-
-
- # Create an Evaluation instance
- e = Evaluation(
-     name="py-evaluation-async",
-     data=data,
-     executor=get_capital,
-     evaluators=[evaluator_A],
-     project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
- )
-
- # Run the evaluation
- asyncio.run(e.run())
- ```
-
{lmnr-0.4.11 → lmnr-0.4.12}/README.md

@@ -1,62 +1,36 @@
  # Laminar Python
  
- OpenTelemetry log sender for [Laminar](https://github.com/lmnr-ai/lmnr) for Python code.
+ Python SDK for [Laminar](https://www.lmnr.ai).
+
+ [Laminar](https://www.lmnr.ai) is an open-source platform for engineering LLM products. Trace, evaluate, annotate, and analyze LLM data. Bring LLM applications to production with confidence.
+
+ Check our [open-source repo](https://github.com/lmnr-ai/lmnr) and don't forget to star it ⭐
  
  <a href="https://pypi.org/project/lmnr/"> ![PyPI - Version](https://img.shields.io/pypi/v/lmnr?label=lmnr&logo=pypi&logoColor=3775A9) </a>
  ![PyPI - Downloads](https://img.shields.io/pypi/dm/lmnr)
  ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/lmnr)
  
  
-
  ## Quickstart
  
  First, install the package:
  
  ```sh
- python3 -m venv .myenv
- source .myenv/bin/activate # or use your favorite env management tool
-
  pip install lmnr
  ```
  
- Then, you can initialize Laminar in your main file and instrument your code.
+ And then in the code
  
  ```python
- import os
- from openai import OpenAI
  from lmnr import Laminar as L
  
- L.initialize(
-     project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
- )
-
- client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
- def poem_writer(topic: str):
-     prompt = f"write a poem about {topic}"
-
-     # OpenAI calls are automatically instrumented
-     response = client.chat.completions.create(
-         model="gpt-4o",
-         messages=[
-             {"role": "system", "content": "You are a helpful assistant."},
-             {"role": "user", "content": prompt},
-         ],
-     )
-     poem = response.choices[0].message.content
-     return poem
-
- if __name__ == "__main__":
-     print(poem_writer("laminar flow"))
-
+ L.initialize(project_api_key="<PROJECT_API_KEY>")
  ```
  
- Note that you need to only initialize Laminar once in your application.
+ This will automatically instrument most of the LLM, Vector DB, and related
+ calls with OpenTelemetry-compatible instrumentation.
  
- ### Project API key
-
- Get the key from the settings page of your Laminar project ([Learn more](https://docs.lmnr.ai/api-reference/introduction#authentication)).
- You can either pass it to `.initialize()` or set it to `.env` at the root of your package with the key `LMNR_PROJECT_API_KEY`.
+ Note that you need to only initialize Laminar once in your application.
  
  ## Instrumentation
  
@@ -137,7 +111,7 @@ L.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"], instruments={In
  
  If you want to fully disable any kind of autoinstrumentation, pass an empty set as `instruments=set()` to `.initialize()`.
  
- Majority of the autoinstrumentations are provided by Traceloop's [OpenLLMetry](https://github.com/traceloop/openllmetry).
+ Autoinstrumentations are provided by Traceloop's [OpenLLMetry](https://github.com/traceloop/openllmetry).
  
  ## Sending events
  
@@ -165,6 +139,67 @@ L.event("topic alignment", topic in poem)
  L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
  ```
  
+ ## Evaluations
+
+ ### Quickstart
+
+ Install the package:
+
+ ```sh
+ pip install lmnr
+ ```
+
+ Create a file named `my_first_eval.py` with the following code:
+
+ ```python
+ from lmnr import evaluate
+
+ def write_poem(data):
+     return f"This is a good poem about {data['topic']}"
+
+ def contains_poem(output, target):
+     return 1 if output in target['poem'] else 0
+
+ # Evaluation data
+ data = [
+     {"data": {"topic": "flowers"}, "target": {"poem": "This is a good poem about flowers"}},
+     {"data": {"topic": "cars"}, "target": {"poem": "I like cars"}},
+ ]
+
+ evaluate(
+     data=data,
+     executor=write_poem,
+     evaluators={
+         "containsPoem": contains_poem
+     }
+ )
+ ```
+
+ Run the following commands:
+
+ ```sh
+ export LMNR_PROJECT_API_KEY=<YOUR_PROJECT_API_KEY> # get from Laminar project settings
+ lmnr eval my_first_eval.py # run in the virtual environment where lmnr is installed
+ ```
+
+ Visit the URL printed in the console to see the results.
+
+ ### Overview
+
+ Bring rigor to the development of your LLM applications with evaluations.
+
+ You can run evaluations locally by providing executor (part of the logic used in your application) and evaluators (numeric scoring functions) to `evaluate` function.
+
+ `evaluate` takes in the following parameters:
+ - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
+ - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be both a function or an `async` function.
+ - `evaluators` – Dictionary which maps evaluator names to evaluators. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores. Each evaluator can be both a function or an `async` function.
+ - `name` – optional name for the evaluation. Automatically generated if not provided.
+
+ \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
+
+ [Read docs](https://docs.lmnr.ai/evaluations/introduction) to learn more about evaluations.
+
  ## Laminar pipelines as prompt chain managers
  
  You can create Laminar pipelines in the UI and manage chains of LLM calls there.
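
The README text added above notes that both the executor and each evaluator passed to `evaluate` may be `async`. Below is a minimal sketch of that variant, assuming the same call signature shown in the added quickstart; the datapoint and scoring logic are illustrative only, not taken from the package:

```python
import asyncio

from lmnr import evaluate


async def write_poem(data):
    # Stand-in for an async LLM call; any awaitable producing the output works.
    await asyncio.sleep(0)
    return f"This is a good poem about {data['topic']}"


async def contains_poem(output, target):
    # An evaluator may return a single number or a dict[str, int | float] of scores.
    return 1 if output in target["poem"] else 0


evaluate(
    data=[
        {"data": {"topic": "flowers"},
         "target": {"poem": "This is a good poem about flowers"}},
    ],
    executor=write_poem,
    evaluators={"containsPoem": contains_poem},
    name="async-eval-sketch",  # optional; auto-generated when omitted
)
```

As with the synchronous quickstart, such a file would be run with `lmnr eval <file>.py` after setting `LMNR_PROJECT_API_KEY`.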
@@ -198,65 +233,3 @@ PipelineRunResponse(
      run_id='53b012d5-5759-48a6-a9c5-0011610e3669'
  )
  ```
-
- ## Running offline evaluations on your data
-
- You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
-
- Evaluation takes in the following parameters:
- - `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
- - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
- - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
- - `evaluators` – evaluaton logic. List of functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores.
-
- \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
-
- ### Example
-
- ```python
- from openai import AsyncOpenAI
- import asyncio
- import os
-
- openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
- async def get_capital(data):
-     country = data["country"]
-     response = await openai_client.chat.completions.create(
-         model="gpt-4o-mini",
-         messages=[
-             {"role": "system", "content": "You are a helpful assistant."},
-             {
-                 "role": "user",
-                 "content": f"What is the capital of {country}? Just name the "
-                 "city and nothing else",
-             },
-         ],
-     )
-     return response.choices[0].message.content.strip()
-
-
- # Evaluation data
- data = [
-     {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
-     {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
-     {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
- ]
-
-
- def evaluator_A(output, target):
-     return 1 if output == target["capital"] else 0
-
-
- # Create an Evaluation instance
- e = Evaluation(
-     name="py-evaluation-async",
-     data=data,
-     executor=get_capital,
-     evaluators=[evaluator_A],
-     project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
- )
-
- # Run the evaluation
- asyncio.run(e.run())
- ```
{lmnr-0.4.11 → lmnr-0.4.12}/pyproject.toml

@@ -1,6 +1,6 @@
  [project]
  name = "lmnr"
- version = "0.4.11"
+ version = "0.4.12"
  description = "Python SDK for Laminar AI"
  authors = [
      { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"
  
  [tool.poetry]
  name = "lmnr"
- version = "0.4.11"
+ version = "0.4.12"
  description = "Python SDK for Laminar AI"
  authors = ["lmnr.ai"]
  readme = "README.md"
@@ -33,7 +33,6 @@ opentelemetry-instrumentation-sqlalchemy = "^0.48b0"
  opentelemetry-instrumentation-urllib3 = "^0.48b0"
  opentelemetry-instrumentation-threading = "^0.48b0"
  opentelemetry-semantic-conventions-ai = "0.4.1"
- colorama = "^0.4"
  tenacity = "~=8.0"
  jinja2 = "~=3.0"
  deprecated = "~=1.0"
@@ -62,6 +61,8 @@ opentelemetry-instrumentation-weaviate = "^0.30.0"
  opentelemetry-instrumentation-alephalpha = "^0.30.0"
  opentelemetry-instrumentation-marqo = "^0.30.0"
  opentelemetry-instrumentation-groq = "^0.30.0"
+ tqdm = "~=4.0"
+ argparse = "~=1.0"
  
  [tool.poetry.group.dev.dependencies]
  autopep8 = "^2.2.0"
@@ -83,11 +84,8 @@ langchain-openai = "^0.1.15"
  requires = ["poetry-core"]
  build-backend = "poetry.core.masonry.api"
  
- [project.entry-points.console_scripts]
- lmnr = "lmnr.cli.cli:cli"
-
  [tool.poetry.scripts]
- lmnr = "lmnr.cli.cli:cli"
+ lmnr = "lmnr.cli:cli"
  
  [project.optional-dependencies]
  test = ["pytest"]
{lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/__init__.py

@@ -1,4 +1,4 @@
- from .sdk.evaluations import Evaluation
+ from .sdk.evaluations import evaluate
  from .sdk.laminar import Laminar
  from .sdk.types import ChatMessage, PipelineRunError, PipelineRunResponse, NodeInput
  from .sdk.decorators import observe
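
For callers upgrading across this export swap: the 0.4.11 `Evaluation` class (its usage appears in the README section removed above) is replaced by the `evaluate` function, and evaluators move from a list to a name-to-function dictionary. A rough before/after sketch, assuming only the usage shown in the two README versions in this diff:

```python
from lmnr import evaluate


def executor(data):
    return data["country"]  # stand-in for application logic


def evaluator_A(output, target):
    return 1 if output == target["capital"] else 0


data = [{"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}}]

# 0.4.11 (removed export):
#   from lmnr import Evaluation
#   e = Evaluation(name="py-eval", data=data, executor=executor,
#                  evaluators=[evaluator_A],
#                  project_api_key=os.environ["LMNR_PROJECT_API_KEY"])
#   asyncio.run(e.run())

# 0.4.12: functional entry point; evaluators are keyed by name
evaluate(
    data=data,
    executor=executor,
    evaluators={"evaluator_A": evaluator_A},
    name="py-eval",  # optional in 0.4.12
)
```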
lmnr-0.4.12/src/lmnr/cli.py

@@ -0,0 +1,39 @@
+ from argparse import ArgumentParser
+ import asyncio
+ import importlib
+ import os
+ import sys
+
+ from lmnr.sdk.evaluations import set_global_evaluation
+
+
+ # TODO: Refactor this code
+ async def run_evaluation(args):
+     sys.path.insert(0, os.getcwd())
+
+     with set_global_evaluation(True):
+         file = os.path.abspath(args.file)
+
+         spec = importlib.util.spec_from_file_location("run_eval", file)
+         mod = importlib.util.module_from_spec(spec)
+         spec.loader.exec_module(mod)
+
+         from lmnr.sdk.evaluations import _evaluation
+         evaluation = _evaluation
+         await evaluation.run()
+
+
+ def cli():
+     parser = ArgumentParser(
+         prog="lmnr",
+         description="CLI for Laminar",
+     )
+
+     subparsers = parser.add_subparsers(title="subcommands", dest="subcommand")
+
+     parser_eval = subparsers.add_parser("eval", description="Run an evaluation")
+     parser_eval.add_argument("file", help="A file containing the evaluation to run")
+     parser_eval.set_defaults(func=run_evaluation)
+
+     parsed = parser.parse_args()
+     asyncio.run(parsed.func(parsed))
{lmnr-0.4.11 → lmnr-0.4.12}/src/lmnr/sdk/decorators.py

@@ -6,6 +6,7 @@ from opentelemetry.trace import INVALID_SPAN, get_current_span
  
  from typing import Callable, Optional, cast
  
+ from lmnr.traceloop_sdk.tracing.attributes import SESSION_ID, USER_ID
  from lmnr.traceloop_sdk.tracing.tracing import update_association_properties
  
  from .utils import is_async
@@ -43,11 +44,11 @@ def observe(
      if current_span != INVALID_SPAN:
          if session_id is not None:
              current_span.set_attribute(
-                 "traceloop.association.properties.session_id", session_id
+                 SESSION_ID, session_id
              )
          if user_id is not None:
              current_span.set_attribute(
-                 "traceloop.association.properties.user_id", user_id
+                 USER_ID, user_id
              )
          association_properties = {}
          if session_id is not None:
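
For context on the change above: `observe` sets these association properties from its `session_id` and `user_id` arguments, which are now written through the `SESSION_ID` / `USER_ID` constants from `tracing.attributes` rather than inline string keys. A hypothetical usage sketch follows; the exact decorator signature is assumed from the handling shown in this hunk, not confirmed by the diff:

```python
from lmnr import Laminar as L, observe

L.initialize(project_api_key="<PROJECT_API_KEY>")


# Assumed keyword arguments, inferred from the session_id / user_id handling above.
@observe(session_id="session-123", user_id="user-456")
def handle_request(query: str) -> str:
    # Spans created for this call carry session/user association properties,
    # now set via the SESSION_ID / USER_ID attribute constants.
    return f"echo: {query}"


if __name__ == "__main__":
    print(handle_request("hello"))
```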