lmnr 0.4.11__py3-none-any.whl → 0.4.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,6 @@ import logging
3
3
  import os
4
4
 
5
5
 
6
- from colorama import Fore
7
6
  from opentelemetry import trace
8
7
  from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
9
8
  OTLPSpanExporter as HTTPExporter,
@@ -24,17 +23,16 @@ from opentelemetry.trace import get_tracer_provider, ProxyTracerProvider
24
23
  from opentelemetry.context import get_value, attach, set_value
25
24
  from opentelemetry.instrumentation.threading import ThreadingInstrumentor
26
25
 
27
- from opentelemetry.semconv_ai import SpanAttributes
28
-
29
26
  # from lmnr.traceloop_sdk import Telemetry
30
27
  from lmnr.traceloop_sdk.instruments import Instruments
28
+ from lmnr.traceloop_sdk.tracing.attributes import ASSOCIATION_PROPERTIES
31
29
  from lmnr.traceloop_sdk.tracing.content_allow_list import ContentAllowList
32
30
  from lmnr.traceloop_sdk.utils import is_notebook
33
31
  from lmnr.traceloop_sdk.utils.package_check import is_package_installed
34
32
  from typing import Dict, Optional, Set
35
33
 
36
34
 
37
- TRACER_NAME = "traceloop.tracer"
35
+ TRACER_NAME = "lmnr.tracer"
38
36
  EXCLUDED_URLS = """
39
37
  iam.cloud.ibm.com,
40
38
  dataplatform.cloud.ibm.com,
@@ -44,7 +42,7 @@ EXCLUDED_URLS = """
44
42
  api.anthropic.com,
45
43
  api.cohere.ai,
46
44
  pinecone.io,
47
- traceloop.com,
45
+ api.lmnr.ai,
48
46
  posthog.com,
49
47
  sentry.io,
50
48
  bedrock-runtime,
@@ -124,196 +122,109 @@ class TracerWrapper(object):
124
122
  # this makes sure otel context is propagated so we always want it
125
123
  ThreadingInstrumentor().instrument()
126
124
 
127
- instrument_set = False
128
125
  if instruments is None:
129
126
  init_instrumentations(should_enrich_metrics)
130
- instrument_set = True
131
127
  else:
132
128
  for instrument in instruments:
133
129
  if instrument == Instruments.OPENAI:
134
130
  if not init_openai_instrumentor(should_enrich_metrics):
135
- print(Fore.RED + "Warning: OpenAI library does not exist.")
136
- print(Fore.RESET)
137
- else:
138
- instrument_set = True
131
+ print("Warning: OpenAI library does not exist.")
139
132
  elif instrument == Instruments.ANTHROPIC:
140
133
  if not init_anthropic_instrumentor(should_enrich_metrics):
141
134
  print(
142
- Fore.RED + "Warning: Anthropic library does not exist."
135
+ "Warning: Anthropic library does not exist."
143
136
  )
144
- print(Fore.RESET)
145
- else:
146
- instrument_set = True
147
137
  elif instrument == Instruments.COHERE:
148
138
  if not init_cohere_instrumentor():
149
- print(Fore.RED + "Warning: Cohere library does not exist.")
150
- print(Fore.RESET)
151
- else:
152
- instrument_set = True
139
+ print("Warning: Cohere library does not exist.")
153
140
  elif instrument == Instruments.PINECONE:
154
141
  if not init_pinecone_instrumentor():
155
142
  print(
156
- Fore.RED + "Warning: Pinecone library does not exist."
143
+ "Warning: Pinecone library does not exist."
157
144
  )
158
- print(Fore.RESET)
159
- else:
160
- instrument_set = True
161
145
  elif instrument == Instruments.CHROMA:
162
146
  if not init_chroma_instrumentor():
163
- print(Fore.RED + "Warning: Chroma library does not exist.")
164
- print(Fore.RESET)
165
- else:
166
- instrument_set = True
147
+ print("Warning: Chroma library does not exist.")
167
148
  elif instrument == Instruments.GOOGLE_GENERATIVEAI:
168
149
  if not init_google_generativeai_instrumentor():
169
- print(
170
- Fore.RED
171
- + "Warning: Google Generative AI library does not exist."
172
- )
173
- print(Fore.RESET)
174
- else:
175
- instrument_set = True
150
+ print("Warning: Google Generative AI library does not exist.")
176
151
  elif instrument == Instruments.LANGCHAIN:
177
152
  if not init_langchain_instrumentor():
178
153
  print(
179
- Fore.RED + "Warning: LangChain library does not exist."
154
+ "Warning: LangChain library does not exist."
180
155
  )
181
- print(Fore.RESET)
182
- else:
183
- instrument_set = True
184
156
  elif instrument == Instruments.MISTRAL:
185
157
  if not init_mistralai_instrumentor():
186
158
  print(
187
- Fore.RED + "Warning: MistralAI library does not exist."
159
+ "Warning: MistralAI library does not exist."
188
160
  )
189
- print(Fore.RESET)
190
- else:
191
- instrument_set = True
192
161
  elif instrument == Instruments.OLLAMA:
193
162
  if not init_ollama_instrumentor():
194
- print(Fore.RED + "Warning: Ollama library does not exist.")
195
- print(Fore.RESET)
196
- else:
197
- instrument_set = True
163
+ print("Warning: Ollama library does not exist.")
198
164
  elif instrument == Instruments.LLAMA_INDEX:
199
165
  if not init_llama_index_instrumentor():
200
166
  print(
201
- Fore.RED + "Warning: LlamaIndex library does not exist."
167
+ "Warning: LlamaIndex library does not exist."
202
168
  )
203
- print(Fore.RESET)
204
- else:
205
- instrument_set = True
206
169
  elif instrument == Instruments.MILVUS:
207
170
  if not init_milvus_instrumentor():
208
- print(Fore.RED + "Warning: Milvus library does not exist.")
209
- print(Fore.RESET)
210
- else:
211
- instrument_set = True
171
+ print("Warning: Milvus library does not exist.")
212
172
  elif instrument == Instruments.TRANSFORMERS:
213
173
  if not init_transformers_instrumentor():
214
- print(
215
- Fore.RED
216
- + "Warning: Transformers library does not exist."
217
- )
218
- print(Fore.RESET)
219
- else:
220
- instrument_set = True
174
+ print("Warning: Transformers library does not exist.")
221
175
  elif instrument == Instruments.TOGETHER:
222
176
  if not init_together_instrumentor():
223
177
  print(
224
- Fore.RED + "Warning: TogetherAI library does not exist."
178
+ "Warning: TogetherAI library does not exist."
225
179
  )
226
- print(Fore.RESET)
227
- else:
228
- instrument_set = True
229
180
  elif instrument == Instruments.REQUESTS:
230
181
  if not init_requests_instrumentor():
231
182
  print(
232
- Fore.RED + "Warning: Requests library does not exist."
183
+ "Warning: Requests library does not exist."
233
184
  )
234
- print(Fore.RESET)
235
- else:
236
- instrument_set = True
237
185
  elif instrument == Instruments.URLLIB3:
238
186
  if not init_urllib3_instrumentor():
239
- print(Fore.RED + "Warning: urllib3 library does not exist.")
240
- print(Fore.RESET)
241
- else:
242
- instrument_set = True
187
+ print("Warning: urllib3 library does not exist.")
243
188
  elif instrument == Instruments.PYMYSQL:
244
189
  if not init_pymysql_instrumentor():
245
- print(Fore.RED + "Warning: PyMySQL library does not exist.")
246
- print(Fore.RESET)
247
- else:
248
- instrument_set = True
190
+ print("Warning: PyMySQL library does not exist.")
249
191
  elif instrument == Instruments.BEDROCK:
250
192
  if not init_bedrock_instrumentor(should_enrich_metrics):
251
- print(Fore.RED + "Warning: Bedrock library does not exist.")
252
- print(Fore.RESET)
253
- else:
254
- instrument_set = True
193
+ print("Warning: Bedrock library does not exist.")
255
194
  elif instrument == Instruments.REPLICATE:
256
195
  if not init_replicate_instrumentor():
257
196
  print(
258
- Fore.RED + "Warning: Replicate library does not exist."
197
+ "Warning: Replicate library does not exist."
259
198
  )
260
- print(Fore.RESET)
261
- else:
262
- instrument_set = True
263
199
  elif instrument == Instruments.VERTEXAI:
264
200
  if not init_vertexai_instrumentor():
265
201
  print(
266
- Fore.RED + "Warning: Vertex AI library does not exist."
202
+ "Warning: Vertex AI library does not exist."
267
203
  )
268
- print(Fore.RESET)
269
- else:
270
- instrument_set = True
271
204
  elif instrument == Instruments.WATSONX:
272
205
  if not init_watsonx_instrumentor():
273
- print(Fore.RED + "Warning: Watsonx library does not exist.")
274
- print(Fore.RESET)
275
- else:
276
- instrument_set = True
206
+ print("Warning: Watsonx library does not exist.")
277
207
  elif instrument == Instruments.WEAVIATE:
278
208
  if not init_weaviate_instrumentor():
279
209
  print(
280
- Fore.RED + "Warning: Weaviate library does not exist."
210
+ "Warning: Weaviate library does not exist."
281
211
  )
282
- print(Fore.RESET)
283
- else:
284
- instrument_set = True
285
212
  elif instrument == Instruments.ALEPHALPHA:
286
213
  if not init_alephalpha_instrumentor():
287
- print(
288
- Fore.RED
289
- + "Warning: Aleph Alpha library does not exist."
290
- )
291
- print(Fore.RESET)
292
- else:
293
- instrument_set = True
214
+ print("Warning: Aleph Alpha library does not exist.")
294
215
  elif instrument == Instruments.MARQO:
295
216
  if not init_marqo_instrumentor():
296
- print(Fore.RED + "Warning: marqo library does not exist.")
297
- print(Fore.RESET)
298
- else:
299
- instrument_set = True
217
+ print("Warning: marqo library does not exist.")
300
218
  elif instrument == Instruments.LANCEDB:
301
219
  if not init_lancedb_instrumentor():
302
- print(Fore.RED + "Warning: LanceDB library does not exist.")
303
- print(Fore.RESET)
304
- else:
305
- instrument_set = True
220
+ print("Warning: LanceDB library does not exist.")
306
221
  elif instrument == Instruments.REDIS:
307
222
  if not init_redis_instrumentor():
308
- print(Fore.RED + "Warning: redis library does not exist.")
309
- print(Fore.RESET)
310
- else:
311
- instrument_set = True
223
+ print("Warning: redis library does not exist.")
312
224
 
313
225
  else:
314
226
  print(
315
- Fore.RED
316
- + "Warning: "
227
+ "Warning: "
317
228
  + instrument
318
229
  + " instrumentation does not exist."
319
230
  )
@@ -322,14 +233,6 @@ class TracerWrapper(object):
322
233
  + "from lmnr.traceloop_sdk.instruments import Instruments\n"
323
234
  + 'Traceloop.init(app_name="...", instruments=set([Instruments.OPENAI]))'
324
235
  )
325
- print(Fore.RESET)
326
-
327
- if not instrument_set:
328
- print(
329
- Fore.RED + "Warning: No valid instruments set. Remove 'instrument' "
330
- "argument to use all instruments, or set a valid instrument."
331
- )
332
- print(Fore.RESET)
333
236
 
334
237
  obj.__content_allow_list = ContentAllowList()
335
238
 
@@ -352,49 +255,6 @@ class TracerWrapper(object):
352
255
  else:
353
256
  attach(set_value("override_enable_content_tracing", False))
354
257
 
355
- if is_llm_span(span):
356
- managed_prompt = get_value("managed_prompt")
357
- if managed_prompt is not None:
358
- span.set_attribute(
359
- SpanAttributes.TRACELOOP_PROMPT_MANAGED, managed_prompt
360
- )
361
-
362
- prompt_key = get_value("prompt_key")
363
- if prompt_key is not None:
364
- span.set_attribute(SpanAttributes.TRACELOOP_PROMPT_KEY, prompt_key)
365
-
366
- prompt_version = get_value("prompt_version")
367
- if prompt_version is not None:
368
- span.set_attribute(
369
- SpanAttributes.TRACELOOP_PROMPT_VERSION, prompt_version
370
- )
371
-
372
- prompt_version_name = get_value("prompt_version_name")
373
- if prompt_version_name is not None:
374
- span.set_attribute(
375
- SpanAttributes.TRACELOOP_PROMPT_VERSION_NAME, prompt_version_name
376
- )
377
-
378
- prompt_version_hash = get_value("prompt_version_hash")
379
- if prompt_version_hash is not None:
380
- span.set_attribute(
381
- SpanAttributes.TRACELOOP_PROMPT_VERSION_HASH, prompt_version_hash
382
- )
383
-
384
- prompt_template = get_value("prompt_template")
385
- if prompt_template is not None:
386
- span.set_attribute(
387
- SpanAttributes.TRACELOOP_PROMPT_TEMPLATE, prompt_template
388
- )
389
-
390
- prompt_template_variables = get_value("prompt_template_variables")
391
- if prompt_template_variables is not None:
392
- for key, value in prompt_template_variables.items():
393
- span.set_attribute(
394
- f"{SpanAttributes.TRACELOOP_PROMPT_TEMPLATE_VARIABLES}.{key}",
395
- value,
396
- )
397
-
398
258
  # Call original on_start method if it exists in custom processor
399
259
  if self.__spans_processor_original_on_start:
400
260
  self.__spans_processor_original_on_start(span, parent_context)
@@ -419,11 +279,7 @@ class TracerWrapper(object):
419
279
  if (os.getenv("TRACELOOP_SUPPRESS_WARNINGS") or "false").lower() == "true":
420
280
  return False
421
281
 
422
- print(
423
- Fore.RED
424
- + "Warning: Traceloop not initialized, make sure you call Traceloop.init()"
425
- )
426
- print(Fore.RESET)
282
+ print("Warning: Laminar not initialized, make sure to initialize")
427
283
  return False
428
284
 
429
285
  def flush(self):
@@ -458,7 +314,7 @@ def update_association_properties(properties: dict) -> None:
458
314
  def _set_association_properties_attributes(span, properties: dict) -> None:
459
315
  for key, value in properties.items():
460
316
  span.set_attribute(
461
- f"{SpanAttributes.TRACELOOP_ASSOCIATION_PROPERTIES}.{key}", value
317
+ f"{ASSOCIATION_PROPERTIES}.{key}", value
462
318
  )
463
319
 
464
320
 
@@ -486,10 +342,6 @@ def set_external_prompt_tracing_context(
486
342
  attach(set_value("prompt_template_variables", variables))
487
343
 
488
344
 
489
- def is_llm_span(span) -> bool:
490
- return span.attributes.get(SpanAttributes.LLM_REQUEST_TYPE) is not None
491
-
492
-
493
345
  def init_spans_exporter(api_endpoint: str, headers: Dict[str, str]) -> SpanExporter:
494
346
  if "http" in api_endpoint.lower() or "https" in api_endpoint.lower():
495
347
  return HTTPExporter(endpoint=f"{api_endpoint}/v1/traces", headers=headers)
@@ -558,7 +410,6 @@ def init_openai_instrumentor(should_enrich_metrics: bool):
558
410
  # exception_logger=lambda e: Telemetry().log_exception(e),
559
411
  enrich_assistant=should_enrich_metrics,
560
412
  enrich_token_usage=should_enrich_metrics,
561
- get_common_metrics_attributes=metrics_common_attributes,
562
413
  )
563
414
  if not instrumentor.is_instrumented_by_opentelemetry:
564
415
  instrumentor.instrument()
@@ -579,7 +430,6 @@ def init_anthropic_instrumentor(should_enrich_metrics: bool):
579
430
  instrumentor = AnthropicInstrumentor(
580
431
  # exception_logger=lambda e: Telemetry().log_exception(e),
581
432
  enrich_token_usage=should_enrich_metrics,
582
- get_common_metrics_attributes=metrics_common_attributes,
583
433
  )
584
434
  if not instrumentor.is_instrumented_by_opentelemetry:
585
435
  instrumentor.instrument()
@@ -1047,23 +897,3 @@ def init_groq_instrumentor():
1047
897
  logging.error(f"Error initializing Groq instrumentor: {e}")
1048
898
  # Telemetry().log_exception(e)
1049
899
  return False
1050
-
1051
-
1052
- def metrics_common_attributes():
1053
- common_attributes = {}
1054
- workflow_name = get_value("workflow_name")
1055
- if workflow_name is not None:
1056
- common_attributes[SpanAttributes.TRACELOOP_WORKFLOW_NAME] = workflow_name
1057
-
1058
- entity_name = get_value("entity_name")
1059
- if entity_name is not None:
1060
- common_attributes[SpanAttributes.TRACELOOP_ENTITY_NAME] = entity_name
1061
-
1062
- association_properties = get_value("association_properties")
1063
- if association_properties is not None:
1064
- for key, value in association_properties.items():
1065
- common_attributes[
1066
- f"{SpanAttributes.TRACELOOP_ASSOCIATION_PROPERTIES}.{key}"
1067
- ] = value
1068
-
1069
- return common_attributes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lmnr
3
- Version: 0.4.11
3
+ Version: 0.4.12
4
4
  Summary: Python SDK for Laminar AI
5
5
  License: Apache-2.0
6
6
  Author: lmnr.ai
@@ -11,9 +11,9 @@ Classifier: Programming Language :: Python :: 3.9
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
13
  Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: argparse (>=1.0,<2.0)
14
15
  Requires-Dist: asyncio (>=3.0,<4.0)
15
16
  Requires-Dist: backoff (>=2.0,<3.0)
16
- Requires-Dist: colorama (>=0.4,<0.5)
17
17
  Requires-Dist: deprecated (>=1.0,<2.0)
18
18
  Requires-Dist: jinja2 (>=3.0,<4.0)
19
19
  Requires-Dist: opentelemetry-api (>=1.27.0,<2.0.0)
@@ -54,67 +54,42 @@ Requires-Dist: pydantic (>=2.7,<3.0)
54
54
  Requires-Dist: python-dotenv (>=1.0,<2.0)
55
55
  Requires-Dist: requests (>=2.0,<3.0)
56
56
  Requires-Dist: tenacity (>=8.0,<9.0)
57
+ Requires-Dist: tqdm (>=4.0,<5.0)
57
58
  Description-Content-Type: text/markdown
58
59
 
59
60
  # Laminar Python
60
61
 
61
- OpenTelemetry log sender for [Laminar](https://github.com/lmnr-ai/lmnr) for Python code.
62
+ Python SDK for [Laminar](https://www.lmnr.ai).
63
+
64
+ [Laminar](https://www.lmnr.ai) is an open-source platform for engineering LLM products. Trace, evaluate, annotate, and analyze LLM data. Bring LLM applications to production with confidence.
65
+
66
+ Check out our [open-source repo](https://github.com/lmnr-ai/lmnr) and don't forget to star it ⭐
62
67
 
63
68
  <a href="https://pypi.org/project/lmnr/"> ![PyPI - Version](https://img.shields.io/pypi/v/lmnr?label=lmnr&logo=pypi&logoColor=3775A9) </a>
64
69
  ![PyPI - Downloads](https://img.shields.io/pypi/dm/lmnr)
65
70
  ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/lmnr)
66
71
 
67
72
 
68
-
69
73
  ## Quickstart
70
74
 
71
75
  First, install the package:
72
76
 
73
77
  ```sh
74
- python3 -m venv .myenv
75
- source .myenv/bin/activate # or use your favorite env management tool
76
-
77
78
  pip install lmnr
78
79
  ```
79
80
 
80
- Then, you can initialize Laminar in your main file and instrument your code.
81
+ Then initialize Laminar in your code:
81
82
 
82
83
  ```python
83
- import os
84
- from openai import OpenAI
85
84
  from lmnr import Laminar as L
86
85
 
87
- L.initialize(
88
- project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
89
- )
90
-
91
- client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
92
-
93
- def poem_writer(topic: str):
94
- prompt = f"write a poem about {topic}"
95
-
96
- # OpenAI calls are automatically instrumented
97
- response = client.chat.completions.create(
98
- model="gpt-4o",
99
- messages=[
100
- {"role": "system", "content": "You are a helpful assistant."},
101
- {"role": "user", "content": prompt},
102
- ],
103
- )
104
- poem = response.choices[0].message.content
105
- return poem
106
-
107
- if __name__ == "__main__":
108
- print(poem_writer("laminar flow"))
109
-
86
+ L.initialize(project_api_key="<PROJECT_API_KEY>")
110
87
  ```
111
88
 
112
- Note that you need to only initialize Laminar once in your application.
89
+ This will automatically instrument most of the LLM, Vector DB, and related
90
+ calls with OpenTelemetry-compatible instrumentation.
113
91
 
114
- ### Project API key
115
-
116
- Get the key from the settings page of your Laminar project ([Learn more](https://docs.lmnr.ai/api-reference/introduction#authentication)).
117
- You can either pass it to `.initialize()` or set it to `.env` at the root of your package with the key `LMNR_PROJECT_API_KEY`.
92
+ Note that you only need to initialize Laminar once in your application.
118
93
 
119
94
  ## Instrumentation
120
95
 
@@ -195,7 +170,7 @@ L.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"], instruments={In
195
170
 
196
171
  If you want to fully disable any kind of autoinstrumentation, pass an empty set as `instruments=set()` to `.initialize()`.
197
172
 
198
- Majority of the autoinstrumentations are provided by Traceloop's [OpenLLMetry](https://github.com/traceloop/openllmetry).
173
+ Autoinstrumentations are provided by Traceloop's [OpenLLMetry](https://github.com/traceloop/openllmetry).
199
174
 
200
175
  ## Sending events
201
176
 
@@ -223,6 +198,67 @@ L.event("topic alignment", topic in poem)
223
198
  L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
224
199
  ```
225
200
 
201
+ ## Evaluations
202
+
203
+ ### Quickstart
204
+
205
+ Install the package:
206
+
207
+ ```sh
208
+ pip install lmnr
209
+ ```
210
+
211
+ Create a file named `my_first_eval.py` with the following code:
212
+
213
+ ```python
214
+ from lmnr import evaluate
215
+
216
+ def write_poem(data):
217
+ return f"This is a good poem about {data['topic']}"
218
+
219
+ def contains_poem(output, target):
220
+ return 1 if output in target['poem'] else 0
221
+
222
+ # Evaluation data
223
+ data = [
224
+ {"data": {"topic": "flowers"}, "target": {"poem": "This is a good poem about flowers"}},
225
+ {"data": {"topic": "cars"}, "target": {"poem": "I like cars"}},
226
+ ]
227
+
228
+ evaluate(
229
+ data=data,
230
+ executor=write_poem,
231
+ evaluators={
232
+ "containsPoem": contains_poem
233
+ }
234
+ )
235
+ ```
236
+
237
+ Run the following commands:
238
+
239
+ ```sh
240
+ export LMNR_PROJECT_API_KEY=<YOUR_PROJECT_API_KEY> # get from Laminar project settings
241
+ lmnr eval my_first_eval.py # run in the virtual environment where lmnr is installed
242
+ ```
243
+
244
+ Visit the URL printed in the console to see the results.
245
+
246
+ ### Overview
247
+
248
+ Bring rigor to the development of your LLM applications with evaluations.
249
+
250
+ You can run evaluations locally by providing an executor (part of the logic used in your application) and evaluators (numeric scoring functions) to the `evaluate` function.
251
+
252
+ `evaluate` takes in the following parameters:
253
+ - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
254
+ - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be either a regular function or an `async` function.
255
+ - `evaluators` – a dictionary that maps evaluator names to evaluators. Each evaluator is a function that takes the output of the executor as the first argument and `target` as the second argument, and produces a numeric score. Each function can produce either a single number or a `dict[str, int|float]` of scores. Each evaluator can be either a regular function or an `async` function.
256
+ - `name` – optional name for the evaluation. Automatically generated if not provided.
257
+
258
+ \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
259
+
260
+ [Read docs](https://docs.lmnr.ai/evaluations/introduction) to learn more about evaluations.
261
+
226
262
  ## Laminar pipelines as prompt chain managers
227
263
 
228
264
  You can create Laminar pipelines in the UI and manage chains of LLM calls there.
@@ -257,65 +293,3 @@ PipelineRunResponse(
257
293
  )
258
294
  ```
259
295
 
260
- ## Running offline evaluations on your data
261
-
262
- You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
263
-
264
- Evaluation takes in the following parameters:
265
- - `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
266
- - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
267
- - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
268
- - `evaluators` – evaluaton logic. List of functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores.
269
-
270
- \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
271
-
272
- ### Example
273
-
274
- ```python
275
- from openai import AsyncOpenAI
276
- import asyncio
277
- import os
278
-
279
- openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
280
-
281
- async def get_capital(data):
282
- country = data["country"]
283
- response = await openai_client.chat.completions.create(
284
- model="gpt-4o-mini",
285
- messages=[
286
- {"role": "system", "content": "You are a helpful assistant."},
287
- {
288
- "role": "user",
289
- "content": f"What is the capital of {country}? Just name the "
290
- "city and nothing else",
291
- },
292
- ],
293
- )
294
- return response.choices[0].message.content.strip()
295
-
296
-
297
- # Evaluation data
298
- data = [
299
- {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
300
- {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
301
- {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
302
- ]
303
-
304
-
305
- def evaluator_A(output, target):
306
- return 1 if output == target["capital"] else 0
307
-
308
-
309
- # Create an Evaluation instance
310
- e = Evaluation(
311
- name="py-evaluation-async",
312
- data=data,
313
- executor=get_capital,
314
- evaluators=[evaluator_A],
315
- project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
316
- )
317
-
318
- # Run the evaluation
319
- asyncio.run(e.run())
320
- ```
321
-