lmnr 0.4.11__py3-none-any.whl → 0.4.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lmnr/__init__.py +1 -1
- lmnr/cli.py +39 -0
- lmnr/sdk/decorators.py +3 -2
- lmnr/sdk/evaluations.py +245 -76
- lmnr/sdk/laminar.py +81 -44
- lmnr/sdk/types.py +44 -5
- lmnr/sdk/utils.py +4 -5
- lmnr/traceloop_sdk/__init__.py +3 -42
- lmnr/traceloop_sdk/config/__init__.py +0 -4
- lmnr/traceloop_sdk/decorators/base.py +16 -9
- lmnr/traceloop_sdk/tracing/attributes.py +8 -0
- lmnr/traceloop_sdk/tracing/tracing.py +31 -201
- {lmnr-0.4.11.dist-info → lmnr-0.4.12.dist-info}/METADATA +75 -101
- {lmnr-0.4.11.dist-info → lmnr-0.4.12.dist-info}/RECORD +17 -18
- lmnr-0.4.12.dist-info/entry_points.txt +3 -0
- lmnr/traceloop_sdk/metrics/__init__.py +0 -0
- lmnr/traceloop_sdk/metrics/metrics.py +0 -176
- lmnr/traceloop_sdk/tracing/manual.py +0 -57
- lmnr-0.4.11.dist-info/entry_points.txt +0 -3
- {lmnr-0.4.11.dist-info → lmnr-0.4.12.dist-info}/LICENSE +0 -0
- {lmnr-0.4.11.dist-info → lmnr-0.4.12.dist-info}/WHEEL +0 -0
@@ -3,7 +3,6 @@ import logging
|
|
3
3
|
import os
|
4
4
|
|
5
5
|
|
6
|
-
from colorama import Fore
|
7
6
|
from opentelemetry import trace
|
8
7
|
from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
|
9
8
|
OTLPSpanExporter as HTTPExporter,
|
@@ -24,17 +23,16 @@ from opentelemetry.trace import get_tracer_provider, ProxyTracerProvider
|
|
24
23
|
from opentelemetry.context import get_value, attach, set_value
|
25
24
|
from opentelemetry.instrumentation.threading import ThreadingInstrumentor
|
26
25
|
|
27
|
-
from opentelemetry.semconv_ai import SpanAttributes
|
28
|
-
|
29
26
|
# from lmnr.traceloop_sdk import Telemetry
|
30
27
|
from lmnr.traceloop_sdk.instruments import Instruments
|
28
|
+
from lmnr.traceloop_sdk.tracing.attributes import ASSOCIATION_PROPERTIES
|
31
29
|
from lmnr.traceloop_sdk.tracing.content_allow_list import ContentAllowList
|
32
30
|
from lmnr.traceloop_sdk.utils import is_notebook
|
33
31
|
from lmnr.traceloop_sdk.utils.package_check import is_package_installed
|
34
32
|
from typing import Dict, Optional, Set
|
35
33
|
|
36
34
|
|
37
|
-
TRACER_NAME = "
|
35
|
+
TRACER_NAME = "lmnr.tracer"
|
38
36
|
EXCLUDED_URLS = """
|
39
37
|
iam.cloud.ibm.com,
|
40
38
|
dataplatform.cloud.ibm.com,
|
@@ -44,7 +42,7 @@ EXCLUDED_URLS = """
|
|
44
42
|
api.anthropic.com,
|
45
43
|
api.cohere.ai,
|
46
44
|
pinecone.io,
|
47
|
-
|
45
|
+
api.lmnr.ai,
|
48
46
|
posthog.com,
|
49
47
|
sentry.io,
|
50
48
|
bedrock-runtime,
|
@@ -124,196 +122,109 @@ class TracerWrapper(object):
|
|
124
122
|
# this makes sure otel context is propagated so we always want it
|
125
123
|
ThreadingInstrumentor().instrument()
|
126
124
|
|
127
|
-
instrument_set = False
|
128
125
|
if instruments is None:
|
129
126
|
init_instrumentations(should_enrich_metrics)
|
130
|
-
instrument_set = True
|
131
127
|
else:
|
132
128
|
for instrument in instruments:
|
133
129
|
if instrument == Instruments.OPENAI:
|
134
130
|
if not init_openai_instrumentor(should_enrich_metrics):
|
135
|
-
print(
|
136
|
-
print(Fore.RESET)
|
137
|
-
else:
|
138
|
-
instrument_set = True
|
131
|
+
print("Warning: OpenAI library does not exist.")
|
139
132
|
elif instrument == Instruments.ANTHROPIC:
|
140
133
|
if not init_anthropic_instrumentor(should_enrich_metrics):
|
141
134
|
print(
|
142
|
-
|
135
|
+
"Warning: Anthropic library does not exist."
|
143
136
|
)
|
144
|
-
print(Fore.RESET)
|
145
|
-
else:
|
146
|
-
instrument_set = True
|
147
137
|
elif instrument == Instruments.COHERE:
|
148
138
|
if not init_cohere_instrumentor():
|
149
|
-
print(
|
150
|
-
print(Fore.RESET)
|
151
|
-
else:
|
152
|
-
instrument_set = True
|
139
|
+
print("Warning: Cohere library does not exist.")
|
153
140
|
elif instrument == Instruments.PINECONE:
|
154
141
|
if not init_pinecone_instrumentor():
|
155
142
|
print(
|
156
|
-
|
143
|
+
"Warning: Pinecone library does not exist."
|
157
144
|
)
|
158
|
-
print(Fore.RESET)
|
159
|
-
else:
|
160
|
-
instrument_set = True
|
161
145
|
elif instrument == Instruments.CHROMA:
|
162
146
|
if not init_chroma_instrumentor():
|
163
|
-
print(
|
164
|
-
print(Fore.RESET)
|
165
|
-
else:
|
166
|
-
instrument_set = True
|
147
|
+
print("Warning: Chroma library does not exist.")
|
167
148
|
elif instrument == Instruments.GOOGLE_GENERATIVEAI:
|
168
149
|
if not init_google_generativeai_instrumentor():
|
169
|
-
print(
|
170
|
-
Fore.RED
|
171
|
-
+ "Warning: Google Generative AI library does not exist."
|
172
|
-
)
|
173
|
-
print(Fore.RESET)
|
174
|
-
else:
|
175
|
-
instrument_set = True
|
150
|
+
print("Warning: Google Generative AI library does not exist.")
|
176
151
|
elif instrument == Instruments.LANGCHAIN:
|
177
152
|
if not init_langchain_instrumentor():
|
178
153
|
print(
|
179
|
-
|
154
|
+
"Warning: LangChain library does not exist."
|
180
155
|
)
|
181
|
-
print(Fore.RESET)
|
182
|
-
else:
|
183
|
-
instrument_set = True
|
184
156
|
elif instrument == Instruments.MISTRAL:
|
185
157
|
if not init_mistralai_instrumentor():
|
186
158
|
print(
|
187
|
-
|
159
|
+
"Warning: MistralAI library does not exist."
|
188
160
|
)
|
189
|
-
print(Fore.RESET)
|
190
|
-
else:
|
191
|
-
instrument_set = True
|
192
161
|
elif instrument == Instruments.OLLAMA:
|
193
162
|
if not init_ollama_instrumentor():
|
194
|
-
print(
|
195
|
-
print(Fore.RESET)
|
196
|
-
else:
|
197
|
-
instrument_set = True
|
163
|
+
print("Warning: Ollama library does not exist.")
|
198
164
|
elif instrument == Instruments.LLAMA_INDEX:
|
199
165
|
if not init_llama_index_instrumentor():
|
200
166
|
print(
|
201
|
-
|
167
|
+
"Warning: LlamaIndex library does not exist."
|
202
168
|
)
|
203
|
-
print(Fore.RESET)
|
204
|
-
else:
|
205
|
-
instrument_set = True
|
206
169
|
elif instrument == Instruments.MILVUS:
|
207
170
|
if not init_milvus_instrumentor():
|
208
|
-
print(
|
209
|
-
print(Fore.RESET)
|
210
|
-
else:
|
211
|
-
instrument_set = True
|
171
|
+
print("Warning: Milvus library does not exist.")
|
212
172
|
elif instrument == Instruments.TRANSFORMERS:
|
213
173
|
if not init_transformers_instrumentor():
|
214
|
-
print(
|
215
|
-
Fore.RED
|
216
|
-
+ "Warning: Transformers library does not exist."
|
217
|
-
)
|
218
|
-
print(Fore.RESET)
|
219
|
-
else:
|
220
|
-
instrument_set = True
|
174
|
+
print("Warning: Transformers library does not exist.")
|
221
175
|
elif instrument == Instruments.TOGETHER:
|
222
176
|
if not init_together_instrumentor():
|
223
177
|
print(
|
224
|
-
|
178
|
+
"Warning: TogetherAI library does not exist."
|
225
179
|
)
|
226
|
-
print(Fore.RESET)
|
227
|
-
else:
|
228
|
-
instrument_set = True
|
229
180
|
elif instrument == Instruments.REQUESTS:
|
230
181
|
if not init_requests_instrumentor():
|
231
182
|
print(
|
232
|
-
|
183
|
+
"Warning: Requests library does not exist."
|
233
184
|
)
|
234
|
-
print(Fore.RESET)
|
235
|
-
else:
|
236
|
-
instrument_set = True
|
237
185
|
elif instrument == Instruments.URLLIB3:
|
238
186
|
if not init_urllib3_instrumentor():
|
239
|
-
print(
|
240
|
-
print(Fore.RESET)
|
241
|
-
else:
|
242
|
-
instrument_set = True
|
187
|
+
print("Warning: urllib3 library does not exist.")
|
243
188
|
elif instrument == Instruments.PYMYSQL:
|
244
189
|
if not init_pymysql_instrumentor():
|
245
|
-
print(
|
246
|
-
print(Fore.RESET)
|
247
|
-
else:
|
248
|
-
instrument_set = True
|
190
|
+
print("Warning: PyMySQL library does not exist.")
|
249
191
|
elif instrument == Instruments.BEDROCK:
|
250
192
|
if not init_bedrock_instrumentor(should_enrich_metrics):
|
251
|
-
print(
|
252
|
-
print(Fore.RESET)
|
253
|
-
else:
|
254
|
-
instrument_set = True
|
193
|
+
print("Warning: Bedrock library does not exist.")
|
255
194
|
elif instrument == Instruments.REPLICATE:
|
256
195
|
if not init_replicate_instrumentor():
|
257
196
|
print(
|
258
|
-
|
197
|
+
"Warning: Replicate library does not exist."
|
259
198
|
)
|
260
|
-
print(Fore.RESET)
|
261
|
-
else:
|
262
|
-
instrument_set = True
|
263
199
|
elif instrument == Instruments.VERTEXAI:
|
264
200
|
if not init_vertexai_instrumentor():
|
265
201
|
print(
|
266
|
-
|
202
|
+
"Warning: Vertex AI library does not exist."
|
267
203
|
)
|
268
|
-
print(Fore.RESET)
|
269
|
-
else:
|
270
|
-
instrument_set = True
|
271
204
|
elif instrument == Instruments.WATSONX:
|
272
205
|
if not init_watsonx_instrumentor():
|
273
|
-
print(
|
274
|
-
print(Fore.RESET)
|
275
|
-
else:
|
276
|
-
instrument_set = True
|
206
|
+
print("Warning: Watsonx library does not exist.")
|
277
207
|
elif instrument == Instruments.WEAVIATE:
|
278
208
|
if not init_weaviate_instrumentor():
|
279
209
|
print(
|
280
|
-
|
210
|
+
"Warning: Weaviate library does not exist."
|
281
211
|
)
|
282
|
-
print(Fore.RESET)
|
283
|
-
else:
|
284
|
-
instrument_set = True
|
285
212
|
elif instrument == Instruments.ALEPHALPHA:
|
286
213
|
if not init_alephalpha_instrumentor():
|
287
|
-
print(
|
288
|
-
Fore.RED
|
289
|
-
+ "Warning: Aleph Alpha library does not exist."
|
290
|
-
)
|
291
|
-
print(Fore.RESET)
|
292
|
-
else:
|
293
|
-
instrument_set = True
|
214
|
+
print("Warning: Aleph Alpha library does not exist.")
|
294
215
|
elif instrument == Instruments.MARQO:
|
295
216
|
if not init_marqo_instrumentor():
|
296
|
-
print(
|
297
|
-
print(Fore.RESET)
|
298
|
-
else:
|
299
|
-
instrument_set = True
|
217
|
+
print("Warning: marqo library does not exist.")
|
300
218
|
elif instrument == Instruments.LANCEDB:
|
301
219
|
if not init_lancedb_instrumentor():
|
302
|
-
print(
|
303
|
-
print(Fore.RESET)
|
304
|
-
else:
|
305
|
-
instrument_set = True
|
220
|
+
print("Warning: LanceDB library does not exist.")
|
306
221
|
elif instrument == Instruments.REDIS:
|
307
222
|
if not init_redis_instrumentor():
|
308
|
-
print(
|
309
|
-
print(Fore.RESET)
|
310
|
-
else:
|
311
|
-
instrument_set = True
|
223
|
+
print("Warning: redis library does not exist.")
|
312
224
|
|
313
225
|
else:
|
314
226
|
print(
|
315
|
-
|
316
|
-
+ "Warning: "
|
227
|
+
"Warning: "
|
317
228
|
+ instrument
|
318
229
|
+ " instrumentation does not exist."
|
319
230
|
)
|
@@ -322,14 +233,6 @@ class TracerWrapper(object):
|
|
322
233
|
+ "from lmnr.traceloop_sdk.instruments import Instruments\n"
|
323
234
|
+ 'Traceloop.init(app_name="...", instruments=set([Instruments.OPENAI]))'
|
324
235
|
)
|
325
|
-
print(Fore.RESET)
|
326
|
-
|
327
|
-
if not instrument_set:
|
328
|
-
print(
|
329
|
-
Fore.RED + "Warning: No valid instruments set. Remove 'instrument' "
|
330
|
-
"argument to use all instruments, or set a valid instrument."
|
331
|
-
)
|
332
|
-
print(Fore.RESET)
|
333
236
|
|
334
237
|
obj.__content_allow_list = ContentAllowList()
|
335
238
|
|
@@ -352,49 +255,6 @@ class TracerWrapper(object):
|
|
352
255
|
else:
|
353
256
|
attach(set_value("override_enable_content_tracing", False))
|
354
257
|
|
355
|
-
if is_llm_span(span):
|
356
|
-
managed_prompt = get_value("managed_prompt")
|
357
|
-
if managed_prompt is not None:
|
358
|
-
span.set_attribute(
|
359
|
-
SpanAttributes.TRACELOOP_PROMPT_MANAGED, managed_prompt
|
360
|
-
)
|
361
|
-
|
362
|
-
prompt_key = get_value("prompt_key")
|
363
|
-
if prompt_key is not None:
|
364
|
-
span.set_attribute(SpanAttributes.TRACELOOP_PROMPT_KEY, prompt_key)
|
365
|
-
|
366
|
-
prompt_version = get_value("prompt_version")
|
367
|
-
if prompt_version is not None:
|
368
|
-
span.set_attribute(
|
369
|
-
SpanAttributes.TRACELOOP_PROMPT_VERSION, prompt_version
|
370
|
-
)
|
371
|
-
|
372
|
-
prompt_version_name = get_value("prompt_version_name")
|
373
|
-
if prompt_version_name is not None:
|
374
|
-
span.set_attribute(
|
375
|
-
SpanAttributes.TRACELOOP_PROMPT_VERSION_NAME, prompt_version_name
|
376
|
-
)
|
377
|
-
|
378
|
-
prompt_version_hash = get_value("prompt_version_hash")
|
379
|
-
if prompt_version_hash is not None:
|
380
|
-
span.set_attribute(
|
381
|
-
SpanAttributes.TRACELOOP_PROMPT_VERSION_HASH, prompt_version_hash
|
382
|
-
)
|
383
|
-
|
384
|
-
prompt_template = get_value("prompt_template")
|
385
|
-
if prompt_template is not None:
|
386
|
-
span.set_attribute(
|
387
|
-
SpanAttributes.TRACELOOP_PROMPT_TEMPLATE, prompt_template
|
388
|
-
)
|
389
|
-
|
390
|
-
prompt_template_variables = get_value("prompt_template_variables")
|
391
|
-
if prompt_template_variables is not None:
|
392
|
-
for key, value in prompt_template_variables.items():
|
393
|
-
span.set_attribute(
|
394
|
-
f"{SpanAttributes.TRACELOOP_PROMPT_TEMPLATE_VARIABLES}.{key}",
|
395
|
-
value,
|
396
|
-
)
|
397
|
-
|
398
258
|
# Call original on_start method if it exists in custom processor
|
399
259
|
if self.__spans_processor_original_on_start:
|
400
260
|
self.__spans_processor_original_on_start(span, parent_context)
|
@@ -419,11 +279,7 @@ class TracerWrapper(object):
|
|
419
279
|
if (os.getenv("TRACELOOP_SUPPRESS_WARNINGS") or "false").lower() == "true":
|
420
280
|
return False
|
421
281
|
|
422
|
-
print(
|
423
|
-
Fore.RED
|
424
|
-
+ "Warning: Traceloop not initialized, make sure you call Traceloop.init()"
|
425
|
-
)
|
426
|
-
print(Fore.RESET)
|
282
|
+
print("Warning: Laminar not initialized, make sure to initialize")
|
427
283
|
return False
|
428
284
|
|
429
285
|
def flush(self):
|
@@ -458,7 +314,7 @@ def update_association_properties(properties: dict) -> None:
|
|
458
314
|
def _set_association_properties_attributes(span, properties: dict) -> None:
|
459
315
|
for key, value in properties.items():
|
460
316
|
span.set_attribute(
|
461
|
-
f"{
|
317
|
+
f"{ASSOCIATION_PROPERTIES}.{key}", value
|
462
318
|
)
|
463
319
|
|
464
320
|
|
@@ -486,10 +342,6 @@ def set_external_prompt_tracing_context(
|
|
486
342
|
attach(set_value("prompt_template_variables", variables))
|
487
343
|
|
488
344
|
|
489
|
-
def is_llm_span(span) -> bool:
|
490
|
-
return span.attributes.get(SpanAttributes.LLM_REQUEST_TYPE) is not None
|
491
|
-
|
492
|
-
|
493
345
|
def init_spans_exporter(api_endpoint: str, headers: Dict[str, str]) -> SpanExporter:
|
494
346
|
if "http" in api_endpoint.lower() or "https" in api_endpoint.lower():
|
495
347
|
return HTTPExporter(endpoint=f"{api_endpoint}/v1/traces", headers=headers)
|
@@ -558,7 +410,6 @@ def init_openai_instrumentor(should_enrich_metrics: bool):
|
|
558
410
|
# exception_logger=lambda e: Telemetry().log_exception(e),
|
559
411
|
enrich_assistant=should_enrich_metrics,
|
560
412
|
enrich_token_usage=should_enrich_metrics,
|
561
|
-
get_common_metrics_attributes=metrics_common_attributes,
|
562
413
|
)
|
563
414
|
if not instrumentor.is_instrumented_by_opentelemetry:
|
564
415
|
instrumentor.instrument()
|
@@ -579,7 +430,6 @@ def init_anthropic_instrumentor(should_enrich_metrics: bool):
|
|
579
430
|
instrumentor = AnthropicInstrumentor(
|
580
431
|
# exception_logger=lambda e: Telemetry().log_exception(e),
|
581
432
|
enrich_token_usage=should_enrich_metrics,
|
582
|
-
get_common_metrics_attributes=metrics_common_attributes,
|
583
433
|
)
|
584
434
|
if not instrumentor.is_instrumented_by_opentelemetry:
|
585
435
|
instrumentor.instrument()
|
@@ -1047,23 +897,3 @@ def init_groq_instrumentor():
|
|
1047
897
|
logging.error(f"Error initializing Groq instrumentor: {e}")
|
1048
898
|
# Telemetry().log_exception(e)
|
1049
899
|
return False
|
1050
|
-
|
1051
|
-
|
1052
|
-
def metrics_common_attributes():
|
1053
|
-
common_attributes = {}
|
1054
|
-
workflow_name = get_value("workflow_name")
|
1055
|
-
if workflow_name is not None:
|
1056
|
-
common_attributes[SpanAttributes.TRACELOOP_WORKFLOW_NAME] = workflow_name
|
1057
|
-
|
1058
|
-
entity_name = get_value("entity_name")
|
1059
|
-
if entity_name is not None:
|
1060
|
-
common_attributes[SpanAttributes.TRACELOOP_ENTITY_NAME] = entity_name
|
1061
|
-
|
1062
|
-
association_properties = get_value("association_properties")
|
1063
|
-
if association_properties is not None:
|
1064
|
-
for key, value in association_properties.items():
|
1065
|
-
common_attributes[
|
1066
|
-
f"{SpanAttributes.TRACELOOP_ASSOCIATION_PROPERTIES}.{key}"
|
1067
|
-
] = value
|
1068
|
-
|
1069
|
-
return common_attributes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lmnr
|
3
|
-
Version: 0.4.11
|
3
|
+
Version: 0.4.12
|
4
4
|
Summary: Python SDK for Laminar AI
|
5
5
|
License: Apache-2.0
|
6
6
|
Author: lmnr.ai
|
@@ -11,9 +11,9 @@ Classifier: Programming Language :: Python :: 3.9
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
13
13
|
Classifier: Programming Language :: Python :: 3.12
|
14
|
+
Requires-Dist: argparse (>=1.0,<2.0)
|
14
15
|
Requires-Dist: asyncio (>=3.0,<4.0)
|
15
16
|
Requires-Dist: backoff (>=2.0,<3.0)
|
16
|
-
Requires-Dist: colorama (>=0.4,<0.5)
|
17
17
|
Requires-Dist: deprecated (>=1.0,<2.0)
|
18
18
|
Requires-Dist: jinja2 (>=3.0,<4.0)
|
19
19
|
Requires-Dist: opentelemetry-api (>=1.27.0,<2.0.0)
|
@@ -54,67 +54,42 @@ Requires-Dist: pydantic (>=2.7,<3.0)
|
|
54
54
|
Requires-Dist: python-dotenv (>=1.0,<2.0)
|
55
55
|
Requires-Dist: requests (>=2.0,<3.0)
|
56
56
|
Requires-Dist: tenacity (>=8.0,<9.0)
|
57
|
+
Requires-Dist: tqdm (>=4.0,<5.0)
|
57
58
|
Description-Content-Type: text/markdown
|
58
59
|
|
59
60
|
# Laminar Python
|
60
61
|
|
61
|
-
|
62
|
+
Python SDK for [Laminar](https://www.lmnr.ai).
|
63
|
+
|
64
|
+
[Laminar](https://www.lmnr.ai) is an open-source platform for engineering LLM products. Trace, evaluate, annotate, and analyze LLM data. Bring LLM applications to production with confidence.
|
65
|
+
|
66
|
+
Check our [open-source repo](https://github.com/lmnr-ai/lmnr) and don't forget to star it ⭐
|
62
67
|
|
63
68
|
<a href="https://pypi.org/project/lmnr/">  </a>
|
64
69
|

|
65
70
|

|
66
71
|
|
67
72
|
|
68
|
-
|
69
73
|
## Quickstart
|
70
74
|
|
71
75
|
First, install the package:
|
72
76
|
|
73
77
|
```sh
|
74
|
-
python3 -m venv .myenv
|
75
|
-
source .myenv/bin/activate # or use your favorite env management tool
|
76
|
-
|
77
78
|
pip install lmnr
|
78
79
|
```
|
79
80
|
|
80
|
-
|
81
|
+
And then in the code
|
81
82
|
|
82
83
|
```python
|
83
|
-
import os
|
84
|
-
from openai import OpenAI
|
85
84
|
from lmnr import Laminar as L
|
86
85
|
|
87
|
-
L.initialize(
|
88
|
-
project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
|
89
|
-
)
|
90
|
-
|
91
|
-
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
|
92
|
-
|
93
|
-
def poem_writer(topic: str):
|
94
|
-
prompt = f"write a poem about {topic}"
|
95
|
-
|
96
|
-
# OpenAI calls are automatically instrumented
|
97
|
-
response = client.chat.completions.create(
|
98
|
-
model="gpt-4o",
|
99
|
-
messages=[
|
100
|
-
{"role": "system", "content": "You are a helpful assistant."},
|
101
|
-
{"role": "user", "content": prompt},
|
102
|
-
],
|
103
|
-
)
|
104
|
-
poem = response.choices[0].message.content
|
105
|
-
return poem
|
106
|
-
|
107
|
-
if __name__ == "__main__":
|
108
|
-
print(poem_writer("laminar flow"))
|
109
|
-
|
86
|
+
L.initialize(project_api_key="<PROJECT_API_KEY>")
|
110
87
|
```
|
111
88
|
|
112
|
-
|
89
|
+
This will automatically instrument most of the LLM, Vector DB, and related
|
90
|
+
calls with OpenTelemetry-compatible instrumentation.
|
113
91
|
|
114
|
-
|
115
|
-
|
116
|
-
Get the key from the settings page of your Laminar project ([Learn more](https://docs.lmnr.ai/api-reference/introduction#authentication)).
|
117
|
-
You can either pass it to `.initialize()` or set it to `.env` at the root of your package with the key `LMNR_PROJECT_API_KEY`.
|
92
|
+
Note that you only need to initialize Laminar once in your application.
|
118
93
|
|
119
94
|
## Instrumentation
|
120
95
|
|
@@ -195,7 +170,7 @@ L.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"], instruments={In
|
|
195
170
|
|
196
171
|
If you want to fully disable any kind of autoinstrumentation, pass an empty set as `instruments=set()` to `.initialize()`.
|
197
172
|
|
198
|
-
|
173
|
+
Autoinstrumentations are provided by Traceloop's [OpenLLMetry](https://github.com/traceloop/openllmetry).
|
199
174
|
|
200
175
|
## Sending events
|
201
176
|
|
@@ -223,6 +198,67 @@ L.event("topic alignment", topic in poem)
|
|
223
198
|
L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
|
224
199
|
```
|
225
200
|
|
201
|
+
## Evaluations
|
202
|
+
|
203
|
+
### Quickstart
|
204
|
+
|
205
|
+
Install the package:
|
206
|
+
|
207
|
+
```sh
|
208
|
+
pip install lmnr
|
209
|
+
```
|
210
|
+
|
211
|
+
Create a file named `my_first_eval.py` with the following code:
|
212
|
+
|
213
|
+
```python
|
214
|
+
from lmnr import evaluate
|
215
|
+
|
216
|
+
def write_poem(data):
|
217
|
+
return f"This is a good poem about {data['topic']}"
|
218
|
+
|
219
|
+
def contains_poem(output, target):
|
220
|
+
return 1 if output in target['poem'] else 0
|
221
|
+
|
222
|
+
# Evaluation data
|
223
|
+
data = [
|
224
|
+
{"data": {"topic": "flowers"}, "target": {"poem": "This is a good poem about flowers"}},
|
225
|
+
{"data": {"topic": "cars"}, "target": {"poem": "I like cars"}},
|
226
|
+
]
|
227
|
+
|
228
|
+
evaluate(
|
229
|
+
data=data,
|
230
|
+
executor=write_poem,
|
231
|
+
evaluators={
|
232
|
+
"containsPoem": contains_poem
|
233
|
+
}
|
234
|
+
)
|
235
|
+
```
|
236
|
+
|
237
|
+
Run the following commands:
|
238
|
+
|
239
|
+
```sh
|
240
|
+
export LMNR_PROJECT_API_KEY=<YOUR_PROJECT_API_KEY> # get from Laminar project settings
|
241
|
+
lmnr eval my_first_eval.py # run in the virtual environment where lmnr is installed
|
242
|
+
```
|
243
|
+
|
244
|
+
Visit the URL printed in the console to see the results.
|
245
|
+
|
246
|
+
### Overview
|
247
|
+
|
248
|
+
Bring rigor to the development of your LLM applications with evaluations.
|
249
|
+
|
250
|
+
You can run evaluations locally by providing an executor (part of the logic used in your application) and evaluators (numeric scoring functions) to the `evaluate` function.
|
251
|
+
|
252
|
+
`evaluate` takes in the following parameters:
|
253
|
+
- `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
|
254
|
+
- `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be either a regular function or an `async` function.
|
255
|
+
- `evaluators` – a dictionary that maps evaluator names to evaluators. Evaluators are functions that take the output of the executor as the first argument and `target` as the second argument, and produce numeric scores. Each function can produce either a single number or a `dict[str, int|float]` of scores. Each evaluator can be either a regular function or an `async` function.
|
256
|
+
- `name` – optional name for the evaluation. Automatically generated if not provided.
|
257
|
+
|
258
|
+
\* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
|
259
|
+
|
260
|
+
[Read docs](https://docs.lmnr.ai/evaluations/introduction) to learn more about evaluations.
|
261
|
+
|
226
262
|
## Laminar pipelines as prompt chain managers
|
227
263
|
|
228
264
|
You can create Laminar pipelines in the UI and manage chains of LLM calls there.
|
@@ -257,65 +293,3 @@ PipelineRunResponse(
|
|
257
293
|
)
|
258
294
|
```
|
259
295
|
|
260
|
-
## Running offline evaluations on your data
|
261
|
-
|
262
|
-
You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
|
263
|
-
|
264
|
-
Evaluation takes in the following parameters:
|
265
|
-
- `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
|
266
|
-
- `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
|
267
|
-
- `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
|
268
|
-
- `evaluators` – evaluaton logic. List of functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores.
|
269
|
-
|
270
|
-
\* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
|
271
|
-
|
272
|
-
### Example
|
273
|
-
|
274
|
-
```python
|
275
|
-
from openai import AsyncOpenAI
|
276
|
-
import asyncio
|
277
|
-
import os
|
278
|
-
|
279
|
-
openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
|
280
|
-
|
281
|
-
async def get_capital(data):
|
282
|
-
country = data["country"]
|
283
|
-
response = await openai_client.chat.completions.create(
|
284
|
-
model="gpt-4o-mini",
|
285
|
-
messages=[
|
286
|
-
{"role": "system", "content": "You are a helpful assistant."},
|
287
|
-
{
|
288
|
-
"role": "user",
|
289
|
-
"content": f"What is the capital of {country}? Just name the "
|
290
|
-
"city and nothing else",
|
291
|
-
},
|
292
|
-
],
|
293
|
-
)
|
294
|
-
return response.choices[0].message.content.strip()
|
295
|
-
|
296
|
-
|
297
|
-
# Evaluation data
|
298
|
-
data = [
|
299
|
-
{"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
|
300
|
-
{"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
|
301
|
-
{"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
|
302
|
-
]
|
303
|
-
|
304
|
-
|
305
|
-
def evaluator_A(output, target):
|
306
|
-
return 1 if output == target["capital"] else 0
|
307
|
-
|
308
|
-
|
309
|
-
# Create an Evaluation instance
|
310
|
-
e = Evaluation(
|
311
|
-
name="py-evaluation-async",
|
312
|
-
data=data,
|
313
|
-
executor=get_capital,
|
314
|
-
evaluators=[evaluator_A],
|
315
|
-
project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
|
316
|
-
)
|
317
|
-
|
318
|
-
# Run the evaluation
|
319
|
-
asyncio.run(e.run())
|
320
|
-
```
|
321
|
-
|