lmnr 0.4.12b4__py3-none-any.whl → 0.4.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lmnr/sdk/evaluations.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import asyncio
2
+ import re
2
3
  import sys
3
4
  from abc import ABC, abstractmethod
4
5
  from contextlib import contextmanager
@@ -12,7 +13,6 @@ from ..traceloop_sdk.tracing.attributes import SPAN_TYPE
12
13
 
13
14
  from .laminar import Laminar as L
14
15
  from .types import (
15
- CreateEvaluationResponse,
16
16
  Datapoint,
17
17
  EvaluationResultDatapoint,
18
18
  EvaluatorFunction,
@@ -46,13 +46,26 @@ def get_evaluation_url(project_id: str, evaluation_id: str):
46
46
  return f"https://www.lmnr.ai/project/{project_id}/evaluations/{evaluation_id}"
47
47
 
48
48
 
49
+ def get_average_scores(results: list[EvaluationResultDatapoint]) -> dict[str, Numeric]:
50
+ per_score_values = {}
51
+ for result in results:
52
+ for key, value in result.scores.items():
53
+ if key not in per_score_values:
54
+ per_score_values[key] = []
55
+ per_score_values[key].append(value)
56
+
57
+ average_scores = {}
58
+ for key, values in per_score_values.items():
59
+ average_scores[key] = sum(values) / len(values)
60
+
61
+ return average_scores
62
+
63
+
49
64
  class EvaluationReporter:
50
65
  def __init__(self):
51
66
  pass
52
67
 
53
- def start(self, name: str, project_id: str, id: str, length: int):
54
- print(f"Running evaluation {name}...\n")
55
- print(f"Check progress and results at {get_evaluation_url(project_id, id)}\n")
68
+ def start(self, length: int):
56
69
  self.cli_progress = tqdm(
57
70
  total=length,
58
71
  bar_format="{bar} {percentage:3.0f}% | ETA: {remaining}s | {n_fmt}/{total_fmt}",
@@ -66,9 +79,10 @@ class EvaluationReporter:
66
79
  self.cli_progress.close()
67
80
  sys.stderr.write(f"\nError: {error}\n")
68
81
 
69
- def stop(self, average_scores: dict[str, Numeric]):
82
+ def stop(self, average_scores: dict[str, Numeric], project_id: str, evaluation_id: str):
70
83
  self.cli_progress.close()
71
- print("\nAverage scores:")
84
+ print(f"\nCheck progress and results at {get_evaluation_url(project_id, evaluation_id)}\n")
85
+ print("Average scores:")
72
86
  for name, score in average_scores.items():
73
87
  print(f"{name}: {score}")
74
88
  print("\n")
@@ -97,6 +111,7 @@ class Evaluation:
97
111
  data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
98
112
  executor: Any,
99
113
  evaluators: dict[str, EvaluatorFunction],
114
+ group_id: Optional[str] = None,
100
115
  name: Optional[str] = None,
101
116
  batch_size: int = DEFAULT_BATCH_SIZE,
102
117
  project_api_key: Optional[str] = None,
@@ -123,6 +138,8 @@ class Evaluation:
123
138
  evaluator function. If the function is anonymous, it will be
124
139
  named `evaluator_${index}`, where index is the index of the
125
140
  evaluator function in the list starting from 1.
141
+ group_id (Optional[str], optional): Group id of the evaluation.
142
+ Defaults to "default".
126
143
  name (Optional[str], optional): The name of the evaluation.
127
144
  It will be auto-generated if not provided.
128
145
  batch_size (int, optional): The batch size for evaluation.
@@ -138,11 +155,16 @@ class Evaluation:
138
155
  Defaults to None. If None, all available instruments will be used.
139
156
  """
140
157
 
158
+ if not evaluators:
159
+ raise ValueError("No evaluators provided")
160
+
161
+ # TODO: Compile regex once and then reuse it
162
+ for evaluator_name in evaluators:
163
+ if not re.match(r'^[\w\s-]+$', evaluator_name):
164
+ raise ValueError(f'Invalid evaluator key: "{evaluator_name}". Keys must only contain letters, digits, hyphens, underscores, or spaces.')
165
+
141
166
  self.is_finished = False
142
- self.name = name
143
167
  self.reporter = EvaluationReporter()
144
- self.executor = executor
145
- self.evaluators = evaluators
146
168
  if isinstance(data, list):
147
169
  self.data = [
148
170
  (Datapoint.model_validate(point) if isinstance(point, dict) else point)
@@ -150,6 +172,10 @@ class Evaluation:
150
172
  ]
151
173
  else:
152
174
  self.data = data
175
+ self.executor = executor
176
+ self.evaluators = evaluators
177
+ self.group_id = group_id
178
+ self.name = name
153
179
  self.batch_size = batch_size
154
180
  L.initialize(
155
181
  project_api_key=project_api_key,
@@ -160,23 +186,6 @@ class Evaluation:
160
186
  )
161
187
 
162
188
  def run(self) -> Union[None, Awaitable[None]]:
163
- """Runs the evaluation.
164
-
165
- Creates a new evaluation if no evaluation with such name exists, or
166
- adds data to an existing one otherwise. Evaluates data points in
167
- batches of `self.batch_size`. The executor
168
- function is called on each data point to get the output,
169
- and then evaluate it by each evaluator function.
170
-
171
- Usage:
172
- ```python
173
- # in a synchronous context:
174
- e.run()
175
- # in an asynchronous context:
176
- await e.run()
177
- ```
178
-
179
- """
180
189
  if self.is_finished:
181
190
  raise Exception("Evaluation is already finished")
182
191
 
@@ -187,41 +196,34 @@ class Evaluation:
187
196
  return loop.run_until_complete(self._run())
188
197
 
189
198
  async def _run(self) -> None:
190
- evaluation = L.create_evaluation(self.name)
191
199
  self.reporter.start(
192
- evaluation.name,
193
- evaluation.projectId,
194
- evaluation.id,
195
200
  len(self.data),
196
201
  )
197
202
 
198
203
  try:
199
- await self.evaluate_in_batches(evaluation)
204
+ result_datapoints = await self.evaluate_in_batches()
200
205
  except Exception as e:
201
- L.update_evaluation_status(evaluation.id, "Error")
202
206
  self.reporter.stopWithError(e)
203
207
  self.is_finished = True
204
208
  return
209
+ else:
210
+ evaluation = L.create_evaluation(data=result_datapoints, group_id=self.group_id, name=self.name)
211
+ average_scores = get_average_scores(result_datapoints)
212
+ self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
213
+ self.is_finished = True
205
214
 
206
- # If we update with status "Finished", we expect averageScores to be not empty
207
- updated_evaluation = L.update_evaluation_status(evaluation.id, "Finished")
208
- self.reporter.stop(updated_evaluation.averageScores)
209
- self.is_finished = True
210
-
211
- async def evaluate_in_batches(self, evaluation: CreateEvaluationResponse):
215
+ async def evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
216
+ result_datapoints = []
212
217
  for i in range(0, len(self.data), self.batch_size):
213
218
  batch = (
214
- self.data[i : i + self.batch_size]
219
+ self.data[i: i + self.batch_size]
215
220
  if isinstance(self.data, list)
216
221
  else self.data.slice(i, i + self.batch_size)
217
222
  )
218
- try:
219
- results = await self._evaluate_batch(batch)
220
- L.post_evaluation_results(evaluation.id, results)
221
- except Exception as e:
222
- print(f"Error evaluating batch: {e}")
223
- finally:
224
- self.reporter.update(len(batch))
223
+ batch_datapoints = await self._evaluate_batch(batch)
224
+ result_datapoints.extend(batch_datapoints)
225
+ self.reporter.update(len(batch))
226
+ return result_datapoints
225
227
 
226
228
  async def _evaluate_batch(
227
229
  self, batch: list[Datapoint]
@@ -252,7 +254,7 @@ class Evaluation:
252
254
  scores: dict[str, Numeric] = {}
253
255
  for evaluator_name, evaluator in self.evaluators.items():
254
256
  with L.start_as_current_span(
255
- "evaluator", input={"output": output, "target": target}
257
+ evaluator_name, input={"output": output, "target": target}
256
258
  ) as evaluator_span:
257
259
  evaluator_span.set_attribute(SPAN_TYPE, SpanType.EVALUATOR.value)
258
260
  value = (
@@ -282,6 +284,7 @@ def evaluate(
282
284
  data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
283
285
  executor: ExecutorFunction,
284
286
  evaluators: dict[str, EvaluatorFunction],
287
+ group_id: Optional[str] = None,
285
288
  name: Optional[str] = None,
286
289
  batch_size: int = DEFAULT_BATCH_SIZE,
287
290
  project_api_key: Optional[str] = None,
@@ -310,8 +313,11 @@ def evaluate(
310
313
  evaluator function. If the function is anonymous, it will be
311
314
  named `evaluator_${index}`, where index is the index of the
312
315
  evaluator function in the list starting from 1.
313
- name (Optional[str], optional): The name of the evaluation.
314
- It will be auto-generated if not provided.
316
+ group_id (Optional[str], optional): Group name which is same
317
+ as the feature you are evaluating in your project or application.
318
+ Defaults to "default".
319
+ name (Optional[str], optional): Optional name of the evaluation. Used to easily
320
+ identify the evaluation in the group.
315
321
  batch_size (int, optional): The batch size for evaluation.
316
322
  Defaults to DEFAULT_BATCH_SIZE.
317
323
  project_api_key (Optional[str], optional): The project API key.
@@ -331,6 +337,7 @@ def evaluate(
331
337
  data=data,
332
338
  executor=executor,
333
339
  evaluators=evaluators,
340
+ group_id=group_id,
334
341
  name=name,
335
342
  batch_size=batch_size,
336
343
  project_api_key=project_api_key,
lmnr/sdk/laminar.py CHANGED
@@ -3,11 +3,9 @@ from opentelemetry import context
3
3
  from opentelemetry.trace import (
4
4
  INVALID_SPAN,
5
5
  get_current_span,
6
- SpanKind,
7
6
  )
8
7
  from opentelemetry.util.types import AttributeValue
9
- from opentelemetry.context.context import Context
10
- from opentelemetry.util import types
8
+ from opentelemetry.context import set_value, attach, detach
11
9
  from lmnr.traceloop_sdk import Traceloop
12
10
  from lmnr.traceloop_sdk.tracing import get_tracer
13
11
  from contextlib import contextmanager
@@ -29,10 +27,12 @@ from lmnr.traceloop_sdk.tracing.attributes import (
29
27
  SESSION_ID,
30
28
  SPAN_INPUT,
31
29
  SPAN_OUTPUT,
30
+ SPAN_PATH,
32
31
  TRACE_TYPE,
33
32
  USER_ID,
34
33
  )
35
34
  from lmnr.traceloop_sdk.tracing.tracing import (
35
+ get_span_path,
36
36
  set_association_properties,
37
37
  update_association_properties,
38
38
  )
@@ -47,7 +47,6 @@ from .types import (
47
47
  NodeInput,
48
48
  PipelineRunRequest,
49
49
  TraceType,
50
- UpdateEvaluationResponse,
51
50
  )
52
51
 
53
52
 
@@ -315,14 +314,6 @@ class Laminar:
315
314
  cls,
316
315
  name: str,
317
316
  input: Any = None,
318
- context: Optional[Context] = None,
319
- kind: SpanKind = SpanKind.INTERNAL,
320
- attributes: types.Attributes = None,
321
- links=None,
322
- start_time: Optional[int] = None,
323
- record_exception: bool = True,
324
- set_status_on_exception: bool = True,
325
- end_on_exit: bool = True,
326
317
  ):
327
318
  """Start a new span as the current span. Useful for manual instrumentation.
328
319
  This is the preferred and more stable way to use manual instrumentation.
@@ -337,32 +328,15 @@ class Laminar:
337
328
  name (str): name of the span
338
329
  input (Any, optional): input to the span. Will be sent as an
339
330
  attribute, so must be json serializable. Defaults to None.
340
- context (Optional[Context], optional): context to start the span in.
341
- Defaults to None.
342
- kind (SpanKind, optional): kind of the span. Defaults to SpanKind.INTERNAL.
343
- attributes (types.Attributes, optional): attributes to set on the span.
344
- Defaults to None.
345
- links ([type], optional): links to set on the span. Defaults to None.
346
- start_time (Optional[int], optional): start time of the span.
347
- Defaults to None.
348
- record_exception (bool, optional): whether to record exceptions.
349
- Defaults to True.
350
- set_status_on_exception (bool, optional): whether to set status on exception.
351
- Defaults to True.
352
- end_on_exit (bool, optional): whether to end the span on exit.
353
- Defaults to True.
354
331
  """
355
332
  with get_tracer() as tracer:
333
+ span_path = get_span_path(name)
334
+ ctx = set_value("span_path", span_path)
335
+ ctx_token = attach(set_value("span_path", span_path))
356
336
  with tracer.start_as_current_span(
357
337
  name,
358
- context=context,
359
- kind=kind,
360
- attributes=attributes,
361
- links=links,
362
- start_time=start_time,
363
- record_exception=record_exception,
364
- set_status_on_exception=set_status_on_exception,
365
- end_on_exit=end_on_exit,
338
+ context=ctx,
339
+ attributes={SPAN_PATH: span_path},
366
340
  ) as span:
367
341
  if input is not None:
368
342
  span.set_attribute(
@@ -371,6 +345,12 @@ class Laminar:
371
345
  )
372
346
  yield span
373
347
 
348
+ # TODO: Figure out if this is necessary
349
+ try:
350
+ detach(ctx_token)
351
+ except Exception:
352
+ pass
353
+
374
354
  @classmethod
375
355
  def set_span_output(cls, output: Any = None):
376
356
  """Set the output of the current span. Useful for manual instrumentation.
@@ -432,10 +412,14 @@ class Laminar:
432
412
  set_association_properties(props)
433
413
 
434
414
  @classmethod
435
- def create_evaluation(cls, name: Optional[str]) -> CreateEvaluationResponse:
415
+ def create_evaluation(cls, data: list[EvaluationResultDatapoint], group_id: Optional[str] = None, name: Optional[str] = None) -> CreateEvaluationResponse:
436
416
  response = requests.post(
437
417
  cls.__base_http_url + "/v1/evaluations",
438
- data=json.dumps({"name": name}),
418
+ data=json.dumps({
419
+ "groupId": group_id,
420
+ "name": name,
421
+ "points": [datapoint.to_dict() for datapoint in data]
422
+ }),
439
423
  headers=cls._headers(),
440
424
  )
441
425
  if response.status_code != 200:
@@ -446,66 +430,6 @@ class Laminar:
446
430
  raise ValueError(f"Error creating evaluation {response.text}")
447
431
  return CreateEvaluationResponse.model_validate(response.json())
448
432
 
449
- @classmethod
450
- def post_evaluation_results(
451
- cls, evaluation_id: uuid.UUID, data: list[EvaluationResultDatapoint]
452
- ) -> requests.Response:
453
- body = {
454
- "evaluationId": str(evaluation_id),
455
- "points": [datapoint.to_dict() for datapoint in data],
456
- }
457
- response = requests.post(
458
- cls.__base_http_url + "/v1/evaluation-datapoints",
459
- data=json.dumps(body),
460
- headers=cls._headers(),
461
- )
462
- if response.status_code != 200:
463
- try:
464
- resp_json = response.json()
465
- raise ValueError(
466
- f"Failed to send evaluation results. Response: {json.dumps(resp_json)}"
467
- )
468
- except Exception:
469
- raise ValueError(
470
- f"Failed to send evaluation results. Error: {response.text}"
471
- )
472
- return response
473
-
474
- @classmethod
475
- def update_evaluation_status(
476
- cls, evaluation_id: str, status: str
477
- ) -> UpdateEvaluationResponse:
478
- """
479
- Updates the status of an evaluation. Returns the updated evaluation object.
480
-
481
- Args:
482
- evaluation_id (str): The ID of the evaluation to update.
483
- status (str): The status to set for the evaluation.
484
-
485
- Returns:
486
- UpdateEvaluationResponse: The updated evaluation response.
487
-
488
- Raises:
489
- ValueError: If the request fails.
490
- """
491
- body = {
492
- "status": status,
493
- }
494
- url = f"{cls.__base_http_url}/v1/evaluations/{evaluation_id}"
495
-
496
- response = requests.post(
497
- url,
498
- data=json.dumps(body),
499
- headers=cls._headers(),
500
- )
501
- if response.status_code != 200:
502
- raise ValueError(
503
- f"Failed to update evaluation status {evaluation_id}. "
504
- f"Response: {response.text}"
505
- )
506
-
507
- return UpdateEvaluationResponse.model_validate(response.json())
508
-
509
433
  @classmethod
510
434
  def _headers(cls):
511
435
  assert cls.__project_api_key is not None, "Project API key is not set"
lmnr/sdk/types.py CHANGED
@@ -2,7 +2,7 @@ import datetime
2
2
  from enum import Enum
3
3
  import pydantic
4
4
  import requests
5
- from typing import Any, Awaitable, Callable, Literal, Optional, Union
5
+ from typing import Any, Awaitable, Callable, Optional, Union
6
6
  import uuid
7
7
 
8
8
  from .utils import serialize
@@ -107,20 +107,13 @@ EvaluatorFunction = Callable[
107
107
  Union[EvaluatorFunctionReturnType, Awaitable[EvaluatorFunctionReturnType]],
108
108
  ]
109
109
 
110
- EvaluationStatus = Literal["Started", "Finished", "Error"]
111
-
112
110
 
113
111
  class CreateEvaluationResponse(pydantic.BaseModel):
114
112
  id: uuid.UUID
115
113
  createdAt: datetime.datetime
114
+ groupId: str
116
115
  name: str
117
- status: EvaluationStatus
118
116
  projectId: uuid.UUID
119
- metadata: Optional[dict[str, Any]] = None
120
- averageScores: Optional[dict[str, Numeric]] = None
121
-
122
-
123
- UpdateEvaluationResponse = CreateEvaluationResponse
124
117
 
125
118
 
126
119
  class EvaluationResultDatapoint(pydantic.BaseModel):
@@ -10,8 +10,8 @@ from opentelemetry import context as context_api
10
10
 
11
11
  from lmnr.sdk.utils import get_input_from_func_args, is_method
12
12
  from lmnr.traceloop_sdk.tracing import get_tracer
13
- from lmnr.traceloop_sdk.tracing.attributes import SPAN_INPUT, SPAN_OUTPUT
14
- from lmnr.traceloop_sdk.tracing.tracing import TracerWrapper
13
+ from lmnr.traceloop_sdk.tracing.attributes import SPAN_INPUT, SPAN_OUTPUT, SPAN_PATH
14
+ from lmnr.traceloop_sdk.tracing.tracing import TracerWrapper, get_span_path
15
15
  from lmnr.traceloop_sdk.utils.json_encoder import JSONEncoder
16
16
 
17
17
 
@@ -47,7 +47,12 @@ def entity_method(
47
47
 
48
48
  with get_tracer() as tracer:
49
49
  span = tracer.start_span(span_name)
50
- ctx = trace.set_span_in_context(span)
50
+
51
+ span_path = get_span_path(span_name)
52
+ span.set_attribute(SPAN_PATH, span_path)
53
+ ctx = context_api.set_value("span_path", span_path)
54
+
55
+ ctx = trace.set_span_in_context(span, ctx)
51
56
  ctx_token = context_api.attach(ctx)
52
57
 
53
58
  try:
@@ -104,7 +109,12 @@ def aentity_method(
104
109
 
105
110
  with get_tracer() as tracer:
106
111
  span = tracer.start_span(span_name)
107
- ctx = trace.set_span_in_context(span)
112
+
113
+ span_path = get_span_path(span_name)
114
+ span.set_attribute(SPAN_PATH, span_path)
115
+ ctx = context_api.set_value("span_path", span_path)
116
+
117
+ ctx = trace.set_span_in_context(span, ctx)
108
118
  ctx_token = context_api.attach(ctx)
109
119
 
110
120
  try:
@@ -1,6 +1,7 @@
1
1
  SPAN_INPUT = "lmnr.span.input"
2
2
  SPAN_OUTPUT = "lmnr.span.output"
3
3
  SPAN_TYPE = "lmnr.span.type"
4
+ SPAN_PATH = "lmnr.span.path"
4
5
 
5
6
  ASSOCIATION_PROPERTIES = "lmnr.association.properties"
6
7
  SESSION_ID = "session_id"
@@ -25,7 +25,7 @@ from opentelemetry.instrumentation.threading import ThreadingInstrumentor
25
25
 
26
26
  # from lmnr.traceloop_sdk import Telemetry
27
27
  from lmnr.traceloop_sdk.instruments import Instruments
28
- from lmnr.traceloop_sdk.tracing.attributes import ASSOCIATION_PROPERTIES
28
+ from lmnr.traceloop_sdk.tracing.attributes import ASSOCIATION_PROPERTIES, SPAN_PATH
29
29
  from lmnr.traceloop_sdk.tracing.content_allow_list import ContentAllowList
30
30
  from lmnr.traceloop_sdk.utils import is_notebook
31
31
  from lmnr.traceloop_sdk.utils.package_check import is_package_installed
@@ -245,6 +245,14 @@ class TracerWrapper(object):
245
245
  self.flush()
246
246
 
247
247
  def _span_processor_on_start(self, span, parent_context):
248
+ span_path = get_value("span_path")
249
+ if span_path is not None:
250
+ # This is done redundantly here for most decorated functions
251
+ # However, need to do this for auto-instrumented libraries.
252
+ # Then, for auto-instrumented ones, they'll attach
253
+ # the final part of the name to the span on the backend.
254
+ span.set_attribute(SPAN_PATH, span_path)
255
+
248
256
  association_properties = get_value("association_properties")
249
257
  if association_properties is not None:
250
258
  _set_association_properties_attributes(span, association_properties)
@@ -318,6 +326,12 @@ def _set_association_properties_attributes(span, properties: dict) -> None:
318
326
  )
319
327
 
320
328
 
329
+ def get_span_path(span_name: str) -> str:
330
+ current_span_path = get_value("span_path")
331
+ span_path = f"{current_span_path}.{span_name}" if current_span_path else span_name
332
+ return span_path
333
+
334
+
321
335
  def set_managed_prompt_tracing_context(
322
336
  key: str,
323
337
  version: int,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lmnr
3
- Version: 0.4.12b4
3
+ Version: 0.4.14
4
4
  Summary: Python SDK for Laminar AI
5
5
  License: Apache-2.0
6
6
  Author: lmnr.ai
@@ -59,63 +59,37 @@ Description-Content-Type: text/markdown
59
59
 
60
60
  # Laminar Python
61
61
 
62
- OpenTelemetry log sender for [Laminar](https://github.com/lmnr-ai/lmnr) for Python code.
62
+ Python SDK for [Laminar](https://www.lmnr.ai).
63
+
64
+ [Laminar](https://www.lmnr.ai) is an open-source platform for engineering LLM products. Trace, evaluate, annotate, and analyze LLM data. Bring LLM applications to production with confidence.
65
+
66
+ Check out our [open-source repo](https://github.com/lmnr-ai/lmnr) and don't forget to star it ⭐
63
67
 
64
68
  <a href="https://pypi.org/project/lmnr/"> ![PyPI - Version](https://img.shields.io/pypi/v/lmnr?label=lmnr&logo=pypi&logoColor=3775A9) </a>
65
69
  ![PyPI - Downloads](https://img.shields.io/pypi/dm/lmnr)
66
70
  ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/lmnr)
67
71
 
68
72
 
69
-
70
73
  ## Quickstart
71
74
 
72
75
  First, install the package:
73
76
 
74
77
  ```sh
75
- python3 -m venv .myenv
76
- source .myenv/bin/activate # or use your favorite env management tool
77
-
78
78
  pip install lmnr
79
79
  ```
80
80
 
81
- Then, you can initialize Laminar in your main file and instrument your code.
81
+ Then initialize Laminar in your code:
82
82
 
83
83
  ```python
84
- import os
85
- from openai import OpenAI
86
84
  from lmnr import Laminar as L
87
85
 
88
- L.initialize(
89
- project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
90
- )
91
-
92
- client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
93
-
94
- def poem_writer(topic: str):
95
- prompt = f"write a poem about {topic}"
96
-
97
- # OpenAI calls are automatically instrumented
98
- response = client.chat.completions.create(
99
- model="gpt-4o",
100
- messages=[
101
- {"role": "system", "content": "You are a helpful assistant."},
102
- {"role": "user", "content": prompt},
103
- ],
104
- )
105
- poem = response.choices[0].message.content
106
- return poem
107
-
108
- if __name__ == "__main__":
109
- print(poem_writer("laminar flow"))
110
-
86
+ L.initialize(project_api_key="<PROJECT_API_KEY>")
111
87
  ```
112
88
 
113
- Note that you need to only initialize Laminar once in your application.
114
-
115
- ### Project API key
89
+ This will automatically instrument most of the LLM, Vector DB, and related
90
+ calls with OpenTelemetry-compatible instrumentation.
116
91
 
117
- Get the key from the settings page of your Laminar project ([Learn more](https://docs.lmnr.ai/api-reference/introduction#authentication)).
118
- You can either pass it to `.initialize()` or set it to `.env` at the root of your package with the key `LMNR_PROJECT_API_KEY`.
92
+ Note that you only need to initialize Laminar once in your application.
119
93
 
120
94
  ## Instrumentation
121
95
 
@@ -224,6 +198,68 @@ L.event("topic alignment", topic in poem)
224
198
  L.evaluate_event("excessive_wordiness", "check_wordy", {"text_input": poem})
225
199
  ```
226
200
 
201
+ ## Evaluations
202
+
203
+ ### Quickstart
204
+
205
+ Install the package:
206
+
207
+ ```sh
208
+ pip install lmnr
209
+ ```
210
+
211
+ Create a file named `my_first_eval.py` with the following code:
212
+
213
+ ```python
214
+ from lmnr import evaluate
215
+
216
+ def write_poem(data):
217
+ return f"This is a good poem about {data['topic']}"
218
+
219
+ def contains_poem(output, target):
220
+ return 1 if output in target['poem'] else 0
221
+
222
+ # Evaluation data
223
+ data = [
224
+ {"data": {"topic": "flowers"}, "target": {"poem": "This is a good poem about flowers"}},
225
+ {"data": {"topic": "cars"}, "target": {"poem": "I like cars"}},
226
+ ]
227
+
228
+ evaluate(
229
+ data=data,
230
+ executor=write_poem,
231
+ evaluators={
232
+ "containsPoem": contains_poem
233
+ },
234
+ group_id="my_first_feature"
235
+ )
236
+ ```
237
+
238
+ Run the following commands:
239
+
240
+ ```sh
241
+ export LMNR_PROJECT_API_KEY=<YOUR_PROJECT_API_KEY> # get from Laminar project settings
242
+ lmnr eval my_first_eval.py # run in the virtual environment where lmnr is installed
243
+ ```
244
+
245
+ Visit the URL printed in the console to see the results.
246
+
247
+ ### Overview
248
+
249
+ Bring rigor to the development of your LLM applications with evaluations.
250
+
251
+ You can run evaluations locally by providing an executor (part of the logic used in your application) and evaluators (numeric scoring functions) to the `evaluate` function.
252
+
253
+ `evaluate` takes in the following parameters:
254
+ - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
255
+ - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be either a regular function or an `async` function. \*
256
+ - `evaluators` – a dictionary that maps evaluator names to evaluator functions. Each evaluator takes the output of the executor as the first argument and `target` as the second argument, and produces numeric scores. Each function can produce either a single number or a `dict[str, int|float]` of scores. Each evaluator can be either a regular function or an `async` function.
257
+ - `name` – optional name for the evaluation. Automatically generated if not provided.
258
+
259
+ \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function that takes in `data` and returns only the needed value(s) from it.
260
+
261
+ [Read docs](https://docs.lmnr.ai/evaluations/introduction) to learn more about evaluations.
262
+
227
263
  ## Laminar pipelines as prompt chain managers
228
264
 
229
265
  You can create Laminar pipelines in the UI and manage chains of LLM calls there.
@@ -258,71 +294,3 @@ PipelineRunResponse(
258
294
  )
259
295
  ```
260
296
 
261
- ## Running offline evaluations on your data
262
-
263
- You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
264
-
265
- Evaluation takes in the following parameters:
266
- - `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
267
- - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
268
- - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
269
- - `evaluators` – evaluaton logic. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Pass a dict from evaluator name to a function. Each function can produce either a single number or `dict[str, int|float]` of scores.
270
-
271
- \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
272
-
273
- ### Example code
274
-
275
- ```python
276
- from lmnr import evaluate
277
- from openai import AsyncOpenAI
278
- import asyncio
279
- import os
280
-
281
- openai_client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])
282
-
283
- async def get_capital(data):
284
- country = data["country"]
285
- response = await openai_client.chat.completions.create(
286
- model="gpt-4o-mini",
287
- messages=[
288
- {"role": "system", "content": "You are a helpful assistant."},
289
- {
290
- "role": "user",
291
- "content": f"What is the capital of {country}? Just name the "
292
- "city and nothing else",
293
- },
294
- ],
295
- )
296
- return response.choices[0].message.content.strip()
297
-
298
-
299
- # Evaluation data
300
- data = [
301
- {"data": {"country": "Canada"}, "target": {"capital": "Ottawa"}},
302
- {"data": {"country": "Germany"}, "target": {"capital": "Berlin"}},
303
- {"data": {"country": "Tanzania"}, "target": {"capital": "Dodoma"}},
304
- ]
305
-
306
-
307
- def correctness(output, target):
308
- return 1 if output == target["capital"] else 0
309
-
310
-
311
- # Create an Evaluation instance
312
- e = evaluate(
313
- name="my-evaluation",
314
- data=data,
315
- executor=get_capital,
316
- evaluators={"correctness": correctness},
317
- project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
318
- )
319
- ```
320
-
321
- ### Running from CLI.
322
-
323
- 1. Make sure `lmnr` is installed in a venv. CLI does not work with a global env
324
- 1. Run `lmnr path/to/my/eval.py`
325
-
326
- ### Running from code
327
-
328
- Simply execute the function, e.g. `python3 path/to/my/eval.py`
@@ -2,17 +2,17 @@ lmnr/__init__.py,sha256=5Ks8UIicCzCBgwSz0MOX3I7jVruPMUO3SmxIwUoODzQ,231
2
2
  lmnr/cli.py,sha256=Ptvm5dsNLKUY5lwnN8XkT5GtCYjzpRNi2WvefknB3OQ,1079
3
3
  lmnr/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  lmnr/sdk/decorators.py,sha256=ii7Bqp6flaanIFSK6M1_ZZV-izp4o3hkR1MmY7wnFQQ,2227
5
- lmnr/sdk/evaluations.py,sha256=kVf6cZAF53HMYIdmlaTV5YD0UdPsv_YCzvs1Mts9Zps,13587
6
- lmnr/sdk/laminar.py,sha256=3LqzqhsSOHxz11_lxAdvqy_awtOnTdPTeYxYEZ3F4Go,19407
5
+ lmnr/sdk/evaluations.py,sha256=Tukl2pW_x13ittzG5XQpF1TweYo3fpD4eLInplQ4YYI,14152
6
+ lmnr/sdk/laminar.py,sha256=d5Vn0eSVIoI_SxFcTh71T-_w7_E_odvFDFiLU2YwzZo,16509
7
7
  lmnr/sdk/log.py,sha256=EgAMY77Zn1bv1imCqrmflD3imoAJ2yveOkIcrIP3e98,1170
8
- lmnr/sdk/types.py,sha256=QB89q6WeN715x15ukoRVufXk6FSP_1pGn8QsUSIJG5U,5062
8
+ lmnr/sdk/types.py,sha256=HvaZEqVRduCZbkF7Cp8rgS5oBbc1qPvOD3PP9tFrRu4,4826
9
9
  lmnr/sdk/utils.py,sha256=s81p6uJehgJSaLWy3sR5fTpEDH7vzn3i_UujUHChl6M,3346
10
10
  lmnr/traceloop_sdk/.flake8,sha256=bCxuDlGx3YQ55QHKPiGJkncHanh9qGjQJUujcFa3lAU,150
11
11
  lmnr/traceloop_sdk/.python-version,sha256=9OLQBQVbD4zE4cJsPePhnAfV_snrPSoqEQw-PXgPMOs,6
12
12
  lmnr/traceloop_sdk/__init__.py,sha256=hp3q1OsFaGgaQCEanJrL38BJN32hWqCNVCSjYpndEsY,2957
13
13
  lmnr/traceloop_sdk/config/__init__.py,sha256=DliMGp2NjYAqRFLKpWQPUKjGMHRO8QsVfazBA1qENQ8,248
14
14
  lmnr/traceloop_sdk/decorators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- lmnr/traceloop_sdk/decorators/base.py,sha256=5YCzAErlhv1bMDO1C9LBlLWYk3bwku0RLjGLR-TkR4c,5128
15
+ lmnr/traceloop_sdk/decorators/base.py,sha256=-b8Q738m3StdLTgHARx8zw78m9htynKkZFFTYURQnOA,5524
16
16
  lmnr/traceloop_sdk/instruments.py,sha256=oMvIASueW3GeChpjIdH-DD9aFBVB8OtHZ0HawppTrlI,942
17
17
  lmnr/traceloop_sdk/tests/__init__.py,sha256=RYnG0-8zbXL0-2Ste1mEBf5sN4d_rQjGTCgPBuaZC74,20
18
18
  lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml,sha256=26g0wRA0juicHg_XrhcE8H4vhs1lawDs0o0aLFn-I7w,3103
@@ -35,17 +35,17 @@ lmnr/traceloop_sdk/tests/test_sdk_initialization.py,sha256=fRaf6lrxFzJIN94P1Tav_
35
35
  lmnr/traceloop_sdk/tests/test_tasks.py,sha256=xlEx8BKp4yG83SCjK5WkPGfyC33JSrx4h8VyjVwGbgw,906
36
36
  lmnr/traceloop_sdk/tests/test_workflows.py,sha256=RVcfY3WAFIDZC15-aSua21aoQyYeWE7KypDyUsm-2EM,9372
37
37
  lmnr/traceloop_sdk/tracing/__init__.py,sha256=Ckq7zCM26VdJVB5tIZv0GTPyMZKyfso_KWD5yPHaqdo,66
38
- lmnr/traceloop_sdk/tracing/attributes.py,sha256=Rvglt_2IeZzKJ-mumrp9qAtTwHza34CrNgv4CvYihk0,221
38
+ lmnr/traceloop_sdk/tracing/attributes.py,sha256=PXwS1GCZKdjQSypl__BSkQNZhh21RyzwTPnDOh61bnQ,250
39
39
  lmnr/traceloop_sdk/tracing/content_allow_list.py,sha256=3feztm6PBWNelc8pAZUcQyEGyeSpNiVKjOaDk65l2ps,846
40
40
  lmnr/traceloop_sdk/tracing/context_manager.py,sha256=csVlB6kDmbgSPsROHwnddvGGblx55v6lJMRj0wsSMQM,304
41
- lmnr/traceloop_sdk/tracing/tracing.py,sha256=8plGdX6nErrPERgYXQDQRyBTtVgv2Ies46ph-msLLQE,35443
41
+ lmnr/traceloop_sdk/tracing/tracing.py,sha256=pB8vImUZRMaahkHLaQP73cbMtYDyvpvEdWsa49520Yo,36061
42
42
  lmnr/traceloop_sdk/utils/__init__.py,sha256=pNhf0G3vTd5ccoc03i1MXDbricSaiqCbi1DLWhSekK8,604
43
43
  lmnr/traceloop_sdk/utils/in_memory_span_exporter.py,sha256=H_4TRaThMO1H6vUQ0OpQvzJk_fZH0OOsRAM1iZQXsR8,2112
44
44
  lmnr/traceloop_sdk/utils/json_encoder.py,sha256=dK6b_axr70IYL7Vv-bu4wntvDDuyntoqsHaddqX7P58,463
45
45
  lmnr/traceloop_sdk/utils/package_check.py,sha256=TZSngzJOpFhfUZLXIs38cpMxQiZSmp0D-sCrIyhz7BA,251
46
46
  lmnr/traceloop_sdk/version.py,sha256=OlatFEFA4ttqSSIiV8jdE-sq3KG5zu2hnC4B4mzWF3s,23
47
- lmnr-0.4.12b4.dist-info/LICENSE,sha256=67b_wJHVV1CBaWkrKFWU1wyqTPSdzH77Ls-59631COg,10411
48
- lmnr-0.4.12b4.dist-info/METADATA,sha256=u8pLIuLaw6if3D60lp5ekTKpVoJGxt3Z-gcjVKoDj7g,12196
49
- lmnr-0.4.12b4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
50
- lmnr-0.4.12b4.dist-info/entry_points.txt,sha256=K1jE20ww4jzHNZLnsfWBvU3YKDGBgbOiYG5Y7ivQcq4,37
51
- lmnr-0.4.12b4.dist-info/RECORD,,
47
+ lmnr-0.4.14.dist-info/LICENSE,sha256=67b_wJHVV1CBaWkrKFWU1wyqTPSdzH77Ls-59631COg,10411
48
+ lmnr-0.4.14.dist-info/METADATA,sha256=WSsRng3syFI0DugYr2-V6nRZIA1F_xo3_ikuHE6PDq8,11266
49
+ lmnr-0.4.14.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
50
+ lmnr-0.4.14.dist-info/entry_points.txt,sha256=K1jE20ww4jzHNZLnsfWBvU3YKDGBgbOiYG5Y7ivQcq4,37
51
+ lmnr-0.4.14.dist-info/RECORD,,
File without changes