lmnr-0.4.17b0-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. lmnr/__init__.py +5 -0
  2. lmnr/cli.py +39 -0
  3. lmnr/sdk/__init__.py +0 -0
  4. lmnr/sdk/decorators.py +66 -0
  5. lmnr/sdk/evaluations.py +354 -0
  6. lmnr/sdk/laminar.py +403 -0
  7. lmnr/sdk/log.py +39 -0
  8. lmnr/sdk/types.py +155 -0
  9. lmnr/sdk/utils.py +99 -0
  10. lmnr/traceloop_sdk/.flake8 +12 -0
  11. lmnr/traceloop_sdk/.python-version +1 -0
  12. lmnr/traceloop_sdk/__init__.py +89 -0
  13. lmnr/traceloop_sdk/config/__init__.py +9 -0
  14. lmnr/traceloop_sdk/decorators/__init__.py +0 -0
  15. lmnr/traceloop_sdk/decorators/base.py +178 -0
  16. lmnr/traceloop_sdk/instruments.py +34 -0
  17. lmnr/traceloop_sdk/tests/__init__.py +1 -0
  18. lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +101 -0
  19. lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +99 -0
  20. lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +98 -0
  21. lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +98 -0
  22. lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +199 -0
  23. lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +202 -0
  24. lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +199 -0
  25. lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +96 -0
  26. lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +98 -0
  27. lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +199 -0
  28. lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +167 -0
  29. lmnr/traceloop_sdk/tests/conftest.py +111 -0
  30. lmnr/traceloop_sdk/tests/test_association_properties.py +229 -0
  31. lmnr/traceloop_sdk/tests/test_manual.py +48 -0
  32. lmnr/traceloop_sdk/tests/test_nested_tasks.py +47 -0
  33. lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +50 -0
  34. lmnr/traceloop_sdk/tests/test_sdk_initialization.py +57 -0
  35. lmnr/traceloop_sdk/tests/test_tasks.py +32 -0
  36. lmnr/traceloop_sdk/tests/test_workflows.py +262 -0
  37. lmnr/traceloop_sdk/tracing/__init__.py +1 -0
  38. lmnr/traceloop_sdk/tracing/attributes.py +9 -0
  39. lmnr/traceloop_sdk/tracing/content_allow_list.py +24 -0
  40. lmnr/traceloop_sdk/tracing/context_manager.py +13 -0
  41. lmnr/traceloop_sdk/tracing/tracing.py +913 -0
  42. lmnr/traceloop_sdk/utils/__init__.py +26 -0
  43. lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +61 -0
  44. lmnr/traceloop_sdk/utils/json_encoder.py +20 -0
  45. lmnr/traceloop_sdk/utils/package_check.py +8 -0
  46. lmnr/traceloop_sdk/version.py +1 -0
  47. lmnr-0.4.17b0.dist-info/LICENSE +75 -0
  48. lmnr-0.4.17b0.dist-info/METADATA +250 -0
  49. lmnr-0.4.17b0.dist-info/RECORD +50 -0
  50. lmnr-0.4.17b0.dist-info/WHEEL +4 -0
lmnr/__init__.py ADDED
@@ -0,0 +1,5 @@
+ from .sdk.evaluations import evaluate
+ from .sdk.laminar import Laminar
+ from .sdk.types import ChatMessage, PipelineRunError, PipelineRunResponse, NodeInput
+ from .sdk.decorators import observe
+ from .traceloop_sdk import Instruments
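These five re-exports define the package's public surface, so application code imports from `lmnr` directly rather than from the submodules. For example (illustrative only; each name resolves to the submodule listed above, e.g. `observe` to lmnr.sdk.decorators and `evaluate` to lmnr.sdk.evaluations):

from lmnr import evaluate, Laminar, ChatMessage, observe, Instruments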
lmnr/cli.py ADDED
@@ -0,0 +1,39 @@
+ from argparse import ArgumentParser
+ import asyncio
+ import importlib
+ import os
+ import sys
+
+ from lmnr.sdk.evaluations import set_global_evaluation
+
+
+ # TODO: Refactor this code
+ async def run_evaluation(args):
+     sys.path.insert(0, os.getcwd())
+
+     with set_global_evaluation(True):
+         file = os.path.abspath(args.file)
+
+         spec = importlib.util.spec_from_file_location("run_eval", file)
+         mod = importlib.util.module_from_spec(spec)
+         spec.loader.exec_module(mod)
+
+         from lmnr.sdk.evaluations import _evaluation
+         evaluation = _evaluation
+         await evaluation.run()
+
+
+ def cli():
+     parser = ArgumentParser(
+         prog="lmnr",
+         description="CLI for Laminar",
+     )
+
+     subparsers = parser.add_subparsers(title="subcommands", dest="subcommand")
+
+     parser_eval = subparsers.add_parser("eval", description="Run an evaluation")
+     parser_eval.add_argument("file", help="A file containing the evaluation to run")
+     parser_eval.set_defaults(func=run_evaluation)
+
+     parsed = parser.parse_args()
+     asyncio.run(parsed.func(parsed))
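The `eval` subcommand above imports the target file as a module while `set_global_evaluation(True)` is active, so a top-level `evaluate(...)` call in that file only registers the evaluation, and the CLI then awaits `_evaluation.run()`. A sketch of an evaluation file driven this way, assuming a `Datapoint`-style dict with `data` and `target` keys as the docstrings in lmnr/sdk/evaluations.py describe (the file name, executor, and evaluator are made up for illustration):

# my_eval.py -- run with: lmnr eval my_eval.py
from lmnr import evaluate

def capitalize(data: dict) -> str:
    # Executor: produces the output to be scored.
    return data["text"].capitalize()

def exact_match(output: str, target: str) -> int:
    # Evaluator: compares executor output with the target.
    return 1 if output == target else 0

# Under `lmnr eval` this call only registers the evaluation; the CLI runs it.
evaluate(
    data=[{"data": {"text": "hello world"}, "target": "Hello world"}],
    executor=capitalize,
    evaluators={"exact_match": exact_match},
)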
lmnr/sdk/__init__.py ADDED
File without changes
lmnr/sdk/decorators.py ADDED
@@ -0,0 +1,66 @@
+ from lmnr.traceloop_sdk.decorators.base import (
+     entity_method,
+     aentity_method,
+ )
+ from opentelemetry.trace import INVALID_SPAN, get_current_span
+
+ from typing import Callable, Optional, TypeVar, cast
+ from typing_extensions import ParamSpec
+
+ from lmnr.traceloop_sdk.tracing.attributes import SESSION_ID, USER_ID
+ from lmnr.traceloop_sdk.tracing.tracing import update_association_properties
+
+ from .utils import is_async
+
+
+ P = ParamSpec("P")
+ R = TypeVar("R")
+
+
+ def observe(
+     *,
+     name: Optional[str] = None,
+     user_id: Optional[str] = None,
+     session_id: Optional[str] = None,
+ ) -> Callable[[Callable[P, R]], Callable[P, R]]:
+     """The main decorator entrypoint for Laminar. This is used to wrap
+     functions and methods to create spans.
+
+     Args:
+         name (Optional[str], optional): Name of the span. Function
+             name is used if not specified.
+             Defaults to None.
+         user_id (Optional[str], optional): User ID to associate
+             with the span and the following context.
+             Defaults to None.
+         session_id (Optional[str], optional): Session ID to associate with the
+             span and the following context. Defaults to None.
+
+     Raises:
+         Exception: re-raises the exception if the wrapped function raises
+             an exception
+
+     Returns:
+         R: Returns the result of the wrapped function
+     """
+
+     def decorator(func: Callable) -> Callable:
+         current_span = get_current_span()
+         if current_span != INVALID_SPAN:
+             if session_id is not None:
+                 current_span.set_attribute(SESSION_ID, session_id)
+             if user_id is not None:
+                 current_span.set_attribute(USER_ID, user_id)
+         association_properties = {}
+         if session_id is not None:
+             association_properties["session_id"] = session_id
+         if user_id is not None:
+             association_properties["user_id"] = user_id
+         update_association_properties(association_properties)
+         return (
+             aentity_method(name=name)(func)
+             if is_async(func)
+             else entity_method(name=name)(func)
+         )
+
+     return cast(Callable, decorator)
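As the docstring above notes, `observe` wraps both sync and async callables, dispatching to `entity_method` or `aentity_method` via `is_async`, and attaches `session_id`/`user_id` as association properties. A minimal usage sketch (the function, IDs, and API key are hypothetical):

from lmnr import Laminar, observe

Laminar.initialize(project_api_key="<project-api-key>")  # hypothetical key

@observe(name="summarize", session_id="session-123", user_id="user-42")
async def summarize(text: str) -> str:
    # Runs inside a span named "summarize"; exceptions are re-raised.
    return text[:100]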
lmnr/sdk/evaluations.py ADDED
@@ -0,0 +1,354 @@
+ import asyncio
+ import re
+ import sys
+ from abc import ABC, abstractmethod
+ from contextlib import contextmanager
+ from typing import Any, Awaitable, Optional, Set, Union
+ import uuid
+
+ from tqdm import tqdm
+
+ from ..traceloop_sdk.instruments import Instruments
+ from ..traceloop_sdk.tracing.attributes import SPAN_TYPE
+
+ from .laminar import Laminar as L
+ from .types import (
+     Datapoint,
+     EvaluationResultDatapoint,
+     EvaluatorFunction,
+     ExecutorFunction,
+     Numeric,
+     NumericTypes,
+     SpanType,
+     TraceType,
+ )
+ from .utils import is_async
+
+ DEFAULT_BATCH_SIZE = 5
+
+ _evaluation = None
+ _set_global_evaluation = False
+
+
+ @contextmanager
+ def set_global_evaluation(set_global_evaluation: bool):
+     global _set_global_evaluation
+     original = _set_global_evaluation
+     try:
+         _set_global_evaluation = set_global_evaluation
+         yield
+     finally:
+         _set_global_evaluation = original
+         pass
+
+
+ def get_evaluation_url(project_id: str, evaluation_id: str):
+     return f"https://www.lmnr.ai/project/{project_id}/evaluations/{evaluation_id}"
+
+
+ def get_average_scores(results: list[EvaluationResultDatapoint]) -> dict[str, Numeric]:
+     per_score_values = {}
+     for result in results:
+         for key, value in result.scores.items():
+             if key not in per_score_values:
+                 per_score_values[key] = []
+             per_score_values[key].append(value)
+
+     average_scores = {}
+     for key, values in per_score_values.items():
+         average_scores[key] = sum(values) / len(values)
+
+     return average_scores
+
+
+ class EvaluationReporter:
+     def __init__(self):
+         pass
+
+     def start(self, length: int):
+         self.cli_progress = tqdm(
+             total=length,
+             bar_format="{bar} {percentage:3.0f}% | ETA: {remaining}s | {n_fmt}/{total_fmt}",
+             ncols=60,
+         )
+
+     def update(self, batch_length: int):
+         self.cli_progress.update(batch_length)
+
+     def stopWithError(self, error: Exception):
+         self.cli_progress.close()
+         sys.stderr.write(f"\nError: {error}\n")
+
+     def stop(self, average_scores: dict[str, Numeric], project_id: str, evaluation_id: str):
+         self.cli_progress.close()
+         print(f"\nCheck progress and results at {get_evaluation_url(project_id, evaluation_id)}\n")
+         print("Average scores:")
+         for name, score in average_scores.items():
+             print(f"{name}: {score}")
+         print("\n")
+
+
+ class EvaluationDataset(ABC):
+     @abstractmethod
+     def __init__(self, *args, **kwargs):
+         pass
+
+     @abstractmethod
+     def __len__(self) -> int:
+         pass
+
+     @abstractmethod
+     def __getitem__(self, idx) -> Datapoint:
+         pass
+
+     def slice(self, start: int, end: int):
+         return [self[i] for i in range(max(start, 0), min(end, len(self)))]
+
+
+ class Evaluation:
+     def __init__(
+         self,
+         data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
+         executor: Any,
+         evaluators: dict[str, EvaluatorFunction],
+         group_id: Optional[str] = None,
+         name: Optional[str] = None,
+         batch_size: int = DEFAULT_BATCH_SIZE,
+         project_api_key: Optional[str] = None,
+         base_url: Optional[str] = None,
+         http_port: Optional[int] = None,
+         grpc_port: Optional[int] = None,
+         instruments: Optional[Set[Instruments]] = None,
+     ):
+         """
+         Initializes an instance of the Evaluations class.
+
+         Parameters:
+             data (Union[List[Union[EvaluationDatapoint, dict]], EvaluationDataset]): List of data points to evaluate or an evaluation dataset.
+                 `data` is the input to the executor function,
+                 `target` is the input to the evaluator function.
+             executor (Callable[..., Any]): The executor function.
+                 Takes the data point + any additional arguments
+                 and returns the output to evaluate.
+             evaluators (List[Callable[..., Any]]): List of evaluator functions.
+                 Each evaluator function takes the output of the executor _and_
+                 the target data, and returns a score. The score can be a
+                 single number or a record of string keys and number values.
+                 If the score is a single number, it will be named after the
+                 evaluator function. If the function is anonymous, it will be
+                 named `evaluator_${index}`, where index is the index of the
+                 evaluator function in the list starting from 1.
+             group_id (Optional[str], optional): Group id of the evaluation.
+                 Defaults to "default".
+             name (Optional[str], optional): The name of the evaluation.
+                 It will be auto-generated if not provided.
+             batch_size (int, optional): The batch size for evaluation.
+                 Defaults to DEFAULT_BATCH_SIZE.
+             project_api_key (Optional[str], optional): The project API key.
+                 Defaults to an empty string.
+             base_url (Optional[str], optional): The base URL for the Laminar API.
+                 Useful if self-hosted elsewhere.
+                 Defaults to "https://api.lmnr.ai".
+             http_port (Optional[int], optional): The port for the Laminar API HTTP service.
+                 Defaults to 443.
+             instruments (Optional[Set[Instruments]], optional): Set of modules to auto-instrument.
+                 Defaults to None. If None, all available instruments will be used.
+         """
+
+         if not evaluators:
+             raise ValueError("No evaluators provided")
+
+         # TODO: Compile regex once and then reuse it
+         for evaluator_name in evaluators:
+             if not re.match(r'^[\w\s-]+$', evaluator_name):
+                 raise ValueError(f'Invalid evaluator key: "{evaluator_name}". Keys must only contain letters, digits, hyphens, underscores, or spaces.')
+
+         self.is_finished = False
+         self.reporter = EvaluationReporter()
+         if isinstance(data, list):
+             self.data = [
+                 (Datapoint.model_validate(point) if isinstance(point, dict) else point)
+                 for point in data
+             ]
+         else:
+             self.data = data
+         self.executor = executor
+         self.evaluators = evaluators
+         self.group_id = group_id
+         self.name = name
+         self.batch_size = batch_size
+         L.initialize(
+             project_api_key=project_api_key,
+             base_url=base_url,
+             http_port=http_port,
+             grpc_port=grpc_port,
+             instruments=instruments,
+         )
+
+     def run(self) -> Union[None, Awaitable[None]]:
+         if self.is_finished:
+             raise Exception("Evaluation is already finished")
+
+         loop = asyncio.get_event_loop()
+         if loop.is_running():
+             return loop.create_task(self._run())
+         else:
+             return loop.run_until_complete(self._run())
+
+     async def _run(self) -> None:
+         self.reporter.start(
+             len(self.data),
+         )
+
+         try:
+             result_datapoints = await self.evaluate_in_batches()
+         except Exception as e:
+             self.reporter.stopWithError(e)
+             self.is_finished = True
+             return
+         else:
+             evaluation = L.create_evaluation(data=result_datapoints, group_id=self.group_id, name=self.name)
+             average_scores = get_average_scores(result_datapoints)
+             self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
+             self.is_finished = True
+
+     async def evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
+         result_datapoints = []
+         for i in range(0, len(self.data), self.batch_size):
+             batch = (
+                 self.data[i: i + self.batch_size]
+                 if isinstance(self.data, list)
+                 else self.data.slice(i, i + self.batch_size)
+             )
+             batch_datapoints = await self._evaluate_batch(batch)
+             result_datapoints.extend(batch_datapoints)
+             self.reporter.update(len(batch))
+         return result_datapoints
+
+     async def _evaluate_batch(
+         self, batch: list[Datapoint]
+     ) -> list[EvaluationResultDatapoint]:
+         batch_promises = [self._evaluate_datapoint(datapoint) for datapoint in batch]
+         results = await asyncio.gather(*batch_promises)
+         return results
+
+     async def _evaluate_datapoint(
+         self, datapoint: Datapoint
+     ) -> EvaluationResultDatapoint:
+         with L.start_as_current_span("evaluation") as evaluation_span:
+             L._set_trace_type(trace_type=TraceType.EVALUATION)
+             evaluation_span.set_attribute(SPAN_TYPE, SpanType.EVALUATION.value)
+             with L.start_as_current_span(
+                 "executor", input={"data": datapoint.data}
+             ) as executor_span:
+                 executor_span.set_attribute(SPAN_TYPE, SpanType.EXECUTOR.value)
+                 output = (
+                     await self.executor(datapoint.data)
+                     if is_async(self.executor)
+                     else self.executor(datapoint.data)
+                 )
+                 L.set_span_output(output)
+             target = datapoint.target
+
+             # Iterate over evaluators
+             scores: dict[str, Numeric] = {}
+             for evaluator_name, evaluator in self.evaluators.items():
+                 with L.start_as_current_span(
+                     evaluator_name, input={"output": output, "target": target}
+                 ) as evaluator_span:
+                     evaluator_span.set_attribute(SPAN_TYPE, SpanType.EVALUATOR.value)
+                     value = (
+                         await evaluator(output, target)
+                         if is_async(evaluator)
+                         else evaluator(output, target)
+                     )
+                     L.set_span_output(value)
+
+                 # If evaluator returns a single number, use evaluator name as key
+                 if isinstance(value, NumericTypes):
+                     scores[evaluator_name] = value
+                 else:
+                     scores.update(value)
+
+             trace_id = uuid.UUID(int=evaluation_span.get_span_context().trace_id)
+             return EvaluationResultDatapoint(
+                 data=datapoint.data,
+                 target=target,
+                 executor_output=output,
+                 scores=scores,
+                 trace_id=trace_id,
+             )
+
+
+ def evaluate(
+     data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
+     executor: ExecutorFunction,
+     evaluators: dict[str, EvaluatorFunction],
+     group_id: Optional[str] = None,
+     name: Optional[str] = None,
+     batch_size: int = DEFAULT_BATCH_SIZE,
+     project_api_key: Optional[str] = None,
+     base_url: Optional[str] = None,
+     http_port: Optional[int] = None,
+     grpc_port: Optional[int] = None,
+     instruments: Optional[Set[Instruments]] = None,
+ ) -> Optional[Awaitable[None]]:
+     """
+     If added to the file which is called through lmnr eval command, then simply registers the evaluation.
+     Otherwise, if there is no event loop, creates it and runs the evaluation until completion.
+     If there is an event loop, schedules the evaluation as a task in the event loop and returns an awaitable handle.
+
+     Parameters:
+         data (Union[List[Union[EvaluationDatapoint, dict]], EvaluationDataset]): List of data points to evaluate or an evaluation dataset.
+             `data` is the input to the executor function,
+             `target` is the input to the evaluator function.
+         executor (Callable[..., Any]): The executor function.
+             Takes the data point + any additional arguments
+             and returns the output to evaluate.
+         evaluators (List[Callable[..., Any]]): List of evaluator functions.
+             Each evaluator function takes the output of the executor _and_
+             the target data, and returns a score. The score can be a
+             single number or a record of string keys and number values.
+             If the score is a single number, it will be named after the
+             evaluator function. If the function is anonymous, it will be
+             named `evaluator_${index}`, where index is the index of the
+             evaluator function in the list starting from 1.
+         group_id (Optional[str], optional): Group name which is same
+             as the feature you are evaluating in your project or application.
+             Defaults to "default".
+         name (Optional[str], optional): Optional name of the evaluation. Used to easily
+             identify the evaluation in the group.
+         batch_size (int, optional): The batch size for evaluation.
+             Defaults to DEFAULT_BATCH_SIZE.
+         project_api_key (Optional[str], optional): The project API key.
+             Defaults to an empty string.
+         base_url (Optional[str], optional): The base URL for the Laminar API.
+             Useful if self-hosted elsewhere.
+             Defaults to "https://api.lmnr.ai".
+         http_port (Optional[int], optional): The port for the Laminar API HTTP service.
+             Defaults to 443.
+         grpc_port (Optional[int], optional): The port for the Laminar API gRPC service.
+             Defaults to 8443.
+         instruments (Optional[Set[Instruments]], optional): Set of modules to auto-instrument.
+             Defaults to None. If None, all available instruments will be used.
+     """
+
+     evaluation = Evaluation(
+         data=data,
+         executor=executor,
+         evaluators=evaluators,
+         group_id=group_id,
+         name=name,
+         batch_size=batch_size,
+         project_api_key=project_api_key,
+         base_url=base_url,
+         http_port=http_port,
+         grpc_port=grpc_port,
+         instruments=instruments,
+     )
+
+     global _evaluation
+     if _set_global_evaluation:
+         _evaluation = evaluation
+     else:
+         return evaluation.run()
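Outside of the `lmnr eval` entrypoint, `evaluate` either blocks until completion (no running event loop) or returns a task scheduled on the running loop, as `Evaluation.run` above shows. A sketch that pairs it with a custom `EvaluationDataset`; the `Datapoint(data=..., target=...)` construction is an assumption based on the docstrings, and the dataset contents are illustrative:

from lmnr import evaluate
from lmnr.sdk.evaluations import EvaluationDataset
from lmnr.sdk.types import Datapoint  # field names assumed from the docstrings above

class TinyDataset(EvaluationDataset):
    def __init__(self):
        self._points = [
            Datapoint(data={"text": "hello"}, target="HELLO"),
            Datapoint(data={"text": "world"}, target="WORLD"),
        ]

    def __len__(self) -> int:
        return len(self._points)

    def __getitem__(self, idx) -> Datapoint:
        return self._points[idx]

def shout(data: dict) -> str:
    # Placeholder executor.
    return data["text"].upper()

def correct(output: str, target: str) -> int:
    # Placeholder evaluator.
    return 1 if output == target else 0

# Blocks in a plain script; returns an awaitable if an event loop is already running.
evaluate(data=TinyDataset(), executor=shout, evaluators={"correct": correct}, batch_size=2)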