lmnr 0.4.11__tar.gz → 0.4.12b1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {lmnr-0.4.11 → lmnr-0.4.12b1}/PKG-INFO +3 -1
  2. {lmnr-0.4.11 → lmnr-0.4.12b1}/pyproject.toml +5 -6
  3. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/__init__.py +1 -1
  4. lmnr-0.4.12b1/src/lmnr/cli.py +39 -0
  5. lmnr-0.4.12b1/src/lmnr/sdk/evaluations.py +290 -0
  6. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/sdk/laminar.py +49 -34
  7. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/sdk/types.py +6 -2
  8. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/__init__.py +0 -13
  9. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tracing/tracing.py +0 -59
  10. lmnr-0.4.11/src/lmnr/sdk/evaluations.py +0 -178
  11. {lmnr-0.4.11 → lmnr-0.4.12b1}/LICENSE +0 -0
  12. {lmnr-0.4.11 → lmnr-0.4.12b1}/README.md +0 -0
  13. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/sdk/__init__.py +0 -0
  14. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/sdk/decorators.py +0 -0
  15. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/sdk/log.py +0 -0
  16. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/sdk/utils.py +0 -0
  17. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/.flake8 +0 -0
  18. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/.python-version +0 -0
  19. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/config/__init__.py +0 -0
  20. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
  21. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/decorators/base.py +0 -0
  22. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/instruments.py +0 -0
  23. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/metrics/__init__.py +0 -0
  24. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/metrics/metrics.py +0 -0
  25. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
  26. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
  27. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
  28. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
  29. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
  30. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
  31. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
  32. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
  33. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
  34. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
  35. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
  36. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
  37. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
  38. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
  39. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
  40. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
  41. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
  42. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
  43. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
  44. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
  45. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
  46. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
  47. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
  48. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tracing/manual.py +0 -0
  49. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
  50. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
  51. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
  52. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
  53. {lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/version.py +0 -0
{lmnr-0.4.11 → lmnr-0.4.12b1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: lmnr
- Version: 0.4.11
+ Version: 0.4.12b1
  Summary: Python SDK for Laminar AI
  License: Apache-2.0
  Author: lmnr.ai
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
+ Requires-Dist: argparse (>=1.0,<2.0)
  Requires-Dist: asyncio (>=3.0,<4.0)
  Requires-Dist: backoff (>=2.0,<3.0)
  Requires-Dist: colorama (>=0.4,<0.5)
@@ -54,6 +55,7 @@ Requires-Dist: pydantic (>=2.7,<3.0)
  Requires-Dist: python-dotenv (>=1.0,<2.0)
  Requires-Dist: requests (>=2.0,<3.0)
  Requires-Dist: tenacity (>=8.0,<9.0)
+ Requires-Dist: tqdm (>=4.0,<5.0)
  Description-Content-Type: text/markdown
 
  # Laminar Python
{lmnr-0.4.11 → lmnr-0.4.12b1}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "lmnr"
- version = "0.4.11"
+ version = "0.4.12b1"
  description = "Python SDK for Laminar AI"
  authors = [
      { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"
 
  [tool.poetry]
  name = "lmnr"
- version = "0.4.11"
+ version = "0.4.12b1"
  description = "Python SDK for Laminar AI"
  authors = ["lmnr.ai"]
  readme = "README.md"
@@ -62,6 +62,8 @@ opentelemetry-instrumentation-weaviate = "^0.30.0"
  opentelemetry-instrumentation-alephalpha = "^0.30.0"
  opentelemetry-instrumentation-marqo = "^0.30.0"
  opentelemetry-instrumentation-groq = "^0.30.0"
+ tqdm = "~=4.0"
+ argparse = "~=1.0"
 
  [tool.poetry.group.dev.dependencies]
  autopep8 = "^2.2.0"
@@ -83,11 +85,8 @@ langchain-openai = "^0.1.15"
  requires = ["poetry-core"]
  build-backend = "poetry.core.masonry.api"
 
- [project.entry-points.console_scripts]
- lmnr = "lmnr.cli.cli:cli"
-
  [tool.poetry.scripts]
- lmnr = "lmnr.cli.cli:cli"
+ lmnr = "lmnr.cli:cli"
 
  [project.optional-dependencies]
  test = ["pytest"]
{lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/__init__.py
@@ -1,4 +1,4 @@
- from .sdk.evaluations import Evaluation
+ from .sdk.evaluations import evaluate
  from .sdk.laminar import Laminar
  from .sdk.types import ChatMessage, PipelineRunError, PipelineRunResponse, NodeInput
  from .sdk.decorators import observe
lmnr-0.4.12b1/src/lmnr/cli.py
@@ -0,0 +1,39 @@
+ from argparse import ArgumentParser
+ import asyncio
+ import importlib
+ import os
+ import sys
+
+ from lmnr.sdk.evaluations import set_global_evaluation
+
+
+ # TODO: Refactor this code
+ async def run_evaluation(args):
+     sys.path.insert(0, os.getcwd())
+
+     with set_global_evaluation(True):
+         file = os.path.abspath(args.file)
+
+         spec = importlib.util.spec_from_file_location("run_eval", file)
+         mod = importlib.util.module_from_spec(spec)
+         spec.loader.exec_module(mod)
+
+         from lmnr.sdk.evaluations import _evaluation
+         evaluation = _evaluation
+         await evaluation.run()
+
+
+ def cli():
+     parser = ArgumentParser(
+         prog="lmnr",
+         description="CLI for Laminar",
+     )
+
+     subparsers = parser.add_subparsers(title="subcommands", dest="subcommand")
+
+     parser_eval = subparsers.add_parser("eval", description="Run an evaluation")
+     parser_eval.add_argument("file", help="A file containing the evaluation to run")
+     parser_eval.set_defaults(func=run_evaluation)
+
+     parsed = parser.parse_args()
+     asyncio.run(parsed.func(parsed))
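
Note: the new `eval` subcommand imports the user-supplied file while `set_global_evaluation(True)` is active, so a call to `evaluate(...)` inside that file registers a global evaluation instead of running it immediately, and the CLI then awaits it. A minimal sketch of such a file, assuming the project API key is available via `LMNR_PROJECT_API_KEY`; the file name, dataset contents, and function names below are hypothetical, not part of the package:

```python
# my_eval.py -- hypothetical example, run with `lmnr eval my_eval.py`
from lmnr import evaluate


def write_poem(data: dict) -> str:
    # Executor: produces the output to be scored (stubbed here).
    return f"A poem about {data['topic']}"


def contains_topic(output: str, target: dict) -> int:
    # Evaluator: compares executor output against the target, returns a score.
    return int(target["topic"] in output)


evaluate(
    name="poems",
    data=[{"data": {"topic": "the sea"}, "target": {"topic": "the sea"}}],
    executor=write_poem,
    evaluators=[contains_topic],
)
```

Run directly with `python my_eval.py`, the same file would execute the evaluation immediately, since `_set_global_evaluation` stays `False` outside the CLI.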
lmnr-0.4.12b1/src/lmnr/sdk/evaluations.py
@@ -0,0 +1,290 @@
+ import asyncio
+ import sys
+ from abc import ABC, abstractmethod
+ from contextlib import contextmanager
+ from typing import Any, Awaitable, Optional, Union
+
+ from tqdm import tqdm
+
+ from .laminar import Laminar as L
+ from .types import CreateEvaluationResponse, Datapoint, EvaluationResultDatapoint, Numeric
+ from .utils import is_async
+
+ DEFAULT_BATCH_SIZE = 5
+
+ _evaluation = None
+ _set_global_evaluation = False
+
+
+ @contextmanager
+ def set_global_evaluation(set_global_evaluation: bool):
+     global _set_global_evaluation
+     original = _set_global_evaluation
+     try:
+         _set_global_evaluation = set_global_evaluation
+         yield
+     finally:
+         _set_global_evaluation = original
+         pass
+
+
+ def get_evaluation_url(project_id: str, evaluation_id: str):
+     return f"https://www.lmnr.ai/project/{project_id}/evaluations/{evaluation_id}"
+
+
+ class EvaluationReporter:
+     def __init__(self):
+         pass
+
+     def start(self, name: str, project_id: str, id: str, length: int):
+         print(f"Running evaluation {name}...\n")
+         print(f"Check progress and results at {get_evaluation_url(project_id, id)}\n")
+         self.cli_progress = tqdm(total=length, bar_format="{bar} {percentage:3.0f}% | ETA: {remaining}s | {n_fmt}/{total_fmt}", ncols=60)
+
+     def update(self, batch_length: int):
+         self.cli_progress.update(batch_length)
+
+     def stopWithError(self, error: Exception):
+         self.cli_progress.close()
+         sys.stderr.write(f"\nError: {error}\n")
+
+     def stop(self, average_scores: dict[str, Numeric]):
+         self.cli_progress.close()
+         print("\nAverage scores:")
+         for (name, score) in average_scores.items():
+             print(f"{name}: {score}")
+         print("\n")
+
+
+ class EvaluationDataset(ABC):
+     @abstractmethod
+     def __init__(self, *args, **kwargs):
+         pass
+
+     @abstractmethod
+     def __len__(self) -> int:
+         pass
+
+     @abstractmethod
+     def __getitem__(self, idx) -> Datapoint:
+         pass
+
+     def slice(self, start: int, end: int):
+         return [self[i] for i in range(max(start, 0), min(end, len(self)))]
+
+
+ class Evaluation:
+     def __init__(
+         self,
+         name: str,
+         data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
+         executor: Any,
+         evaluators: list[Any],
+         batch_size: int = DEFAULT_BATCH_SIZE,
+         project_api_key: Optional[str] = None,
+         base_url: Optional[str] = None,
+         http_port: Optional[int] = None,
+     ):
+         """
+         Initializes an instance of the Evaluations class.
+
+         Parameters:
+             name (str): The name of the evaluation.
+             data (Union[List[Union[EvaluationDatapoint, dict]], EvaluationDataset]): List of data points to evaluate or an evaluation dataset.
+                 `data` is the input to the executor function,
+                 `target` is the input to the evaluator function.
+             executor (Callable[..., Any]): The executor function.
+                 Takes the data point + any additional arguments
+                 and returns the output to evaluate.
+             evaluators (List[Callable[..., Any]]): List of evaluator functions.
+                 Each evaluator function takes the output of the executor _and_
+                 the target data, and returns a score. The score can be a
+                 single number or a record of string keys and number values.
+                 If the score is a single number, it will be named after the
+                 evaluator function. If the function is anonymous, it will be
+                 named `evaluator_${index}`, where index is the index of the
+                 evaluator function in the list starting from 1.
+             batch_size (int, optional): The batch size for evaluation.
+                 Defaults to DEFAULT_BATCH_SIZE.
+             project_api_key (Optional[str], optional): The project API key.
+                 Defaults to an empty string.
+             base_url (Optional[str], optional): The base URL for the Laminar API.
+                 Useful if self-hosted elsewhere.
+                 Defaults to "https://api.lmnr.ai".
+             http_port (Optional[int], optional): The port for the Laminar API HTTP service.
+                 Defaults to 443.
+         """
+
+         self.is_finished = False
+         self.name = name
+         self.reporter = EvaluationReporter()
+         self.executor = executor
+         self.evaluators = dict(
+             zip(
+                 [
+                     (
+                         e.__name__
+                         if e.__name__ and e.__name__ != "<lambda>"
+                         else f"evaluator_{i+1}"
+                     )
+                     for i, e in enumerate(evaluators)
+                 ],
+                 evaluators,
+             )
+         )
+         self.evaluator_names = list(self.evaluators.keys())
+         if isinstance(data, list):
+             self.data = [
+                 (
+                     Datapoint.model_validate(point)
+                     if isinstance(point, dict)
+                     else point
+                 )
+                 for point in data
+             ]
+         else:
+             self.data = data
+         self.batch_size = batch_size
+         L.initialize(
+             project_api_key=project_api_key,
+             base_url=base_url,
+             http_port=http_port,
+             instruments=set(),
+         )
+
+     def run(self) -> Union[None, Awaitable[None]]:
+         """Runs the evaluation.
+
+         Creates a new evaluation if no evaluation with such name exists, or
+         adds data to an existing one otherwise. Evaluates data points in
+         batches of `self.batch_size`. The executor
+         function is called on each data point to get the output,
+         and then evaluate it by each evaluator function.
+
+         Usage:
+         ```python
+         # in a synchronous context:
+         e.run()
+         # in an asynchronous context:
+         await e.run()
+         ```
+
+         """
+         if self.is_finished:
+             raise Exception("Evaluation is already finished")
+
+         loop = asyncio.get_event_loop()
+         if loop.is_running():
+             return loop.create_task(self._run())
+         else:
+             return loop.run_until_complete(self._run())
+
+     async def _run(self) -> None:
+         evaluation = L.create_evaluation(self.name)
+         self.reporter.start(
+             evaluation.name,
+             evaluation.projectId,
+             evaluation.id,
+             len(self.data),
+         )
+
+         try:
+             await self.evaluate_in_batches(evaluation)
+         except Exception as e:
+             L.update_evaluation_status(evaluation.id, "Error")
+             self.reporter.stopWithError(e)
+             self.is_finished = True
+             return
+
+         # If we update with status "Finished", we expect averageScores to be not empty
+         updated_evaluation = L.update_evaluation_status(evaluation.id, "Finished")
+         self.reporter.stop(updated_evaluation.averageScores)
+         self.is_finished = True
+
+     async def evaluate_in_batches(self, evaluation: CreateEvaluationResponse):
+         for i in range(0, len(self.data), self.batch_size):
+             batch = (
+                 self.data[i: i + self.batch_size]
+                 if isinstance(self.data, list)
+                 else self.data.slice(i, i + self.batch_size)
+             )
+             try:
+                 results = await self._evaluate_batch(batch)
+                 L.post_evaluation_results(evaluation.id, results)
+             except Exception as e:
+                 print(f"Error evaluating batch: {e}")
+             finally:
+                 self.reporter.update(len(batch))
+
+     async def _evaluate_batch(self, batch: list[Datapoint]) -> list[EvaluationResultDatapoint]:
+         batch_promises = [self._evaluate_datapoint(datapoint) for datapoint in batch]
+         results = await asyncio.gather(*batch_promises)
+         return results
+
+     async def _evaluate_datapoint(self, datapoint) -> EvaluationResultDatapoint:
+         output = (
+             await self.executor(datapoint.data)
+             if is_async(self.executor)
+             else self.executor(datapoint.data)
+         )
+         target = datapoint.target
+
+         # Iterate over evaluators
+         scores: dict[str, Numeric] = {}
+         for evaluator_name in self.evaluator_names:
+             evaluator = self.evaluators[evaluator_name]
+             value = (
+                 await evaluator(output, target)
+                 if is_async(evaluator)
+                 else evaluator(output, target)
+             )
+
+             # If evaluator returns a single number, use evaluator name as key
+             if isinstance(value, Numeric):
+                 scores[evaluator_name] = value
+             else:
+                 scores.update(value)
+
+         return EvaluationResultDatapoint(
+             data=datapoint.data,
+             target=target,
+             executorOutput=output,
+             scores=scores,
+         )
+
+
+ def evaluate(
+     name: str,
+     data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
+     executor: Any,
+     evaluators: list[Any],
+     batch_size: int = DEFAULT_BATCH_SIZE,
+     project_api_key: Optional[str] = None,
+     base_url: Optional[str] = None,
+     http_port: Optional[int] = None,
+ ) -> Optional[Awaitable[None]]:
+     """
+     Run evaluation.
+
+     If `_set_global_evaluation` is `True`, sets the global evaluation to be run in another part of the program.
+
+     Otherwise, if there is no event loop, runs the evaluation in the current thread until completion.
+     If there is an event loop, schedules the evaluation as a task in the event loop and returns an awaitable handle.
+     """
+
+     evaluation = Evaluation(
+         name,
+         data,
+         executor,
+         evaluators,
+         batch_size,
+         project_api_key,
+         base_url,
+         http_port,
+     )
+
+     global _evaluation
+     if _set_global_evaluation:
+         _evaluation = evaluation
+     else:
+         return evaluation.run()
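
Note: besides plain lists of datapoints, `evaluate` accepts any subclass of the `EvaluationDataset` ABC above, and `evaluate_in_batches` pulls batches from it via `slice`. A hedged sketch of such a subclass, assuming the classes shown in this file; the JSONL file name and record layout are illustrative only:

```python
# jsonl_dataset.py -- hypothetical lazy dataset built on the EvaluationDataset ABC
import json

from lmnr.sdk.evaluations import EvaluationDataset
from lmnr.sdk.types import Datapoint


class JsonlDataset(EvaluationDataset):
    def __init__(self, path: str):
        # Load everything up front for simplicity; a real dataset could read lazily.
        with open(path) as f:
            self._points = [json.loads(line) for line in f]

    def __len__(self) -> int:
        return len(self._points)

    def __getitem__(self, idx) -> Datapoint:
        # Each JSONL record is expected to carry "data" and "target" keys.
        return Datapoint.model_validate(self._points[idx])
```

An instance can then be passed as the `data` argument to `evaluate`, alongside the executor and evaluator functions.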
{lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/sdk/laminar.py
@@ -37,11 +37,13 @@ from .types import (
      PipelineRunResponse,
      NodeInput,
      PipelineRunRequest,
+     UpdateEvaluationResponse,
  )
 
 
  class Laminar:
-     __base_url: str = "https://api.lmnr.ai:8443"
+     __base_http_url: str
+     __base_grpc_url: str
      __project_api_key: Optional[str] = None
      __env: dict[str, str] = {}
      __initialized: bool = False
@@ -52,6 +54,8 @@ class Laminar:
          project_api_key: Optional[str] = None,
          env: dict[str, str] = {},
          base_url: Optional[str] = None,
+         http_port: Optional[int] = None,
+         grpc_port: Optional[int] = None,
          instruments: Optional[Set[Instruments]] = None,
      ):
          """Initialize Laminar context across the application.
@@ -71,13 +75,12 @@
                  overriden at request time. Usually, model
                  provider keys are stored here.
                  Defaults to {}.
-             base_url (Optional[str], optional): Url of Laminar endpoint,
-                 or the customopen telemetry ingester.
-                 If not specified, defaults to
-                 https://api.lmnr.ai:8443.
-                 For locally hosted Laminar, default setting
-                 must be http://localhost:8001
-                 Defaults to None.
+             base_url (Optional[str], optional): Laminar API url.
+                 If not specified, defaults to https://api.lmnr.ai.
+             http_port (Optional[int], optional): Laminar API http port.
+                 If not specified, defaults to 443.
+             grpc_port (Optional[int], optional): Laminar API grpc port.
+                 If not specified, defaults to 8443.
 
          Raises:
              ValueError: If project API key is not set
@@ -85,7 +88,7 @@
          cls.__project_api_key = project_api_key or os.environ.get(
              "LMNR_PROJECT_API_KEY"
          )
-         if not project_api_key:
+         if not cls.__project_api_key:
              dotenv_path = dotenv.find_dotenv(usecwd=True)
              cls.__project_api_key = dotenv.get_key(
                  dotenv_path=dotenv_path, key_to_get="LMNR_PROJECT_API_KEY"
@@ -96,14 +99,16 @@
                  " your project API key or set the LMNR_PROJECT_API_KEY"
                  " environment variable in your environment or .env file"
              )
-         if base_url is not None:
-             cls.__base_url = base_url
+
+         cls.__base_http_url = f"{base_url or 'https://api.lmnr.ai'}:{http_port or 443}"
+         cls.__base_grpc_url = f"{base_url or 'https://api.lmnr.ai'}:{grpc_port or 8443}"
+
          cls.__env = env
          cls.__initialized = True
          cls._initialize_logger()
          Traceloop.init(
              exporter=OTLPSpanExporter(
-                 endpoint=cls.__base_url,
+                 endpoint=cls.__base_grpc_url,
                  headers={"authorization": f"Bearer {cls.__project_api_key}"},
              ),
              instruments=instruments,
@@ -190,7 +195,7 @@
              raise ValueError(f"Invalid request: {e}")
 
          response = requests.post(
-             cls.__base_url + "/v1/pipeline/run",
+             cls.__base_http_url + "/v1/pipeline/run",
              data=json.dumps(request.to_dict()),
              headers=cls._headers(),
          )
@@ -292,7 +297,7 @@
              )
              return
 
-         current_span.add_event(name, event)
+         current_span.add_event(name, event, timestamp)
 
      @classmethod
      @contextmanager
@@ -407,7 +412,7 @@
      @classmethod
      def create_evaluation(cls, name: str) -> CreateEvaluationResponse:
          response = requests.post(
-             cls.__base_url + "/v1/evaluations",
+             cls.__base_http_url + "/v1/evaluations",
              data=json.dumps({"name": name}),
              headers=cls._headers(),
          )
@@ -421,14 +426,14 @@
 
      @classmethod
      def post_evaluation_results(
-         cls, evaluation_name: str, data: list[EvaluationResultDatapoint]
+         cls, evaluation_id: uuid.UUID, data: list[EvaluationResultDatapoint]
      ) -> requests.Response:
          body = {
-             "name": evaluation_name,
-             "points": data,
+             "evaluationId": str(evaluation_id),
+             "points": [datapoint.model_dump() for datapoint in data],
          }
          response = requests.post(
-             cls.__base_url + "/v1/evaluation-datapoints",
+             cls.__base_http_url + "/v1/evaluation-datapoints",
              data=json.dumps(body),
              headers=cls._headers(),
          )
@@ -446,28 +451,38 @@
 
      @classmethod
      def update_evaluation_status(
-         cls, evaluation_name: str, status: str
-     ) -> requests.Response:
+         cls, evaluation_id: str, status: str
+     ) -> UpdateEvaluationResponse:
+         """
+         Updates the status of an evaluation. Returns the updated evaluation object.
+
+         Args:
+             evaluation_id (str): The ID of the evaluation to update.
+             status (str): The status to set for the evaluation.
+
+         Returns:
+             UpdateEvaluationResponse: The updated evaluation response.
+
+         Raises:
+             ValueError: If the request fails.
+         """
          body = {
-             "name": evaluation_name,
              "status": status,
          }
-         response = requests.put(
-             cls.__base_url + "/v1/evaluations/",
+         url = f"{cls.__base_http_url}/v1/evaluations/{evaluation_id}"
+
+         response = requests.post(
+             url,
              data=json.dumps(body),
              headers=cls._headers(),
          )
          if response.status_code != 200:
-             try:
-                 resp_json = response.json()
-                 raise ValueError(
-                     f"Failed to send evaluation status. Response: {json.dumps(resp_json)}"
-                 )
-             except Exception:
-                 raise ValueError(
-                     f"Failed to send evaluation status. Error: {response.text}"
-                 )
-         return response
+             raise ValueError(
+                 f"Failed to update evaluation status {evaluation_id}. "
+                 f"Response: {response.text}"
+             )
+
+         return UpdateEvaluationResponse.model_validate(response.json())
 
      @classmethod
      def _headers(cls):
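
Note: the net effect of the `base_url`/`http_port`/`grpc_port` change is that one host now yields two endpoints: HTTP API calls go to `{base_url}:{http_port}` and span export goes to `{base_url}:{grpc_port}`. A hedged illustration for a self-hosted deployment; the host and ports below are examples, not defaults:

```python
from lmnr import Laminar

# Hypothetical self-hosted setup: HTTP API on port 8000, gRPC trace ingest on 8001.
# With no base_url/port arguments, the SDK resolves to https://api.lmnr.ai:443 (HTTP)
# and https://api.lmnr.ai:8443 (gRPC), per the defaults shown in the diff above.
Laminar.initialize(
    project_api_key="<your-project-api-key>",
    base_url="http://localhost",
    http_port=8000,
    grpc_port=8001,
)
```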
{lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/sdk/types.py
@@ -79,7 +79,7 @@ EvaluationDatapointTarget = dict[str, Any]
 
 
  # EvaluationDatapoint is a single data point in the evaluation
- class EvaluationDatapoint(pydantic.BaseModel):
+ class Datapoint(pydantic.BaseModel):
      # input to the executor function. Must be a dict with string keys
      data: EvaluationDatapointData
      # input to the evaluator function (alongside the executor output).
@@ -114,10 +114,14 @@ class CreateEvaluationResponse(pydantic.BaseModel):
      status: EvaluationStatus
      projectId: uuid.UUID
      metadata: Optional[dict[str, Any]] = None
+     averageScores: Optional[dict[str, Numeric]] = None
+
+
+ UpdateEvaluationResponse = CreateEvaluationResponse
 
 
  class EvaluationResultDatapoint(pydantic.BaseModel):
      data: EvaluationDatapointData
      target: EvaluationDatapointTarget
-     executor_output: ExecutorFunctionReturnType
+     executorOutput: ExecutorFunctionReturnType
      scores: dict[str, Numeric]
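
Note: the renamed `Datapoint` model and the camelCase `executorOutput` field line up with what the new evaluation loop posts via `model_dump()`. A small hedged sketch of constructing these models; the field values are illustrative, and the exact permissible types for the executor output depend on `ExecutorFunctionReturnType` as defined in this module:

```python
from lmnr.sdk.types import Datapoint, EvaluationResultDatapoint

# A single evaluation datapoint: "data" feeds the executor, "target" feeds evaluators.
point = Datapoint.model_validate({
    "data": {"topic": "the sea"},
    "target": {"topic": "the sea"},
})

# Shape of what _evaluate_datapoint produces; note executorOutput after the rename.
result = EvaluationResultDatapoint(
    data=point.data,
    target=point.target,
    executorOutput="A poem about the sea",
    scores={"contains_topic": 1},
)

# post_evaluation_results serializes each result with model_dump() before sending.
print(result.model_dump())
```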
{lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/__init__.py
@@ -55,9 +55,6 @@ class Traceloop:
 
          enable_content_tracing = is_content_tracing_enabled()
 
-         if exporter or processor:
-             print(Fore.GREEN + "Laminar exporting traces to a custom exporter")
-
          headers = os.getenv("TRACELOOP_HEADERS") or headers
 
          if isinstance(headers, str):
@@ -78,17 +75,7 @@
              print(Fore.RESET)
              return
 
-         if not exporter and not processor and headers:
-             print(
-                 Fore.GREEN
-                 + f"Laminar exporting traces to {api_endpoint}, authenticating with custom headers"
-             )
-
          if api_key and not exporter and not processor and not headers:
-             print(
-                 Fore.GREEN
-                 + f"Laminar exporting traces to {api_endpoint} authenticating with bearer token"
-             )
              headers = {
                  "Authorization": f"Bearer {api_key}",
              }
{lmnr-0.4.11 → lmnr-0.4.12b1}/src/lmnr/traceloop_sdk/tracing/tracing.py
@@ -124,46 +124,34 @@ class TracerWrapper(object):
              # this makes sure otel context is propagated so we always want it
              ThreadingInstrumentor().instrument()
 
-             instrument_set = False
              if instruments is None:
                  init_instrumentations(should_enrich_metrics)
-                 instrument_set = True
              else:
                  for instrument in instruments:
                      if instrument == Instruments.OPENAI:
                          if not init_openai_instrumentor(should_enrich_metrics):
                              print(Fore.RED + "Warning: OpenAI library does not exist.")
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.ANTHROPIC:
                          if not init_anthropic_instrumentor(should_enrich_metrics):
                              print(
                                  Fore.RED + "Warning: Anthropic library does not exist."
                              )
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.COHERE:
                          if not init_cohere_instrumentor():
                              print(Fore.RED + "Warning: Cohere library does not exist.")
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.PINECONE:
                          if not init_pinecone_instrumentor():
                              print(
                                  Fore.RED + "Warning: Pinecone library does not exist."
                              )
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.CHROMA:
                          if not init_chroma_instrumentor():
                              print(Fore.RED + "Warning: Chroma library does not exist.")
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.GOOGLE_GENERATIVEAI:
                          if not init_google_generativeai_instrumentor():
                              print(
@@ -171,44 +159,32 @@ class TracerWrapper(object):
                                  + "Warning: Google Generative AI library does not exist."
                              )
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.LANGCHAIN:
                          if not init_langchain_instrumentor():
                              print(
                                  Fore.RED + "Warning: LangChain library does not exist."
                              )
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.MISTRAL:
                          if not init_mistralai_instrumentor():
                              print(
                                  Fore.RED + "Warning: MistralAI library does not exist."
                              )
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.OLLAMA:
                          if not init_ollama_instrumentor():
                              print(Fore.RED + "Warning: Ollama library does not exist.")
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.LLAMA_INDEX:
                          if not init_llama_index_instrumentor():
                              print(
                                  Fore.RED + "Warning: LlamaIndex library does not exist."
                              )
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.MILVUS:
                          if not init_milvus_instrumentor():
                              print(Fore.RED + "Warning: Milvus library does not exist.")
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.TRANSFORMERS:
                          if not init_transformers_instrumentor():
                              print(
@@ -216,72 +192,52 @@ class TracerWrapper(object):
                                  + "Warning: Transformers library does not exist."
                              )
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.TOGETHER:
                          if not init_together_instrumentor():
                              print(
                                  Fore.RED + "Warning: TogetherAI library does not exist."
                              )
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.REQUESTS:
                          if not init_requests_instrumentor():
                              print(
                                  Fore.RED + "Warning: Requests library does not exist."
                              )
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.URLLIB3:
                          if not init_urllib3_instrumentor():
                              print(Fore.RED + "Warning: urllib3 library does not exist.")
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.PYMYSQL:
                          if not init_pymysql_instrumentor():
                              print(Fore.RED + "Warning: PyMySQL library does not exist.")
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.BEDROCK:
                          if not init_bedrock_instrumentor(should_enrich_metrics):
                              print(Fore.RED + "Warning: Bedrock library does not exist.")
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.REPLICATE:
                          if not init_replicate_instrumentor():
                              print(
                                  Fore.RED + "Warning: Replicate library does not exist."
                              )
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.VERTEXAI:
                          if not init_vertexai_instrumentor():
                              print(
                                  Fore.RED + "Warning: Vertex AI library does not exist."
                              )
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.WATSONX:
                          if not init_watsonx_instrumentor():
                              print(Fore.RED + "Warning: Watsonx library does not exist.")
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.WEAVIATE:
                          if not init_weaviate_instrumentor():
                              print(
                                  Fore.RED + "Warning: Weaviate library does not exist."
                              )
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.ALEPHALPHA:
                          if not init_alephalpha_instrumentor():
                              print(
@@ -289,26 +245,18 @@ class TracerWrapper(object):
                                  + "Warning: Aleph Alpha library does not exist."
                              )
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.MARQO:
                          if not init_marqo_instrumentor():
                              print(Fore.RED + "Warning: marqo library does not exist.")
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.LANCEDB:
                          if not init_lancedb_instrumentor():
                              print(Fore.RED + "Warning: LanceDB library does not exist.")
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
                      elif instrument == Instruments.REDIS:
                          if not init_redis_instrumentor():
                              print(Fore.RED + "Warning: redis library does not exist.")
                              print(Fore.RESET)
-                         else:
-                             instrument_set = True
 
                      else:
                          print(
@@ -324,13 +272,6 @@ class TracerWrapper(object):
                          )
                          print(Fore.RESET)
 
-             if not instrument_set:
-                 print(
-                     Fore.RED + "Warning: No valid instruments set. Remove 'instrument' "
-                     "argument to use all instruments, or set a valid instrument."
-                 )
-                 print(Fore.RESET)
-
              obj.__content_allow_list = ContentAllowList()
 
              # Force flushes for debug environments (e.g. local development)
lmnr-0.4.11/src/lmnr/sdk/evaluations.py
@@ -1,178 +0,0 @@
- from typing import Any, Union
-
- from .types import EvaluationDatapoint
- from .utils import is_async
- from .laminar import Laminar as L
- import asyncio
-
- from abc import ABC, abstractmethod
-
- DEFAULT_BATCH_SIZE = 5
-
-
- class EvaluationDataset(ABC):
-     @abstractmethod
-     def __init__(self, *args, **kwargs):
-         pass
-
-     @abstractmethod
-     def __len__(self) -> int:
-         pass
-
-     @abstractmethod
-     def __getitem__(self, idx) -> EvaluationDatapoint:
-         pass
-
-     def slice(self, start: int, end: int):
-         return [self[i] for i in range(max(start, 0), min(end, len(self)))]
-
-
- class Evaluation:
-     def __init__(
-         self,
-         name,
-         data: Union[EvaluationDataset, list[Union[EvaluationDatapoint, dict]]],
-         executor: Any,
-         evaluators: list[Any],
-         batch_size: int = DEFAULT_BATCH_SIZE,
-         project_api_key: str = "",
-         base_url: str = "https://api.lmnr.ai",
-     ):
-         """
-         Initializes an instance of the Evaluations class.
-         Parameters:
-             name (str): The name of the evaluation.
-             data (Union[List[Union[EvaluationDatapoint, dict]], EvaluationDataset]): List of data points to evaluate or an evaluation dataset.
-                 `data` is the input to the executor function,
-                 `target` is the input to the evaluator function.
-             executor (Callable[..., Any]): The executor function.
-                 Takes the data point + any additional arguments
-                 and returns the output to evaluate.
-             evaluators (List[Callable[..., Any]]): List of evaluator functions.
-                 Each evaluator function takes the output of the executor _and_
-                 the target data, and returns a score. The score can be a
-                 single number or a record of string keys and number values.
-                 If the score is a single number, it will be named after the
-                 evaluator function. If the function is anonymous, it will be
-                 named `evaluator_${index}`, where index is the index of the
-                 evaluator function in the list starting from 1.
-             batch_size (int, optional): The batch size for evaluation.
-                 Defaults to DEFAULT_BATCH_SIZE.
-             project_api_key (str, optional): The project API key.
-                 Defaults to an empty string.
-             base_url (str, optional): The base URL for the LMNR API.
-                 Useful if self-hosted elsewhere.
-                 Defaults to "https://api.lmnr.ai".
-         """
-
-         self.name = name
-         self.executor = executor
-         self.evaluators = dict(
-             zip(
-                 [
-                     (
-                         e.__name__
-                         if e.__name__ and e.__name__ != "<lambda>"
-                         else f"evaluator_{i+1}"
-                     )
-                     for i, e in enumerate(evaluators)
-                 ],
-                 evaluators,
-             )
-         )
-         self.evaluator_names = list(self.evaluators.keys())
-         if isinstance(data, list):
-             self.data = [
-                 (
-                     EvaluationDatapoint.model_validate(point)
-                     if isinstance(point, dict)
-                     else point
-                 )
-                 for point in data
-             ]
-         else:
-             self.data = data
-         self.batch_size = batch_size
-         L.initialize(project_api_key=project_api_key, base_url=base_url)
-
-     def run(self):
-         """Runs the evaluation.
-
-         Creates a new evaluation if no evaluation with such name exists, or
-         adds data to an existing one otherwise. Evaluates data points in
-         batches of `self.batch_size`. The executor
-         function is called on each data point to get the output,
-         and then evaluate it by each evaluator function.
-
-         Usage:
-         ```python
-         # in a synchronous context:
-         e.run()
-         # in an asynchronous context:
-         await e.run()
-         ```
-
-         """
-         loop = asyncio.get_event_loop()
-         if loop.is_running():
-             return loop.create_task(self._run())
-         else:
-             return loop.run_until_complete(self._run())
-
-     async def _run(self):
-         response = L.create_evaluation(self.name)
-
-         # Process batches sequentially
-         for i in range(0, len(self.data), self.batch_size):
-             batch = (
-                 self.data[i : i + self.batch_size]
-                 if isinstance(self.data, list)
-                 else self.data.slice(i, i + self.batch_size)
-             )
-             try:
-                 await self._evaluate_batch(batch)
-             except Exception as e:
-                 print(f"Error evaluating batch: {e}")
-
-         try:
-             L.update_evaluation_status(response.name, "Finished")
-             print(f"Evaluation {response.id} complete")
-         except Exception as e:
-             print(f"Error updating evaluation status: {e}")
-
-     async def _evaluate_batch(self, batch: list[EvaluationDatapoint]):
-         batch_promises = [self._evaluate_datapoint(datapoint) for datapoint in batch]
-         results = await asyncio.gather(*batch_promises)
-
-         return L.post_evaluation_results(self.name, results)
-
-     async def _evaluate_datapoint(self, datapoint):
-         output = (
-             await self.executor(datapoint.data)
-             if is_async(self.executor)
-             else self.executor(datapoint.data)
-         )
-         target = datapoint.target
-
-         # Iterate over evaluators
-         scores = {}
-         for evaluator_name in self.evaluator_names:
-             evaluator = self.evaluators[evaluator_name]
-             value = (
-                 await evaluator(output, target)
-                 if is_async(evaluator)
-                 else evaluator(output, target)
-             )
-
-             # If evaluator returns a single number, use evaluator name as key
-             if isinstance(value, (int, float)):
-                 scores[evaluator_name] = value
-             else:
-                 scores.update(value)
-
-         return {
-             "executorOutput": output,
-             "data": datapoint.data,
-             "target": target,
-             "scores": scores,
-         }