lmnr 0.4.27__tar.gz → 0.4.29b0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lmnr-0.4.27 → lmnr-0.4.29b0}/PKG-INFO +1 -1
- {lmnr-0.4.27 → lmnr-0.4.29b0}/pyproject.toml +2 -2
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/__init__.py +7 -1
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/sdk/datasets.py +1 -1
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/sdk/evaluations.py +43 -12
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/sdk/types.py +12 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tracing/attributes.py +1 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tracing/tracing.py +11 -6
- {lmnr-0.4.27 → lmnr-0.4.29b0}/LICENSE +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/README.md +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/cli.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/sdk/__init__.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/sdk/decorators.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/sdk/laminar.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/sdk/log.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/sdk/utils.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/.flake8 +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/.python-version +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/__init__.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/config/__init__.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/decorators/base.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/instruments.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
- {lmnr-0.4.27 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/version.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "lmnr"
|
3
|
-
version = "0.4.27"
|
3
|
+
version = "0.4.29b0"
|
4
4
|
description = "Python SDK for Laminar AI"
|
5
5
|
authors = [
|
6
6
|
{ name = "lmnr.ai", email = "founders@lmnr.ai" }
|
@@ -11,7 +11,7 @@ license = "Apache-2.0"
|
|
11
11
|
|
12
12
|
[tool.poetry]
|
13
13
|
name = "lmnr"
|
14
|
-
version = "0.4.27"
|
14
|
+
version = "0.4.29b0"
|
15
15
|
description = "Python SDK for Laminar AI"
|
16
16
|
authors = ["lmnr.ai"]
|
17
17
|
readme = "README.md"
|
@@ -1,7 +1,13 @@
|
|
1
1
|
from .sdk.datasets import EvaluationDataset, LaminarDataset
|
2
2
|
from .sdk.evaluations import evaluate
|
3
3
|
from .sdk.laminar import Laminar
|
4
|
-
from .sdk.types import
|
4
|
+
from .sdk.types import (
|
5
|
+
ChatMessage,
|
6
|
+
HumanEvaluator,
|
7
|
+
NodeInput,
|
8
|
+
PipelineRunError,
|
9
|
+
PipelineRunResponse,
|
10
|
+
)
|
5
11
|
from .sdk.decorators import observe
|
6
12
|
from .traceloop_sdk import Instruments
|
7
13
|
from .traceloop_sdk.tracing.attributes import Attributes
|
@@ -34,7 +34,7 @@ class LaminarDataset(EvaluationDataset):
|
|
34
34
|
self._fetched_items = []
|
35
35
|
self._offset = 0
|
36
36
|
self._fetch_size = fetch_size
|
37
|
-
self._logger = get_default_logger(self.__class__.__name__
|
37
|
+
self._logger = get_default_logger(self.__class__.__name__)
|
38
38
|
|
39
39
|
def _fetch_batch(self):
|
40
40
|
self._logger.debug(
|
@@ -18,6 +18,7 @@ from .types import (
|
|
18
18
|
EvaluationResultDatapoint,
|
19
19
|
EvaluatorFunction,
|
20
20
|
ExecutorFunction,
|
21
|
+
HumanEvaluator,
|
21
22
|
Numeric,
|
22
23
|
NumericTypes,
|
23
24
|
SpanType,
|
@@ -99,6 +100,7 @@ class Evaluation:
|
|
99
100
|
data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
|
100
101
|
executor: Any,
|
101
102
|
evaluators: dict[str, EvaluatorFunction],
|
103
|
+
human_evaluators: dict[str, HumanEvaluator] = {},
|
102
104
|
name: Optional[str] = None,
|
103
105
|
group_id: Optional[str] = None,
|
104
106
|
batch_size: int = DEFAULT_BATCH_SIZE,
|
@@ -126,6 +128,11 @@ class Evaluation:
|
|
126
128
|
If the score is a single number, it will be named after the\
|
127
129
|
evaluator function. Evaluator function names must contain only\
|
128
130
|
letters, digits, hyphens, underscores, or spaces.
|
131
|
+
human_evaluators (dict[str, HumanEvaluator], optional):\
|
132
|
+
[Beta] Dictionary from human evaluator names to instances of\
|
133
|
+
HumanEvaluator. For now, human evaluator only holds the queue\
|
134
|
+
name.
|
135
|
+
Defaults to an empty dictionary.
|
129
136
|
name (Optional[str], optional): Optional name of the evaluation.\
|
130
137
|
Used to identify the evaluation in the group.\
|
131
138
|
If not provided, a random name will be generated.
|
@@ -159,14 +166,27 @@ class Evaluation:
|
|
159
166
|
if not evaluators:
|
160
167
|
raise ValueError("No evaluators provided")
|
161
168
|
|
162
|
-
|
169
|
+
evaluator_name_regex = re.compile(r"^[\w\s-]+$")
|
163
170
|
for evaluator_name in evaluators:
|
164
|
-
if not
|
171
|
+
if not evaluator_name_regex.match(evaluator_name):
|
165
172
|
raise ValueError(
|
166
173
|
f'Invalid evaluator key: "{evaluator_name}". '
|
167
174
|
"Keys must only contain letters, digits, hyphens,"
|
168
175
|
"underscores, or spaces."
|
169
176
|
)
|
177
|
+
for evaluator_name in human_evaluators or {}:
|
178
|
+
if not evaluator_name_regex.match(evaluator_name):
|
179
|
+
raise ValueError(
|
180
|
+
f'Invalid human evaluator key: "{evaluator_name}". '
|
181
|
+
"Keys must only contain letters, digits, hyphens,"
|
182
|
+
"underscores, or spaces."
|
183
|
+
)
|
184
|
+
|
185
|
+
if intersection := set(evaluators.keys()) & set(human_evaluators.keys()):
|
186
|
+
raise ValueError(
|
187
|
+
"Evaluator and human evaluator names must not overlap. "
|
188
|
+
f"Repeated keys: {intersection}"
|
189
|
+
)
|
170
190
|
|
171
191
|
self.is_finished = False
|
172
192
|
self.reporter = EvaluationReporter()
|
@@ -183,6 +203,7 @@ class Evaluation:
|
|
183
203
|
self.name = name
|
184
204
|
self.batch_size = batch_size
|
185
205
|
self._logger = get_default_logger(self.__class__.__name__)
|
206
|
+
self.human_evaluators = human_evaluators
|
186
207
|
L.initialize(
|
187
208
|
project_api_key=project_api_key,
|
188
209
|
base_url=base_url,
|
@@ -202,9 +223,7 @@ class Evaluation:
|
|
202
223
|
return loop.run_until_complete(self._run())
|
203
224
|
|
204
225
|
async def _run(self) -> None:
|
205
|
-
self.reporter.start(
|
206
|
-
len(self.data),
|
207
|
-
)
|
226
|
+
self.reporter.start(len(self.data))
|
208
227
|
|
209
228
|
try:
|
210
229
|
result_datapoints = await self._evaluate_in_batches()
|
@@ -212,13 +231,19 @@ class Evaluation:
|
|
212
231
|
self.reporter.stopWithError(e)
|
213
232
|
self.is_finished = True
|
214
233
|
return
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
234
|
+
|
235
|
+
# For now add all human evaluators to all result datapoints
|
236
|
+
# In the future, we will add ways to specify which human evaluators
|
237
|
+
# to add to which result datapoints, e.g. sample some randomly
|
238
|
+
for result_datapoint in result_datapoints:
|
239
|
+
result_datapoint.human_evaluators = self.human_evaluators or {}
|
240
|
+
|
241
|
+
evaluation = L.create_evaluation(
|
242
|
+
data=result_datapoints, group_id=self.group_id, name=self.name
|
243
|
+
)
|
244
|
+
average_scores = get_average_scores(result_datapoints)
|
245
|
+
self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
|
246
|
+
self.is_finished = True
|
222
247
|
|
223
248
|
async def _evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
|
224
249
|
result_datapoints = []
|
@@ -292,6 +317,7 @@ def evaluate(
|
|
292
317
|
data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
|
293
318
|
executor: ExecutorFunction,
|
294
319
|
evaluators: dict[str, EvaluatorFunction],
|
320
|
+
human_evaluators: dict[str, HumanEvaluator] = {},
|
295
321
|
name: Optional[str] = None,
|
296
322
|
group_id: Optional[str] = None,
|
297
323
|
batch_size: int = DEFAULT_BATCH_SIZE,
|
@@ -326,6 +352,10 @@ def evaluate(
|
|
326
352
|
If the score is a single number, it will be named after the\
|
327
353
|
evaluator function. Evaluator function names must contain only\
|
328
354
|
letters, digits, hyphens, underscores, or spaces.
|
355
|
+
human_evaluators (dict[str, HumanEvaluator], optional):\
|
356
|
+
[Beta] Dictionary from human evaluator names to instances of\
|
357
|
+
HumanEvaluator. For now, human evaluator only holds the queue name.
|
358
|
+
Defaults to an empty dictionary.
|
329
359
|
name (Optional[str], optional): Optional name of the evaluation.\
|
330
360
|
Used to identify the evaluation in the group.\
|
331
361
|
If not provided, a random name will be generated.
|
@@ -359,6 +389,7 @@ def evaluate(
|
|
359
389
|
executor=executor,
|
360
390
|
evaluators=evaluators,
|
361
391
|
group_id=group_id,
|
392
|
+
human_evaluators=human_evaluators,
|
362
393
|
name=name,
|
363
394
|
batch_size=batch_size,
|
364
395
|
project_api_key=project_api_key,
|
@@ -110,6 +110,13 @@ EvaluatorFunction = Callable[
|
|
110
110
|
]
|
111
111
|
|
112
112
|
|
113
|
+
class HumanEvaluator(pydantic.BaseModel):
|
114
|
+
queueName: str
|
115
|
+
|
116
|
+
def __init__(self, queue_name: str):
|
117
|
+
super().__init__(queueName=queue_name)
|
118
|
+
|
119
|
+
|
113
120
|
class CreateEvaluationResponse(pydantic.BaseModel):
|
114
121
|
id: uuid.UUID
|
115
122
|
createdAt: datetime.datetime
|
@@ -123,6 +130,7 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
|
|
123
130
|
target: EvaluationDatapointTarget
|
124
131
|
executor_output: ExecutorFunctionReturnType
|
125
132
|
scores: dict[str, Numeric]
|
133
|
+
human_evaluators: dict[str, HumanEvaluator] = pydantic.Field(default_factory=dict)
|
126
134
|
trace_id: uuid.UUID
|
127
135
|
|
128
136
|
# uuid is not serializable by default, so we need to convert it to a string
|
@@ -139,6 +147,10 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
|
|
139
147
|
"executorOutput": serialize(self.executor_output),
|
140
148
|
"scores": self.scores,
|
141
149
|
"traceId": str(self.trace_id),
|
150
|
+
"humanEvaluators": {
|
151
|
+
k: v.model_dump() if isinstance(v, pydantic.BaseModel) else serialize(v)
|
152
|
+
for k, v in self.human_evaluators.items()
|
153
|
+
},
|
142
154
|
}
|
143
155
|
|
144
156
|
|
@@ -5,6 +5,7 @@ SPAN_INPUT = "lmnr.span.input"
|
|
5
5
|
SPAN_OUTPUT = "lmnr.span.output"
|
6
6
|
SPAN_TYPE = "lmnr.span.type"
|
7
7
|
SPAN_PATH = "lmnr.span.path"
|
8
|
+
SPAN_INSTRUMENTATION_SOURCE = "lmnr.span.instrumentation_source"
|
8
9
|
|
9
10
|
ASSOCIATION_PROPERTIES = "lmnr.association.properties"
|
10
11
|
SESSION_ID = "session_id"
|
@@ -25,7 +25,11 @@ from opentelemetry.instrumentation.threading import ThreadingInstrumentor
|
|
25
25
|
|
26
26
|
# from lmnr.traceloop_sdk import Telemetry
|
27
27
|
from lmnr.traceloop_sdk.instruments import Instruments
|
28
|
-
from lmnr.traceloop_sdk.tracing.attributes import
|
28
|
+
from lmnr.traceloop_sdk.tracing.attributes import (
|
29
|
+
ASSOCIATION_PROPERTIES,
|
30
|
+
SPAN_INSTRUMENTATION_SOURCE,
|
31
|
+
SPAN_PATH,
|
32
|
+
)
|
29
33
|
from lmnr.traceloop_sdk.tracing.content_allow_list import ContentAllowList
|
30
34
|
from lmnr.traceloop_sdk.utils import is_notebook
|
31
35
|
from lmnr.traceloop_sdk.utils.package_check import is_package_installed
|
@@ -235,6 +239,8 @@ class TracerWrapper(object):
|
|
235
239
|
# the final part of the name to the span on the backend.
|
236
240
|
span.set_attribute(SPAN_PATH, span_path)
|
237
241
|
|
242
|
+
span.set_attribute(SPAN_INSTRUMENTATION_SOURCE, "python")
|
243
|
+
|
238
244
|
association_properties = get_value("association_properties")
|
239
245
|
if association_properties is not None:
|
240
246
|
_set_association_properties_attributes(span, association_properties)
|
@@ -266,10 +272,7 @@ class TracerWrapper(object):
|
|
266
272
|
if hasattr(cls, "instance"):
|
267
273
|
return True
|
268
274
|
|
269
|
-
|
270
|
-
return False
|
271
|
-
|
272
|
-
print("Warning: Laminar not initialized, make sure to initialize")
|
275
|
+
logging.warning("Warning: Laminar not initialized, make sure to initialize")
|
273
276
|
return False
|
274
277
|
|
275
278
|
def flush(self):
|
@@ -557,7 +560,9 @@ def init_langchain_instrumentor():
|
|
557
560
|
instrumentor.instrument()
|
558
561
|
return True
|
559
562
|
except Exception as e:
|
560
|
-
|
563
|
+
# FIXME: silence this error temporarily, it appears to not be critical
|
564
|
+
if str(e) != "No module named 'langchain_community'":
|
565
|
+
logging.error(f"Error initializing LangChain instrumentor: {e}")
|
561
566
|
# Telemetry().log_exception(e)
|
562
567
|
return False
|
563
568
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|