lmnr 0.6.16__py3-none-any.whl → 0.7.26__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package as they appear in their public registry. It is provided for informational purposes only.
Files changed (113)
  1. lmnr/__init__.py +6 -15
  2. lmnr/cli/__init__.py +270 -0
  3. lmnr/cli/datasets.py +371 -0
  4. lmnr/{cli.py → cli/evals.py} +20 -102
  5. lmnr/cli/rules.py +42 -0
  6. lmnr/opentelemetry_lib/__init__.py +9 -2
  7. lmnr/opentelemetry_lib/decorators/__init__.py +274 -168
  8. lmnr/opentelemetry_lib/litellm/__init__.py +352 -38
  9. lmnr/opentelemetry_lib/litellm/utils.py +82 -0
  10. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py +849 -0
  11. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/config.py +13 -0
  12. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_emitter.py +211 -0
  13. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_models.py +41 -0
  14. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/span_utils.py +401 -0
  15. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/streaming.py +425 -0
  16. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/utils.py +332 -0
  17. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/version.py +1 -0
  18. lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/__init__.py +451 -0
  19. lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/proxy.py +144 -0
  20. lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_agent/__init__.py +100 -0
  21. lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/__init__.py +476 -0
  22. lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/utils.py +12 -0
  23. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +191 -129
  24. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/schema_utils.py +26 -0
  25. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/utils.py +126 -41
  26. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py +488 -0
  27. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/config.py +8 -0
  28. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_emitter.py +143 -0
  29. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_models.py +41 -0
  30. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/span_utils.py +229 -0
  31. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/utils.py +92 -0
  32. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/version.py +1 -0
  33. lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/__init__.py +381 -0
  34. lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/utils.py +36 -0
  35. lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py +16 -16
  36. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/__init__.py +61 -0
  37. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +472 -0
  38. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +1185 -0
  39. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +305 -0
  40. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/config.py +16 -0
  41. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +312 -0
  42. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_emitter.py +100 -0
  43. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
  44. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +68 -0
  45. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/utils.py +197 -0
  46. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v0/__init__.py +176 -0
  47. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/__init__.py +368 -0
  48. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +325 -0
  49. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +135 -0
  50. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +786 -0
  51. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/version.py +1 -0
  52. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openhands_ai/__init__.py +388 -0
  53. lmnr/opentelemetry_lib/opentelemetry/instrumentation/opentelemetry/__init__.py +69 -0
  54. lmnr/opentelemetry_lib/opentelemetry/instrumentation/skyvern/__init__.py +59 -61
  55. lmnr/opentelemetry_lib/opentelemetry/instrumentation/threading/__init__.py +197 -0
  56. lmnr/opentelemetry_lib/tracing/__init__.py +119 -18
  57. lmnr/opentelemetry_lib/tracing/_instrument_initializers.py +124 -25
  58. lmnr/opentelemetry_lib/tracing/attributes.py +4 -0
  59. lmnr/opentelemetry_lib/tracing/context.py +200 -0
  60. lmnr/opentelemetry_lib/tracing/exporter.py +109 -15
  61. lmnr/opentelemetry_lib/tracing/instruments.py +22 -5
  62. lmnr/opentelemetry_lib/tracing/processor.py +128 -30
  63. lmnr/opentelemetry_lib/tracing/span.py +398 -0
  64. lmnr/opentelemetry_lib/tracing/tracer.py +40 -1
  65. lmnr/opentelemetry_lib/tracing/utils.py +62 -0
  66. lmnr/opentelemetry_lib/utils/package_check.py +9 -0
  67. lmnr/opentelemetry_lib/utils/wrappers.py +11 -0
  68. lmnr/sdk/browser/background_send_events.py +158 -0
  69. lmnr/sdk/browser/browser_use_cdp_otel.py +100 -0
  70. lmnr/sdk/browser/browser_use_otel.py +12 -12
  71. lmnr/sdk/browser/bubus_otel.py +71 -0
  72. lmnr/sdk/browser/cdp_utils.py +518 -0
  73. lmnr/sdk/browser/inject_script.js +514 -0
  74. lmnr/sdk/browser/patchright_otel.py +18 -44
  75. lmnr/sdk/browser/playwright_otel.py +104 -187
  76. lmnr/sdk/browser/pw_utils.py +249 -210
  77. lmnr/sdk/browser/recorder/record.umd.min.cjs +84 -0
  78. lmnr/sdk/browser/utils.py +1 -1
  79. lmnr/sdk/client/asynchronous/async_client.py +47 -15
  80. lmnr/sdk/client/asynchronous/resources/__init__.py +2 -7
  81. lmnr/sdk/client/asynchronous/resources/browser_events.py +1 -0
  82. lmnr/sdk/client/asynchronous/resources/datasets.py +131 -0
  83. lmnr/sdk/client/asynchronous/resources/evals.py +122 -18
  84. lmnr/sdk/client/asynchronous/resources/evaluators.py +85 -0
  85. lmnr/sdk/client/asynchronous/resources/tags.py +4 -10
  86. lmnr/sdk/client/synchronous/resources/__init__.py +2 -2
  87. lmnr/sdk/client/synchronous/resources/datasets.py +131 -0
  88. lmnr/sdk/client/synchronous/resources/evals.py +83 -17
  89. lmnr/sdk/client/synchronous/resources/evaluators.py +85 -0
  90. lmnr/sdk/client/synchronous/resources/tags.py +4 -10
  91. lmnr/sdk/client/synchronous/sync_client.py +47 -15
  92. lmnr/sdk/datasets/__init__.py +94 -0
  93. lmnr/sdk/datasets/file_utils.py +91 -0
  94. lmnr/sdk/decorators.py +103 -23
  95. lmnr/sdk/evaluations.py +122 -33
  96. lmnr/sdk/laminar.py +816 -333
  97. lmnr/sdk/log.py +7 -2
  98. lmnr/sdk/types.py +124 -143
  99. lmnr/sdk/utils.py +115 -2
  100. lmnr/version.py +1 -1
  101. {lmnr-0.6.16.dist-info → lmnr-0.7.26.dist-info}/METADATA +71 -78
  102. lmnr-0.7.26.dist-info/RECORD +116 -0
  103. lmnr-0.7.26.dist-info/WHEEL +4 -0
  104. lmnr-0.7.26.dist-info/entry_points.txt +3 -0
  105. lmnr/opentelemetry_lib/tracing/context_properties.py +0 -65
  106. lmnr/sdk/browser/rrweb/rrweb.umd.min.cjs +0 -98
  107. lmnr/sdk/client/asynchronous/resources/agent.py +0 -329
  108. lmnr/sdk/client/synchronous/resources/agent.py +0 -323
  109. lmnr/sdk/datasets.py +0 -60
  110. lmnr-0.6.16.dist-info/LICENSE +0 -75
  111. lmnr-0.6.16.dist-info/RECORD +0 -61
  112. lmnr-0.6.16.dist-info/WHEEL +0 -4
  113. lmnr-0.6.16.dist-info/entry_points.txt +0 -3

lmnr/sdk/client/asynchronous/async_client.py

@@ -8,11 +8,12 @@ from typing import TypeVar
 from types import TracebackType

 from lmnr.sdk.client.asynchronous.resources import (
-    AsyncAgent,
     AsyncBrowserEvents,
     AsyncEvals,
     AsyncTags,
+    AsyncEvaluators,
 )
+from lmnr.sdk.client.asynchronous.resources.datasets import AsyncDatasets
 from lmnr.sdk.utils import from_env

 _T = TypeVar("_T", bound="AsyncLaminarClient")
@@ -65,28 +66,42 @@ class AsyncLaminarClient:
         self.__client = httpx.AsyncClient(
             headers=self._headers(),
             timeout=timeout,
+            # Context: If the server responds with a 413, the connection becomes
+            # poisoned and freezes on subsequent requests, and there is no way
+            # to recover or recycle such connection.
+            # Setting max_keepalive_connections to 0 will resolve this, but is
+            # less efficient, as it will create a new connection
+            # (not client, so still better) for each request.
+            #
+            # Note: from my experiments with a simple python server, forcing the
+            # server to read/consume the request payload from the socket seems
+            # to resolve this, but I haven't figured out how to do that in our
+            # real actix-web backend server and whether it makes sense to do so.
+            #
+            # TODO: investigate if there are better ways to fix this rather than
+            # setting keepalive_expiry to 0. Other alternative: migrate to
+            # requests + aiohttp.
+            #
+            # limits=httpx.Limits(
+            #     max_keepalive_connections=0,
+            #     keepalive_expiry=0,
+            # ),
         )

         # Initialize resource objects
-        self.__agent = AsyncAgent(
+        self.__evals = AsyncEvals(
             self.__client, self.__base_url, self.__project_api_key
         )
-        self.__evals = AsyncEvals(
+        self.__evaluators = AsyncEvaluators(
             self.__client, self.__base_url, self.__project_api_key
         )
         self.__browser_events = AsyncBrowserEvents(
             self.__client, self.__base_url, self.__project_api_key
         )
         self.__tags = AsyncTags(self.__client, self.__base_url, self.__project_api_key)
-
-    @property
-    def agent(self) -> AsyncAgent:
-        """Get the Agent resource.
-
-        Returns:
-            Agent: The Agent resource instance.
-        """
-        return self.__agent
+        self.__datasets = AsyncDatasets(
+            self.__client, self.__base_url, self.__project_api_key
+        )

     @property
     def evals(self) -> AsyncEvals:
@@ -102,7 +117,7 @@ class AsyncLaminarClient:
         """Get the BrowserEvents resource.

         Returns:
-            BrowserEvents: The BrowserEvents resource instance.
+            AsyncBrowserEvents: The BrowserEvents resource instance.
         """
         return self.__browser_events

@@ -115,6 +130,25 @@ class AsyncLaminarClient:
         """
         return self.__tags

+    @property
+    def evaluators(self) -> AsyncEvaluators:
+        """Get the Evaluators resource.
+
+        Returns:
+            AsyncEvaluators: The Evaluators resource instance.
+        """
+        return self.__evaluators
+
+    @property
+    def datasets(self) -> AsyncDatasets:
+        """Get the Datasets resource.
+
+        Returns:
+            AsyncDatasets: The Datasets resource instance.
+        """
+        return self.__datasets
+
+    @property
     def is_closed(self) -> bool:
         return self.__client.is_closed

@@ -144,5 +178,3 @@ class AsyncLaminarClient:
             "Content-Type": "application/json",
             "Accept": "application/json",
         }
-
-
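
For orientation, a minimal usage sketch of the reworked async client surface: the removed agent resource is replaced by the evaluators and datasets properties. The constructor argument (project_api_key) and the top-level import path used below are assumptions, since neither appears in these hunks.

import asyncio
import uuid

from lmnr import AsyncLaminarClient  # assumed import path


async def main():
    # Assumed constructor; the hunks above only show the private fields it populates.
    client = AsyncLaminarClient(project_api_key="lmnr-project-api-key")

    # Resources exposed in 0.7.x:
    await client.evaluators.score(
        name="quality", trace_id=str(uuid.uuid4()), score=0.9
    )
    page = await client.datasets.pull(name="my-dataset", limit=10)
    print(page)


asyncio.run(main())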

lmnr/sdk/client/asynchronous/resources/__init__.py

@@ -1,11 +1,6 @@
-from lmnr.sdk.client.asynchronous.resources.agent import AsyncAgent
 from lmnr.sdk.client.asynchronous.resources.browser_events import AsyncBrowserEvents
 from lmnr.sdk.client.asynchronous.resources.evals import AsyncEvals
 from lmnr.sdk.client.asynchronous.resources.tags import AsyncTags
+from lmnr.sdk.client.asynchronous.resources.evaluators import AsyncEvaluators

-__all__ = [
-    "AsyncAgent",
-    "AsyncEvals",
-    "AsyncBrowserEvents",
-    "AsyncTags",
-]
+__all__ = ["AsyncEvals", "AsyncBrowserEvents", "AsyncTags", "AsyncEvaluators"]

lmnr/sdk/client/asynchronous/resources/browser_events.py

@@ -25,6 +25,7 @@ class AsyncBrowserEvents(BaseAsyncResource):
             "source": f"python@{PYTHON_VERSION}",
             "sdkVersion": __version__,
         }
+
         compressed_payload = gzip.compress(json.dumps(payload).encode("utf-8"))
         response = await self._client.post(
             url,

lmnr/sdk/client/asynchronous/resources/datasets.py (new file)

@@ -0,0 +1,131 @@
+"""Datasets resource for interacting with Laminar datasets API."""
+
+import math
+import uuid
+
+from lmnr.sdk.client.asynchronous.resources.base import BaseAsyncResource
+from lmnr.sdk.log import get_default_logger
+from lmnr.sdk.types import (
+    Datapoint,
+    Dataset,
+    GetDatapointsResponse,
+    PushDatapointsResponse,
+)
+from lmnr.sdk.utils import serialize
+
+logger = get_default_logger(__name__)
+
+DEFAULT_DATASET_PULL_LIMIT = 100
+DEFAULT_DATASET_PUSH_BATCH_SIZE = 100
+
+
+class AsyncDatasets(BaseAsyncResource):
+    """Resource for interacting with Laminar datasets API."""
+
+    async def list_datasets(self) -> list[Dataset]:
+        """List all datasets."""
+        response = await self._client.get(
+            f"{self._base_url}/v1/datasets",
+            headers=self._headers(),
+        )
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error listing datasets: [{response.status_code}] {response.text}"
+            )
+        return [Dataset.model_validate(dataset) for dataset in response.json()]
+
+    async def get_dataset_by_name(self, name: str) -> list[Dataset]:
+        """Get a dataset by name."""
+        response = await self._client.get(
+            f"{self._base_url}/v1/datasets",
+            params={"name": name},
+            headers=self._headers(),
+        )
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error getting dataset: [{response.status_code}] {response.text}"
+            )
+        return [Dataset.model_validate(dataset) for dataset in response.json()]
+
+    async def push(
+        self,
+        points: list[Datapoint],
+        name: str | None = None,
+        id: uuid.UUID | None = None,
+        batch_size: int = DEFAULT_DATASET_PUSH_BATCH_SIZE,
+        create_dataset: bool = False,
+    ) -> PushDatapointsResponse | None:
+        """Push data to a dataset."""
+
+        if name is None and id is None:
+            raise ValueError("Either name or id must be provided")
+
+        if name is not None and id is not None:
+            raise ValueError("Only one of name or id must be provided")
+
+        if create_dataset and name is None:
+            raise ValueError("Name must be provided when creating a new dataset")
+
+        identifier = {"name": name} if name is not None else {"datasetId": id}
+
+        batch_num = 0
+        total_batches = math.ceil(len(points) / batch_size)
+        response = None
+        for i in range(0, len(points), batch_size):
+            batch_num += 1
+            logger.debug(f"Pushing batch {batch_num} of {total_batches}")
+            batch = points[i : i + batch_size]
+            response = await self._client.post(
+                f"{self._base_url}/v1/datasets/datapoints",
+                json={
+                    **identifier,
+                    "datapoints": [serialize(point) for point in batch],
+                    "createDataset": create_dataset,
+                },
+                headers=self._headers(),
+            )
+
+            # 201 when creating a new dataset
+            if response.status_code not in [200, 201]:
+                raise ValueError(
+                    f"Error pushing data to dataset: [{response.status_code}] {response.text}"
+                )
+
+        response = PushDatapointsResponse.model_validate(response.json())
+        # Currently, the response only contains the dataset ID,
+        # so it's safe to return the last response only.
+        return response
+
+    async def pull(
+        self,
+        name: str | None = None,
+        id: uuid.UUID | None = None,
+        # TODO: move const to one file, import in CLI
+        limit: int = DEFAULT_DATASET_PULL_LIMIT,
+        offset: int = 0,
+    ) -> GetDatapointsResponse:
+        """Pull data from a dataset."""
+
+        if name is None and id is None:
+            raise ValueError("Either name or id must be provided")
+
+        if name is not None and id is not None:
+            raise ValueError("Only one of name or id must be provided")
+
+        identifier = {"name": name} if name is not None else {"datasetId": id}
+
+        params = {
+            **identifier,
+            "offset": offset,
+            "limit": limit,
+        }
+        response = await self._client.get(
+            f"{self._base_url}/v1/datasets/datapoints",
+            params=params,
+            headers=self._headers(),
+        )
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error pulling data from dataset: [{response.status_code}] {response.text}"
+            )
+        return GetDatapointsResponse.model_validate(response.json())
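
A hedged sketch of the new push/pull pair in use. The Datapoint constructor fields shown (data, target) are assumptions, since Datapoint is imported from lmnr.sdk.types, whose contents are not shown in this section.

from lmnr import AsyncLaminarClient  # assumed import path
from lmnr.sdk.types import Datapoint


async def roundtrip(client: AsyncLaminarClient) -> None:
    # push() sends batches of DEFAULT_DATASET_PUSH_BATCH_SIZE (100) datapoints
    # per request; create_dataset=True requires a name, per the validation above.
    resp = await client.datasets.push(
        points=[Datapoint(data={"question": "2 + 2?"}, target={"answer": "4"})],
        name="math-qa",
        create_dataset=True,
    )

    # pull() fetches one page of up to DEFAULT_DATASET_PULL_LIMIT (100) datapoints.
    page = await client.datasets.pull(name="math-qa", limit=50, offset=0)
    print(resp, page)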

lmnr/sdk/client/asynchronous/resources/evals.py

@@ -1,21 +1,32 @@
 """Evals resource for interacting with Laminar evaluations API."""

-from typing import Any
 import uuid
+import warnings
+
+from typing import Any

 from lmnr.sdk.client.asynchronous.resources.base import BaseAsyncResource
+from lmnr.sdk.log import get_default_logger
 from lmnr.sdk.types import (
+    GetDatapointsResponse,
     InitEvaluationResponse,
     EvaluationResultDatapoint,
     PartialEvaluationDatapoint,
 )
+from lmnr.sdk.utils import serialize
+
+INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH = 16_000_000  # 16MB
+logger = get_default_logger(__name__)


 class AsyncEvals(BaseAsyncResource):
     """Resource for interacting with Laminar evaluations API."""

     async def init(
-        self, name: str | None = None, group_name: str | None = None, metadata: dict[str, Any] | None = None
+        self,
+        name: str | None = None,
+        group_name: str | None = None,
+        metadata: dict[str, Any] | None = None,
     ) -> InitEvaluationResponse:
         """Initialize a new evaluation.

@@ -51,7 +62,7 @@ class AsyncEvals(BaseAsyncResource):
     ) -> uuid.UUID:
         """
         Create a new evaluation and return its ID.
-
+
         Parameters:
             name (str | None, optional): Optional name of the evaluation.
             group_name (str | None, optional): An identifier to group evaluations.
@@ -60,7 +71,9 @@ class AsyncEvals(BaseAsyncResource):
         Returns:
             uuid.UUID: The evaluation ID.
         """
-        evaluation = await self.init(name=name, group_name=group_name, metadata=metadata)
+        evaluation = await self.init(
+            name=name, group_name=group_name, metadata=metadata
+        )
         return evaluation.id

     async def create_datapoint(
@@ -74,7 +87,7 @@ class AsyncEvals(BaseAsyncResource):
     ) -> uuid.UUID:
         """
         Create a datapoint for an evaluation.
-
+
         Parameters:
             eval_id (uuid.UUID): The evaluation ID.
             data: The input data for the executor.
@@ -82,13 +95,13 @@ class AsyncEvals(BaseAsyncResource):
             metadata (dict[str, Any] | None, optional): Optional metadata.
             index (int | None, optional): Optional index of the datapoint.
             trace_id (uuid.UUID | None, optional): Optional trace ID.
-
+
         Returns:
             uuid.UUID: The datapoint ID.
         """
-
+
         datapoint_id = uuid.uuid4()
-
+
         # Create a minimal datapoint first
         partial_datapoint = PartialEvaluationDatapoint(
             id=datapoint_id,
@@ -99,7 +112,7 @@ class AsyncEvals(BaseAsyncResource):
             executor_span_id=uuid.uuid4(),  # Will be updated when executor runs
             metadata=metadata,
         )
-
+
         await self.save_datapoints(eval_id, [partial_datapoint])
         return datapoint_id

@@ -119,18 +132,67 @@ class AsyncEvals(BaseAsyncResource):
         Raises:
             ValueError: If there's an error saving the datapoints.
         """
+        length = INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH
+        points = [datapoint.to_dict(max_data_length=length) for datapoint in datapoints]
         response = await self._client.post(
             self._base_url + f"/v1/evals/{eval_id}/datapoints",
             json={
-                "points": [datapoint.to_dict() for datapoint in datapoints],
+                "points": points,
                 "groupName": group_name,
             },
             headers=self._headers(),
         )
+        if response.status_code == 413:
+            await self._retry_save_datapoints(eval_id, datapoints, group_name)
+            return
+
         if response.status_code != 200:
-            raise ValueError(f"Error saving evaluation datapoints: {response.text}")
-
-
+            raise ValueError(
+                f"Error saving evaluation datapoints: [{response.status_code}] {response.text}"
+            )
+
+    async def get_datapoints(
+        self,
+        dataset_name: str,
+        offset: int,
+        limit: int,
+    ) -> GetDatapointsResponse:
+        """Get datapoints from a dataset.
+
+        Args:
+            dataset_name (str): The name of the dataset.
+            offset (int): The offset to start from.
+            limit (int): The maximum number of datapoints to return.
+
+        Returns:
+            GetDatapointsResponse: The response containing the datapoints.
+
+        Raises:
+            ValueError: If there's an error fetching the datapoints.
+        """
+        warnings.warn(
+            "Use client.datasets.pull instead",
+            DeprecationWarning,
+        )
+
+        params = {"name": dataset_name, "offset": offset, "limit": limit}
+        response = await self._client.get(
+            self._base_url + "/v1/datasets/datapoints",
+            params=params,
+            headers=self._headers(),
+        )
+        if response.status_code != 200:
+            try:
+                resp_json = response.json()
+                raise ValueError(
+                    f"Error fetching datapoints: [{response.status_code}] {resp_json}"
+                )
+            except Exception:
+                raise ValueError(
+                    f"Error fetching datapoints: [{response.status_code}] {response.text}"
+                )
+        return GetDatapointsResponse.model_validate(response.json())
+
     async def update_datapoint(
         self,
         eval_id: uuid.UUID,
@@ -146,17 +208,59 @@ class AsyncEvals(BaseAsyncResource):
             executor_output (Any): The executor output.
             scores (dict[str, float | int] | None, optional): The scores. Defaults to None.
         """
-
+
         response = await self._client.post(
             self._base_url + f"/v1/evals/{eval_id}/datapoints/{datapoint_id}",
             json={
-                "executorOutput": executor_output,
+                "executorOutput": (
+                    str(serialize(executor_output))[
+                        :INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH
+                    ]
+                    if executor_output is not None
+                    else None
+                ),
                 "scores": scores,
             },
             headers=self._headers(),
         )

         if response.status_code != 200:
-            raise ValueError(f"Error updating evaluation datapoint: {response.text}")
-
-
+            raise ValueError(
+                f"Error updating evaluation datapoint: [{response.status_code}] {response.text}"
+            )
+
+    async def _retry_save_datapoints(
+        self,
+        eval_id: uuid.UUID,
+        datapoints: list[EvaluationResultDatapoint | PartialEvaluationDatapoint],
+        group_name: str | None = None,
+        initial_length: int = INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH,
+        max_retries: int = 20,
+    ):
+        retry = 0
+        length = initial_length
+        while retry < max_retries:
+            retry += 1
+            length = length // 2
+            logger.debug(
+                f"Retrying save datapoints: {retry} of {max_retries}, length: {length}"
+            )
+            if length == 0:
+                raise ValueError("Error saving evaluation datapoints")
+            points = [
+                datapoint.to_dict(max_data_length=length) for datapoint in datapoints
+            ]
+            response = await self._client.post(
+                self._base_url + f"/v1/evals/{eval_id}/datapoints",
+                json={
+                    "points": points,
+                    "groupName": group_name,
+                },
+                headers=self._headers(),
+            )
+            if response.status_code != 413:
+                break
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error saving evaluation datapoints: [{response.status_code}] {response.text}"
            )
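
The new 413 handling halves the per-datapoint max_data_length on every retry, so the truncation budget shrinks geometrically from the 16 MB starting value. A standalone sketch of that schedule (plain arithmetic, not SDK code):

# Mirrors the length schedule used by _retry_save_datapoints: the budget is
# halved on each attempt until the server stops returning 413 or it hits zero.
def retry_lengths(initial: int = 16_000_000, max_retries: int = 20) -> list[int]:
    lengths = []
    length = initial
    for _ in range(max_retries):
        length //= 2
        if length == 0:
            break
        lengths.append(length)
    return lengths


print(retry_lengths()[:5])  # [8000000, 4000000, 2000000, 1000000, 500000]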

lmnr/sdk/client/asynchronous/resources/evaluators.py (new file)

@@ -0,0 +1,85 @@
+"""Evaluators resource for creating evaluator scores."""
+
+import uuid
+from typing import Any
+
+from lmnr.sdk.client.asynchronous.resources.base import BaseAsyncResource
+from lmnr.sdk.utils import format_id
+
+
+class AsyncEvaluators(BaseAsyncResource):
+    """Resource for creating evaluator scores."""
+
+    async def score(
+        self,
+        *,
+        name: str,
+        trace_id: str | int | uuid.UUID | None = None,
+        span_id: str | int | uuid.UUID | None = None,
+        metadata: dict[str, Any] | None = None,
+        score: float,
+    ) -> None:
+        """Create a score for a span.
+
+        Args:
+            name (str): Name of the score
+            trace_id (str | int | uuid.UUID | None, optional): The trace ID to score (will be attached to root span)
+            span_id (str | int | uuid.UUID | None, optional): The span ID to score
+            metadata (dict[str, Any] | None, optional): Additional metadata. Defaults to None.
+            score (float): The score value (float)
+
+        Raises:
+            ValueError: If there's an error creating the score.
+
+        Example:
+            Score by trace ID (will attach to root span):
+
+            >>> await laminar_client.evaluators.score(
+            ...     name="quality",
+            ...     trace_id="trace-id-here",
+            ...     score=0.95,
+            ...     metadata={"model": "gpt-4"}
+            ... )
+
+            Score by span ID:
+
+            >>> await laminar_client.evaluators.score(
+            ...     name="relevance",
+            ...     span_id="span-id-here",
+            ...     score=0.87
+            ... )
+        """
+        if trace_id is not None and span_id is not None:
+            raise ValueError("Cannot provide both trace_id and span_id. Please provide only one.")
+        if trace_id is None and span_id is None:
+            raise ValueError("Either 'trace_id' or 'span_id' must be provided.")
+
+        if trace_id is not None:
+            formatted_trace_id = format_id(trace_id)
+            payload = {
+                "name": name,
+                "traceId": formatted_trace_id,
+                "metadata": metadata,
+                "score": score,
+                "source": "Code",
+            }
+        else:
+            formatted_span_id = format_id(span_id)
+            payload = {
+                "name": name,
+                "spanId": formatted_span_id,
+                "metadata": metadata,
+                "score": score,
+                "source": "Code",
+            }
+
+        response = await self._client.post(
+            self._base_url + "/v1/evaluators/score",
+            json=payload,
+            headers=self._headers(),
+        )
+
+        if response.status_code != 200:
+            if response.status_code == 401:
+                raise ValueError("Unauthorized. Please check your project API key.")
+            raise ValueError(f"Error creating evaluator score: {response.text}")

lmnr/sdk/client/asynchronous/resources/tags.py

@@ -5,6 +5,7 @@ import uuid

 from lmnr.sdk.client.asynchronous.resources.base import BaseAsyncResource
 from lmnr.sdk.log import get_default_logger
+from lmnr.sdk.utils import format_id

 logger = get_default_logger(__name__)

@@ -54,18 +55,11 @@ class AsyncTags(BaseAsyncResource):
         ```
         """
         trace_tags = tags if isinstance(tags, list) else [tags]
-        if isinstance(trace_id, uuid.UUID):
-            trace_id = str(trace_id)
-        elif isinstance(trace_id, int):
-            trace_id = str(uuid.UUID(int=trace_id))
-        elif isinstance(trace_id, str):
-            uuid.UUID(trace_id)  # Will raise ValueError if invalid
-        else:
-            raise ValueError(f"Invalid trace id: {trace_id}")
+        formatted_trace_id = format_id(trace_id)

         url = self._base_url + "/v1/tag"
         payload = {
-            "traceId": trace_id,
+            "traceId": formatted_trace_id,
             "names": trace_tags,
         }
         response = await self._client.post(
@@ -78,7 +72,7 @@

         if response.status_code == 404:
             logger.warning(
-                f"Trace {trace_id} not found. The trace may have not been ended yet."
+                f"Trace {formatted_trace_id} not found. The trace may have not been ended yet."
             )
             return []

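
The inline trace-ID normalization removed above now lives in format_id from lmnr.sdk.utils, which is not shown in this diff; judging by the deleted branch, it presumably behaves roughly like this sketch:

import uuid


def format_id_sketch(value: str | int | uuid.UUID) -> str:
    # Assumed behaviour, mirroring the logic removed from AsyncTags:
    # UUIDs and ints become canonical UUID strings, strings are validated.
    if isinstance(value, uuid.UUID):
        return str(value)
    if isinstance(value, int):
        return str(uuid.UUID(int=value))
    if isinstance(value, str):
        uuid.UUID(value)  # raises ValueError if the string is not a valid UUID
        return value
    raise ValueError(f"Invalid id: {value}")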

lmnr/sdk/client/synchronous/resources/__init__.py

@@ -1,6 +1,6 @@
-from lmnr.sdk.client.synchronous.resources.agent import Agent
 from lmnr.sdk.client.synchronous.resources.browser_events import BrowserEvents
 from lmnr.sdk.client.synchronous.resources.evals import Evals
 from lmnr.sdk.client.synchronous.resources.tags import Tags
+from lmnr.sdk.client.synchronous.resources.evaluators import Evaluators

-__all__ = ["Agent", "Evals", "BrowserEvents", "Tags"]
+__all__ = ["Evals", "Evaluators", "BrowserEvents", "Tags"]