calibrate-sdk 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. calibrate/__init__.py +109 -0
  2. calibrate/_default_clients.py +32 -0
  3. calibrate/agent_tests/__init__.py +4 -0
  4. calibrate/agent_tests/client.py +350 -0
  5. calibrate/agent_tests/raw_client.py +455 -0
  6. calibrate/agents/__init__.py +4 -0
  7. calibrate/agents/client.py +206 -0
  8. calibrate/agents/raw_client.py +273 -0
  9. calibrate/client.py +255 -0
  10. calibrate/core/__init__.py +127 -0
  11. calibrate/core/api_error.py +23 -0
  12. calibrate/core/client_wrapper.py +149 -0
  13. calibrate/core/datetime_utils.py +70 -0
  14. calibrate/core/file.py +67 -0
  15. calibrate/core/force_multipart.py +18 -0
  16. calibrate/core/http_client.py +843 -0
  17. calibrate/core/http_response.py +59 -0
  18. calibrate/core/http_sse/__init__.py +42 -0
  19. calibrate/core/http_sse/_api.py +180 -0
  20. calibrate/core/http_sse/_decoders.py +61 -0
  21. calibrate/core/http_sse/_exceptions.py +7 -0
  22. calibrate/core/http_sse/_models.py +17 -0
  23. calibrate/core/jsonable_encoder.py +120 -0
  24. calibrate/core/logging.py +107 -0
  25. calibrate/core/parse_error.py +36 -0
  26. calibrate/core/pydantic_utilities.py +508 -0
  27. calibrate/core/query_encoder.py +58 -0
  28. calibrate/core/remove_none_from_dict.py +11 -0
  29. calibrate/core/request_options.py +37 -0
  30. calibrate/core/serialization.py +347 -0
  31. calibrate/environment.py +7 -0
  32. calibrate/errors/__init__.py +34 -0
  33. calibrate/errors/unprocessable_entity_error.py +11 -0
  34. calibrate/py.typed +0 -0
  35. calibrate/types/__init__.py +83 -0
  36. calibrate/types/batch_run_request.py +19 -0
  37. calibrate/types/batch_test_run.py +22 -0
  38. calibrate/types/batch_test_run_response.py +22 -0
  39. calibrate/types/batch_test_skip.py +21 -0
  40. calibrate/types/http_validation_error.py +20 -0
  41. calibrate/types/judge_result.py +51 -0
  42. calibrate/types/resolve_agent_names_response.py +20 -0
  43. calibrate/types/routers_agent_tests_agent_response.py +27 -0
  44. calibrate/types/routers_agent_tests_agent_response_type.py +5 -0
  45. calibrate/types/task_create_response.py +22 -0
  46. calibrate/types/test_case_result.py +33 -0
  47. calibrate/types/test_output.py +21 -0
  48. calibrate/types/test_run_status_response.py +37 -0
  49. calibrate/types/tool_call_output.py +21 -0
  50. calibrate/types/validation_error.py +22 -0
  51. calibrate/types/validation_error_loc_item.py +5 -0
  52. calibrate/version.py +3 -0
  53. calibrate_sdk-0.0.1.dist-info/LICENSE +21 -0
  54. calibrate_sdk-0.0.1.dist-info/METADATA +76 -0
  55. calibrate_sdk-0.0.1.dist-info/RECORD +56 -0
  56. calibrate_sdk-0.0.1.dist-info/WHEEL +4 -0
calibrate/__init__.py ADDED
@@ -0,0 +1,109 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ # isort: skip_file
4
+
5
+ import typing
6
+ from importlib import import_module
7
+
8
+ if typing.TYPE_CHECKING:
9
+ from .types import (
10
+ BatchRunRequest,
11
+ BatchTestRun,
12
+ BatchTestRunResponse,
13
+ BatchTestSkip,
14
+ HttpValidationError,
15
+ JudgeResult,
16
+ ResolveAgentNamesResponse,
17
+ RoutersAgentTestsAgentResponse,
18
+ RoutersAgentTestsAgentResponseType,
19
+ TaskCreateResponse,
20
+ TestCaseResult,
21
+ TestOutput,
22
+ TestRunStatusResponse,
23
+ ToolCallOutput,
24
+ ValidationError,
25
+ ValidationErrorLocItem,
26
+ )
27
+ from .errors import UnprocessableEntityError
28
+ from . import agent_tests, agents
29
+ from ._default_clients import DefaultAioHttpClient, DefaultAsyncHttpxClient
30
+ from .client import AsyncCalibrate, Calibrate
31
+ from .environment import CalibrateEnvironment
32
+ from .version import __version__
33
+ _dynamic_imports: typing.Dict[str, str] = {
34
+ "AsyncCalibrate": ".client",
35
+ "BatchRunRequest": ".types",
36
+ "BatchTestRun": ".types",
37
+ "BatchTestRunResponse": ".types",
38
+ "BatchTestSkip": ".types",
39
+ "Calibrate": ".client",
40
+ "CalibrateEnvironment": ".environment",
41
+ "DefaultAioHttpClient": "._default_clients",
42
+ "DefaultAsyncHttpxClient": "._default_clients",
43
+ "HttpValidationError": ".types",
44
+ "JudgeResult": ".types",
45
+ "ResolveAgentNamesResponse": ".types",
46
+ "RoutersAgentTestsAgentResponse": ".types",
47
+ "RoutersAgentTestsAgentResponseType": ".types",
48
+ "TaskCreateResponse": ".types",
49
+ "TestCaseResult": ".types",
50
+ "TestOutput": ".types",
51
+ "TestRunStatusResponse": ".types",
52
+ "ToolCallOutput": ".types",
53
+ "UnprocessableEntityError": ".errors",
54
+ "ValidationError": ".types",
55
+ "ValidationErrorLocItem": ".types",
56
+ "__version__": ".version",
57
+ "agent_tests": ".agent_tests",
58
+ "agents": ".agents",
59
+ }
60
+
61
+
62
def __getattr__(attr_name: str) -> typing.Any:
    """
    Resolve a lazily exported name of this package on attribute access.

    The name is looked up in ``_dynamic_imports`` to find the relative module
    that provides it; that module is imported relative to ``__package__``.

    Parameters
    ----------
    attr_name : str
        Name requested on the package.

    Returns
    -------
    typing.Any
        The imported module itself when the registered module path is
        ``"." + attr_name``; otherwise the attribute called ``attr_name``
        read from the imported module.

    Raises
    ------
    AttributeError
        If ``attr_name`` is not registered in ``_dynamic_imports``, or an
        ``AttributeError`` is raised while importing or reading the attribute.
    ImportError
        If importing the providing module fails.
    """
    source_module = _dynamic_imports.get(attr_name)
    if source_module is None:
        raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")

    try:
        loaded = import_module(source_module, __package__)
        # An entry that maps a name to ".<name>" denotes a submodule, so the
        # module object is the export; anything else is a member of the module.
        return loaded if source_module == f".{attr_name}" else getattr(loaded, attr_name)
    except ImportError as exc:
        raise ImportError(f"Failed to import {attr_name} from {source_module}: {exc}") from exc
    except AttributeError as exc:
        raise AttributeError(f"Failed to get {attr_name} from {source_module}: {exc}") from exc
76
+
77
+
78
def __dir__():
    """Return the names registered in ``_dynamic_imports`` as a sorted list."""
    # Iterating a dict yields its keys, so no explicit .keys()/list() is needed.
    return sorted(_dynamic_imports)
81
+
82
+
83
+ __all__ = [
84
+ "AsyncCalibrate",
85
+ "BatchRunRequest",
86
+ "BatchTestRun",
87
+ "BatchTestRunResponse",
88
+ "BatchTestSkip",
89
+ "Calibrate",
90
+ "CalibrateEnvironment",
91
+ "DefaultAioHttpClient",
92
+ "DefaultAsyncHttpxClient",
93
+ "HttpValidationError",
94
+ "JudgeResult",
95
+ "ResolveAgentNamesResponse",
96
+ "RoutersAgentTestsAgentResponse",
97
+ "RoutersAgentTestsAgentResponseType",
98
+ "TaskCreateResponse",
99
+ "TestCaseResult",
100
+ "TestOutput",
101
+ "TestRunStatusResponse",
102
+ "ToolCallOutput",
103
+ "UnprocessableEntityError",
104
+ "ValidationError",
105
+ "ValidationErrorLocItem",
106
+ "__version__",
107
+ "agent_tests",
108
+ "agents",
109
+ ]
@@ -0,0 +1,32 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ import httpx
6
+
7
+ SDK_DEFAULT_TIMEOUT = 60
8
+
9
try:
    import httpx_aiohttp  # type: ignore[import-not-found]
except ImportError:

    class DefaultAioHttpClient(httpx.AsyncClient):  # type: ignore
        """Placeholder defined when ``httpx_aiohttp`` cannot be imported.

        Instantiating it always raises ``RuntimeError`` with installation
        instructions for the aiohttp extra.
        """

        def __init__(self, **kwargs: typing.Any) -> None:
            install_hint = "To use the aiohttp client, install the aiohttp extra: pip install calibrate-sdk[aiohttp]"
            raise RuntimeError(install_hint)

else:

    class DefaultAioHttpClient(httpx_aiohttp.HttpxAiohttpClient):  # type: ignore
        """``HttpxAiohttpClient`` subclass that fills in the SDK's default options.

        ``timeout`` defaults to ``SDK_DEFAULT_TIMEOUT`` and ``follow_redirects``
        to ``True``; values passed explicitly by the caller are kept.
        """

        def __init__(self, **kwargs: typing.Any) -> None:
            sdk_defaults = {"timeout": SDK_DEFAULT_TIMEOUT, "follow_redirects": True}
            # Caller-supplied keys come last so they override the defaults.
            super().__init__(**{**sdk_defaults, **kwargs})
26
+
27
+
28
class DefaultAsyncHttpxClient(httpx.AsyncClient):
    """``httpx.AsyncClient`` subclass that fills in the SDK's default options.

    ``timeout`` defaults to ``SDK_DEFAULT_TIMEOUT`` and ``follow_redirects``
    to ``True``; values passed explicitly by the caller are kept.
    """

    def __init__(self, **kwargs: typing.Any) -> None:
        # Only fill in options the caller left unspecified.
        for option, fallback in (("timeout", SDK_DEFAULT_TIMEOUT), ("follow_redirects", True)):
            kwargs.setdefault(option, fallback)
        super().__init__(**kwargs)
@@ -0,0 +1,4 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ # isort: skip_file
4
+
@@ -0,0 +1,350 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
6
+ from ..core.request_options import RequestOptions
7
+ from ..types.batch_run_request import BatchRunRequest
8
+ from ..types.batch_test_run_response import BatchTestRunResponse
9
+ from ..types.task_create_response import TaskCreateResponse
10
+ from ..types.test_run_status_response import TestRunStatusResponse
11
+ from .raw_client import AsyncRawAgentTestsClient, RawAgentTestsClient
12
+
13
+ # this is used as the default value for optional parameters
14
+ OMIT = typing.cast(typing.Any, ...)
15
+
16
+
17
class AgentTestsClient:
    """
    Client for the agent-test endpoints, built on a ``SyncClientWrapper``.

    Every endpoint method forwards its arguments to the underlying
    ``RawAgentTestsClient`` and returns the ``data`` attribute of the raw
    response.
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        self._raw_client = RawAgentTestsClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> RawAgentTestsClient:
        """
        Access the underlying raw client, whose methods return raw responses.

        Returns
        -------
        RawAgentTestsClient
        """
        return self._raw_client

    def run(
        self,
        agent_uuid: str,
        *,
        test_uuids: typing.Optional[typing.Sequence[str]] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> TaskCreateResponse:
        """
        Start a run of one or more tests for a single agent.

        A background task is launched that executes the calibrate LLM tests
        command using the agent's config together with the combined test cases
        of all specified tests. The returned task ID can be polled for status
        and results.

        Auth: a JWT (frontend) or an `sk_` API key is required. The call 404s
        unless the agent belongs to the caller's org.

        Parameters
        ----------
        agent_uuid : str

        test_uuids : typing.Optional[typing.Sequence[str]]

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        TaskCreateResponse
            Successful Response

        Examples
        --------
        from calibrate import Calibrate

        client = Calibrate(
            org_uuid="YOUR_ORG_UUID",
            api_key="YOUR_API_KEY",
        )
        client.agent_tests.run(
            agent_uuid="agent_uuid",
        )
        """
        return self._raw_client.run(agent_uuid, test_uuids=test_uuids, request_options=request_options).data

    def run_batch(
        self,
        *,
        request: typing.Optional[BatchRunRequest] = None,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> BatchTestRunResponse:
        """
        Launch every linked test for a set of agents, as one ``llm-unit-test`` job per agent.

        Which agents run is decided by the optional ``agent_names`` payload:

        - **Provided (non-empty)** — only the named agents run. Names are unique
          per org and **all are validated up front**: when any name fails to
          resolve to a (non-deleted) agent in the caller's org, the call 404s
          with the offending names and NO jobs are created.
        - **Omitted / null / empty** — every agent in the caller's org runs.

        The linked tests of each selected agent are launched as a single job.
        An agent with no linked tests or an unverified connection is listed
        under ``skipped`` rather than failing the batch. The normal per-org
        concurrency queue applies, so over-limit jobs come back ``queued``.

        Auth accepts a JWT (frontend) or an `sk_` API key (programmatic clients).
        The response holds one ``runs`` entry per launched agent, carrying
        ``agent_name``, ``agent_uuid``, ``task_id``, and ``status``.

        Parameters
        ----------
        request : typing.Optional[BatchRunRequest]

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        BatchTestRunResponse
            Successful Response

        Examples
        --------
        from calibrate import BatchRunRequest, Calibrate

        client = Calibrate(
            org_uuid="YOUR_ORG_UUID",
            api_key="YOUR_API_KEY",
        )
        client.agent_tests.run_batch(
            request=BatchRunRequest(),
        )
        """
        return self._raw_client.run_batch(request=request, request_options=request_options).data

    def get_run(
        self,
        task_id: str,
        *,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> TestRunStatusResponse:
        """
        Fetch the status of an agent test run.

        A JWT (frontend) or an `sk_` API key is required, along with org
        ownership of the run. A completed run can be read without
        authentication only after it has been made public, through the
        share-token endpoint in the public router.

        The response carries the current status and, once the run is done,
        the test results.

        Parameters
        ----------
        task_id : str

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        TestRunStatusResponse
            Successful Response

        Examples
        --------
        from calibrate import Calibrate

        client = Calibrate(
            org_uuid="YOUR_ORG_UUID",
            api_key="YOUR_API_KEY",
        )
        client.agent_tests.get_run(
            task_id="task_id",
        )
        """
        return self._raw_client.get_run(task_id, request_options=request_options).data
171
+
172
+
173
class AsyncAgentTestsClient:
    """
    Asynchronous client for the agent-test endpoints, built on an ``AsyncClientWrapper``.

    Every endpoint coroutine awaits the matching call on the underlying
    ``AsyncRawAgentTestsClient`` and returns the ``data`` attribute of the raw
    response.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        self._raw_client = AsyncRawAgentTestsClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> AsyncRawAgentTestsClient:
        """
        Access the underlying raw client, whose methods return raw responses.

        Returns
        -------
        AsyncRawAgentTestsClient
        """
        return self._raw_client

    async def run(
        self,
        agent_uuid: str,
        *,
        test_uuids: typing.Optional[typing.Sequence[str]] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> TaskCreateResponse:
        """
        Start a run of one or more tests for a single agent.

        A background task is launched that executes the calibrate LLM tests
        command using the agent's config together with the combined test cases
        of all specified tests. The returned task ID can be polled for status
        and results.

        Auth: a JWT (frontend) or an `sk_` API key is required. The call 404s
        unless the agent belongs to the caller's org.

        Parameters
        ----------
        agent_uuid : str

        test_uuids : typing.Optional[typing.Sequence[str]]

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        TaskCreateResponse
            Successful Response

        Examples
        --------
        import asyncio

        from calibrate import AsyncCalibrate

        client = AsyncCalibrate(
            org_uuid="YOUR_ORG_UUID",
            api_key="YOUR_API_KEY",
        )


        async def main() -> None:
            await client.agent_tests.run(
                agent_uuid="agent_uuid",
            )


        asyncio.run(main())
        """
        raw_response = await self._raw_client.run(
            agent_uuid,
            test_uuids=test_uuids,
            request_options=request_options,
        )
        return raw_response.data

    async def run_batch(
        self,
        *,
        request: typing.Optional[BatchRunRequest] = None,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> BatchTestRunResponse:
        """
        Launch every linked test for a set of agents, as one ``llm-unit-test`` job per agent.

        Which agents run is decided by the optional ``agent_names`` payload:

        - **Provided (non-empty)** — only the named agents run. Names are unique
          per org and **all are validated up front**: when any name fails to
          resolve to a (non-deleted) agent in the caller's org, the call 404s
          with the offending names and NO jobs are created.
        - **Omitted / null / empty** — every agent in the caller's org runs.

        The linked tests of each selected agent are launched as a single job.
        An agent with no linked tests or an unverified connection is listed
        under ``skipped`` rather than failing the batch. The normal per-org
        concurrency queue applies, so over-limit jobs come back ``queued``.

        Auth accepts a JWT (frontend) or an `sk_` API key (programmatic clients).
        The response holds one ``runs`` entry per launched agent, carrying
        ``agent_name``, ``agent_uuid``, ``task_id``, and ``status``.

        Parameters
        ----------
        request : typing.Optional[BatchRunRequest]

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        BatchTestRunResponse
            Successful Response

        Examples
        --------
        import asyncio

        from calibrate import AsyncCalibrate, BatchRunRequest

        client = AsyncCalibrate(
            org_uuid="YOUR_ORG_UUID",
            api_key="YOUR_API_KEY",
        )


        async def main() -> None:
            await client.agent_tests.run_batch(
                request=BatchRunRequest(),
            )


        asyncio.run(main())
        """
        raw_response = await self._raw_client.run_batch(
            request=request,
            request_options=request_options,
        )
        return raw_response.data

    async def get_run(
        self,
        task_id: str,
        *,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> TestRunStatusResponse:
        """
        Fetch the status of an agent test run.

        A JWT (frontend) or an `sk_` API key is required, along with org
        ownership of the run. A completed run can be read without
        authentication only after it has been made public, through the
        share-token endpoint in the public router.

        The response carries the current status and, once the run is done,
        the test results.

        Parameters
        ----------
        task_id : str

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        TestRunStatusResponse
            Successful Response

        Examples
        --------
        import asyncio

        from calibrate import AsyncCalibrate

        client = AsyncCalibrate(
            org_uuid="YOUR_ORG_UUID",
            api_key="YOUR_API_KEY",
        )


        async def main() -> None:
            await client.agent_tests.get_run(
                task_id="task_id",
            )


        asyncio.run(main())
        """
        raw_response = await self._raw_client.get_run(task_id, request_options=request_options)
        return raw_response.data