deeprails 1.9.0__tar.gz → 1.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of deeprails might be problematic.
- deeprails-1.10.0/.release-please-manifest.json +3 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/CHANGELOG.md +9 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/PKG-INFO +1 -1
- {deeprails-1.9.0 → deeprails-1.10.0}/api.md +0 -13
- {deeprails-1.9.0 → deeprails-1.10.0}/pyproject.toml +1 -1
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_client.py +1 -9
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_version.py +1 -1
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/resources/__init__.py +0 -14
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/types/__init__.py +0 -2
- deeprails-1.9.0/src/deeprails/types/evaluation.py → deeprails-1.10.0/src/deeprails/types/monitor_detail_response.py +60 -3
- deeprails-1.9.0/.release-please-manifest.json +0 -3
- deeprails-1.9.0/src/deeprails/resources/evaluate.py +0 -334
- deeprails-1.9.0/src/deeprails/types/evaluate_create_params.py +0 -63
- deeprails-1.9.0/src/deeprails/types/monitor_detail_response.py +0 -67
- deeprails-1.9.0/tests/api_resources/test_evaluate.py +0 -222
- {deeprails-1.9.0 → deeprails-1.10.0}/.gitignore +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/CONTRIBUTING.md +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/LICENSE +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/README.md +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/bin/check-release-environment +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/bin/publish-pypi +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/examples/.keep +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/noxfile.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/release-please-config.json +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/requirements-dev.lock +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/requirements.lock +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/__init__.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_base_client.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_compat.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_constants.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_exceptions.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_files.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_models.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_qs.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_resource.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_response.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_streaming.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_types.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_utils/__init__.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_utils/_compat.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_utils/_datetime_parse.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_utils/_logs.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_utils/_proxy.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_utils/_reflection.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_utils/_resources_proxy.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_utils/_streams.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_utils/_sync.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_utils/_transform.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_utils/_typing.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_utils/_utils.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/lib/.keep +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/py.typed +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/resources/defend.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/resources/monitor.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/types/defend_create_workflow_params.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/types/defend_response.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/types/defend_submit_event_params.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/types/defend_update_workflow_params.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/types/monitor_create_params.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/types/monitor_event_response.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/types/monitor_response.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/types/monitor_retrieve_params.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/types/monitor_submit_event_params.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/types/monitor_update_params.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/types/workflow_event_response.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/__init__.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/api_resources/__init__.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/api_resources/test_defend.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/api_resources/test_monitor.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/conftest.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/sample_file.txt +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/test_client.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/test_deepcopy.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/test_extract_files.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/test_files.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/test_models.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/test_qs.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/test_required_args.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/test_response.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/test_streaming.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/test_transform.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/test_utils/test_datetime_parse.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/test_utils/test_proxy.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/test_utils/test_typing.py +0 -0
- {deeprails-1.9.0 → deeprails-1.10.0}/tests/utils.py +0 -0
{deeprails-1.9.0 → deeprails-1.10.0}/CHANGELOG.md
@@ -1,5 +1,14 @@
 # Changelog

+## 1.10.0 (2025-10-29)
+
+Full Changelog: [v1.9.0...v1.10.0](https://github.com/deeprails/deeprails-sdk-python/compare/v1.9.0...v1.10.0)
+
+### Features
+
+* **api:** remove evaluate api ([a3ddec1](https://github.com/deeprails/deeprails-sdk-python/commit/a3ddec1696eaa1247eea2a3bffd61e63d6537d30))
+* **api:** remove evaluate references ([23519a3](https://github.com/deeprails/deeprails-sdk-python/commit/23519a3349a254fe2fe24a51aeb59545d0820b70))
+
 ## 1.9.0 (2025-10-24)

 Full Changelog: [v1.8.0...v1.9.0](https://github.com/deeprails/deeprails-sdk-python/compare/v1.8.0...v1.9.0)
{deeprails-1.9.0 → deeprails-1.10.0}/api.md
@@ -28,16 +28,3 @@ Methods:
 - <code title="get /monitor/{monitor_id}">client.monitor.<a href="./src/deeprails/resources/monitor.py">retrieve</a>(monitor_id, \*\*<a href="src/deeprails/types/monitor_retrieve_params.py">params</a>) -> <a href="./src/deeprails/types/monitor_detail_response.py">MonitorDetailResponse</a></code>
 - <code title="put /monitor/{monitor_id}">client.monitor.<a href="./src/deeprails/resources/monitor.py">update</a>(monitor_id, \*\*<a href="src/deeprails/types/monitor_update_params.py">params</a>) -> <a href="./src/deeprails/types/monitor_response.py">MonitorResponse</a></code>
 - <code title="post /monitor/{monitor_id}/events">client.monitor.<a href="./src/deeprails/resources/monitor.py">submit_event</a>(monitor_id, \*\*<a href="src/deeprails/types/monitor_submit_event_params.py">params</a>) -> <a href="./src/deeprails/types/monitor_event_response.py">MonitorEventResponse</a></code>
-
-# Evaluate
-
-Types:
-
-```python
-from deeprails.types import Evaluation
-```
-
-Methods:
-
-- <code title="post /evaluate">client.evaluate.<a href="./src/deeprails/resources/evaluate.py">create</a>(\*\*<a href="src/deeprails/types/evaluate_create_params.py">params</a>) -> <a href="./src/deeprails/types/evaluation.py">Evaluation</a></code>
-- <code title="get /evaluate/{eval_id}">client.evaluate.<a href="./src/deeprails/resources/evaluate.py">retrieve</a>(eval_id) -> <a href="./src/deeprails/types/evaluation.py">Evaluation</a></code>
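For context on what the removed `# Evaluate` section documented: the 1.9.0 call pattern is fully visible in the deleted `src/deeprails/resources/evaluate.py` and `tests/api_resources/test_evaluate.py` further down. A minimal sketch of that now-removed usage, assuming a configured client and reusing the placeholder values from the deleted tests:

```python
# Sketch of the 1.9.0 Evaluate API that this release removes; placeholder values
# mirror the deleted tests/api_resources/test_evaluate.py fixtures.
from deeprails import Deeprails

client = Deeprails()  # assumes API credentials are already configured for the client

# POST /evaluate -> Evaluation (no longer available in 1.10.0)
evaluation = client.evaluate.create(
    model_input={"user_prompt": "user_prompt", "system_prompt": "system_prompt"},
    model_output="model_output",
    run_mode="smart",
    guardrail_metrics=["correctness"],
)

# GET /evaluate/{eval_id} -> Evaluation (no longer available in 1.10.0)
evaluation = client.evaluate.retrieve("eval_id")
```

In 1.10.0 the `client.evaluate` resource and the `evaluate_create_params` module are gone; the `Evaluation` model itself survives, relocated into `monitor_detail_response.py` (see the rename hunks below).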
{deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/_client.py
@@ -21,7 +21,7 @@ from ._types import (
 )
 from ._utils import is_given, get_async_library
 from ._version import __version__
-from .resources import defend, monitor
+from .resources import defend, monitor
 from ._streaming import Stream as Stream, AsyncStream as AsyncStream
 from ._exceptions import APIStatusError, DeeprailsError
 from ._base_client import (
@@ -45,7 +45,6 @@ __all__ = [
 class Deeprails(SyncAPIClient):
     defend: defend.DefendResource
     monitor: monitor.MonitorResource
-    evaluate: evaluate.EvaluateResource
     with_raw_response: DeeprailsWithRawResponse
     with_streaming_response: DeeprailsWithStreamedResponse

@@ -105,7 +104,6 @@ class Deeprails(SyncAPIClient):

         self.defend = defend.DefendResource(self)
         self.monitor = monitor.MonitorResource(self)
-        self.evaluate = evaluate.EvaluateResource(self)
         self.with_raw_response = DeeprailsWithRawResponse(self)
         self.with_streaming_response = DeeprailsWithStreamedResponse(self)

@@ -217,7 +215,6 @@ class Deeprails(SyncAPIClient):
 class AsyncDeeprails(AsyncAPIClient):
     defend: defend.AsyncDefendResource
     monitor: monitor.AsyncMonitorResource
-    evaluate: evaluate.AsyncEvaluateResource
     with_raw_response: AsyncDeeprailsWithRawResponse
     with_streaming_response: AsyncDeeprailsWithStreamedResponse

@@ -277,7 +274,6 @@ class AsyncDeeprails(AsyncAPIClient):

         self.defend = defend.AsyncDefendResource(self)
         self.monitor = monitor.AsyncMonitorResource(self)
-        self.evaluate = evaluate.AsyncEvaluateResource(self)
         self.with_raw_response = AsyncDeeprailsWithRawResponse(self)
         self.with_streaming_response = AsyncDeeprailsWithStreamedResponse(self)

@@ -390,28 +386,24 @@ class DeeprailsWithRawResponse:
     def __init__(self, client: Deeprails) -> None:
         self.defend = defend.DefendResourceWithRawResponse(client.defend)
         self.monitor = monitor.MonitorResourceWithRawResponse(client.monitor)
-        self.evaluate = evaluate.EvaluateResourceWithRawResponse(client.evaluate)


 class AsyncDeeprailsWithRawResponse:
     def __init__(self, client: AsyncDeeprails) -> None:
         self.defend = defend.AsyncDefendResourceWithRawResponse(client.defend)
         self.monitor = monitor.AsyncMonitorResourceWithRawResponse(client.monitor)
-        self.evaluate = evaluate.AsyncEvaluateResourceWithRawResponse(client.evaluate)


 class DeeprailsWithStreamedResponse:
     def __init__(self, client: Deeprails) -> None:
         self.defend = defend.DefendResourceWithStreamingResponse(client.defend)
         self.monitor = monitor.MonitorResourceWithStreamingResponse(client.monitor)
-        self.evaluate = evaluate.EvaluateResourceWithStreamingResponse(client.evaluate)


 class AsyncDeeprailsWithStreamedResponse:
     def __init__(self, client: AsyncDeeprails) -> None:
         self.defend = defend.AsyncDefendResourceWithStreamingResponse(client.defend)
         self.monitor = monitor.AsyncMonitorResourceWithStreamingResponse(client.monitor)
-        self.evaluate = evaluate.AsyncEvaluateResourceWithStreamingResponse(client.evaluate)


 Client = Deeprails
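The net effect of these `_client.py` hunks is that neither `Deeprails` nor `AsyncDeeprails` declares or assigns an `evaluate` attribute any more, so leftover call sites fail at attribute lookup before any request is made. A hedged sketch of a feature check (this detection pattern is my own suggestion, not something shipped in the SDK):

```python
from deeprails import Deeprails

client = Deeprails()  # assumes the same client configuration as under 1.9.0

# With the attribute and resource classes removed, touching client.evaluate
# raises AttributeError in 1.10.0 instead of issuing an HTTP request.
if hasattr(client, "evaluate"):
    print("evaluate resource present; running against deeprails < 1.10.0")
else:
    print("evaluate resource removed; this is deeprails >= 1.10.0")
```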
{deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/resources/__init__.py
@@ -16,14 +16,6 @@ from .monitor import (
     MonitorResourceWithStreamingResponse,
     AsyncMonitorResourceWithStreamingResponse,
 )
-from .evaluate import (
-    EvaluateResource,
-    AsyncEvaluateResource,
-    EvaluateResourceWithRawResponse,
-    AsyncEvaluateResourceWithRawResponse,
-    EvaluateResourceWithStreamingResponse,
-    AsyncEvaluateResourceWithStreamingResponse,
-)

 __all__ = [
     "DefendResource",
@@ -38,10 +30,4 @@ __all__ = [
     "AsyncMonitorResourceWithRawResponse",
     "MonitorResourceWithStreamingResponse",
     "AsyncMonitorResourceWithStreamingResponse",
-    "EvaluateResource",
-    "AsyncEvaluateResource",
-    "EvaluateResourceWithRawResponse",
-    "AsyncEvaluateResourceWithRawResponse",
-    "EvaluateResourceWithStreamingResponse",
-    "AsyncEvaluateResourceWithStreamingResponse",
 ]
{deeprails-1.9.0 → deeprails-1.10.0}/src/deeprails/types/__init__.py
@@ -2,12 +2,10 @@

 from __future__ import annotations

-from .evaluation import Evaluation as Evaluation
 from .defend_response import DefendResponse as DefendResponse
 from .monitor_response import MonitorResponse as MonitorResponse
 from .monitor_create_params import MonitorCreateParams as MonitorCreateParams
 from .monitor_update_params import MonitorUpdateParams as MonitorUpdateParams
-from .evaluate_create_params import EvaluateCreateParams as EvaluateCreateParams
 from .monitor_event_response import MonitorEventResponse as MonitorEventResponse
 from .monitor_detail_response import MonitorDetailResponse as MonitorDetailResponse
 from .monitor_retrieve_params import MonitorRetrieveParams as MonitorRetrieveParams
deeprails-1.9.0/src/deeprails/types/evaluation.py → deeprails-1.10.0/src/deeprails/types/monitor_detail_response.py
@@ -8,10 +8,10 @@ from pydantic import Field as FieldInfo

 from .._models import BaseModel

-__all__ = ["Evaluation", "
+__all__ = ["MonitorDetailResponse", "Evaluation", "EvaluationModelInput", "Stats"]


-class
+class EvaluationModelInput(BaseModel):
     ground_truth: Optional[str] = None
     """The ground truth for evaluating Ground Truth Adherence guardrail."""

@@ -29,7 +29,7 @@ class Evaluation(BaseModel):
     evaluation_status: Literal["in_progress", "completed", "canceled", "queued", "failed"]
     """Status of the evaluation."""

-    api_model_input:
+    api_model_input: EvaluationModelInput = FieldInfo(alias="model_input")
     """A dictionary of inputs sent to the LLM to generate output.

     The dictionary must contain at least a `user_prompt` field or a `system_prompt`
@@ -103,3 +103,60 @@ class Evaluation(BaseModel):

     start_timestamp: Optional[datetime] = None
     """The time the evaluation started in UTC."""
+
+
+class Stats(BaseModel):
+    completed_evaluations: Optional[int] = None
+    """Number of evaluations that completed successfully."""
+
+    failed_evaluations: Optional[int] = None
+    """Number of evaluations that failed."""
+
+    in_progress_evaluations: Optional[int] = None
+    """Number of evaluations currently in progress."""
+
+    queued_evaluations: Optional[int] = None
+    """Number of evaluations currently queued."""
+
+    total_evaluations: Optional[int] = None
+    """Total number of evaluations performed by this monitor."""
+
+
+class MonitorDetailResponse(BaseModel):
+    monitor_id: str
+    """A unique monitor ID."""
+
+    monitor_status: Literal["active", "inactive"]
+    """Status of the monitor.
+
+    Can be `active` or `inactive`. Inactive monitors no longer record and evaluate
+    events.
+    """
+
+    name: str
+    """Name of this monitor."""
+
+    created_at: Optional[datetime] = None
+    """The time the monitor was created in UTC."""
+
+    description: Optional[str] = None
+    """Description of this monitor."""
+
+    evaluations: Optional[List[Evaluation]] = None
+    """An array of all evaluations performed by this monitor.
+
+    Each one corresponds to a separate monitor event.
+    """
+
+    stats: Optional[Stats] = None
+    """
+    Contains five fields used for stats of this monitor: total evaluations,
+    completed evaluations, failed evaluations, queued evaluations, and in progress
+    evaluations.
+    """
+
+    updated_at: Optional[datetime] = None
+    """The most recent time the monitor was modified in UTC."""
+
+    user_id: Optional[str] = None
+    """User ID of the user who created the monitor."""
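With the rename, `monitor_detail_response.py` now carries `MonitorDetailResponse`, its nested `Stats`, and the relocated `Evaluation`/`EvaluationModelInput` models. A sketch of how a retrieved monitor might be inspected, assuming a valid monitor ID and that the optional query parameters of `client.monitor.retrieve` can be omitted:

```python
from deeprails import Deeprails
from deeprails.types import MonitorDetailResponse

client = Deeprails()  # assumes API credentials are configured

# GET /monitor/{monitor_id} -> MonitorDetailResponse (per api.md above)
detail: MonitorDetailResponse = client.monitor.retrieve("monitor_id")

print(detail.name, detail.monitor_status)

# stats and evaluations are Optional fields, so guard before dereferencing
if detail.stats is not None:
    print("total evaluations:", detail.stats.total_evaluations)

for evaluation in detail.evaluations or []:
    print(evaluation.evaluation_status, evaluation.start_timestamp)
```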
deeprails-1.9.0/src/deeprails/resources/evaluate.py
@@ -1,334 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List
-from typing_extensions import Literal
-
-import httpx
-
-from ..types import evaluate_create_params
-from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
-from .._utils import maybe_transform, async_maybe_transform
-from .._compat import cached_property
-from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import (
-    to_raw_response_wrapper,
-    to_streamed_response_wrapper,
-    async_to_raw_response_wrapper,
-    async_to_streamed_response_wrapper,
-)
-from .._base_client import make_request_options
-from ..types.evaluation import Evaluation
-
-__all__ = ["EvaluateResource", "AsyncEvaluateResource"]
-
-
-class EvaluateResource(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> EvaluateResourceWithRawResponse:
-        """
-        This property can be used as a prefix for any HTTP method call to return
-        the raw response object instead of the parsed content.
-
-        For more information, see https://www.github.com/deeprails/deeprails-sdk-python#accessing-raw-response-data-eg-headers
-        """
-        return EvaluateResourceWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> EvaluateResourceWithStreamingResponse:
-        """
-        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
-        For more information, see https://www.github.com/deeprails/deeprails-sdk-python#with_streaming_response
-        """
-        return EvaluateResourceWithStreamingResponse(self)
-
-    def create(
-        self,
-        *,
-        model_input: evaluate_create_params.ModelInput,
-        model_output: str,
-        run_mode: Literal["precision_plus", "precision", "smart", "economy"],
-        guardrail_metrics: List[
-            Literal[
-                "correctness",
-                "completeness",
-                "instruction_adherence",
-                "context_adherence",
-                "ground_truth_adherence",
-                "comprehensive_safety",
-            ]
-        ]
-        | Omit = omit,
-        model_used: str | Omit = omit,
-        nametag: str | Omit = omit,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = not_given,
-    ) -> Evaluation:
-        """
-        Use this endpoint to evaluate a model's input and output pair against selected
-        guardrail metrics
-
-        Args:
-          model_input: A dictionary of inputs sent to the LLM to generate output. The dictionary must
-              contain at least a `user_prompt` field or a `system_prompt` field. For
-              ground_truth_adherence guardrail metric, `ground_truth` should be provided.
-
-          model_output: Output generated by the LLM to be evaluated.
-
-          run_mode: Run mode for the evaluation. The run mode allows the user to optimize for speed,
-              accuracy, and cost by determining which models are used to evaluate the event.
-              Available run modes include `precision_plus`, `precision`, `smart`, and
-              `economy`. Defaults to `smart`.
-
-          guardrail_metrics: An array of guardrail metrics that the model input and output pair will be
-              evaluated on. For non-enterprise users, these will be limited to the allowed
-              guardrail metrics.
-
-          model_used: Model ID used to generate the output, like `gpt-4o` or `o3`.
-
-          nametag: An optional, user-defined tag for the evaluation.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return self._post(
-            "/evaluate",
-            body=maybe_transform(
-                {
-                    "model_input": model_input,
-                    "model_output": model_output,
-                    "run_mode": run_mode,
-                    "guardrail_metrics": guardrail_metrics,
-                    "model_used": model_used,
-                    "nametag": nametag,
-                },
-                evaluate_create_params.EvaluateCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Evaluation,
-        )
-
-    def retrieve(
-        self,
-        eval_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = not_given,
-    ) -> Evaluation:
-        """
-        Use this endpoint to retrieve the evaluation record for a given evaluation ID
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not eval_id:
-            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
-        return self._get(
-            f"/evaluate/{eval_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Evaluation,
-        )
-
-
-class AsyncEvaluateResource(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncEvaluateResourceWithRawResponse:
-        """
-        This property can be used as a prefix for any HTTP method call to return
-        the raw response object instead of the parsed content.
-
-        For more information, see https://www.github.com/deeprails/deeprails-sdk-python#accessing-raw-response-data-eg-headers
-        """
-        return AsyncEvaluateResourceWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncEvaluateResourceWithStreamingResponse:
-        """
-        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
-        For more information, see https://www.github.com/deeprails/deeprails-sdk-python#with_streaming_response
-        """
-        return AsyncEvaluateResourceWithStreamingResponse(self)
-
-    async def create(
-        self,
-        *,
-        model_input: evaluate_create_params.ModelInput,
-        model_output: str,
-        run_mode: Literal["precision_plus", "precision", "smart", "economy"],
-        guardrail_metrics: List[
-            Literal[
-                "correctness",
-                "completeness",
-                "instruction_adherence",
-                "context_adherence",
-                "ground_truth_adherence",
-                "comprehensive_safety",
-            ]
-        ]
-        | Omit = omit,
-        model_used: str | Omit = omit,
-        nametag: str | Omit = omit,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = not_given,
-    ) -> Evaluation:
-        """
-        Use this endpoint to evaluate a model's input and output pair against selected
-        guardrail metrics
-
-        Args:
-          model_input: A dictionary of inputs sent to the LLM to generate output. The dictionary must
-              contain at least a `user_prompt` field or a `system_prompt` field. For
-              ground_truth_adherence guardrail metric, `ground_truth` should be provided.
-
-          model_output: Output generated by the LLM to be evaluated.
-
-          run_mode: Run mode for the evaluation. The run mode allows the user to optimize for speed,
-              accuracy, and cost by determining which models are used to evaluate the event.
-              Available run modes include `precision_plus`, `precision`, `smart`, and
-              `economy`. Defaults to `smart`.
-
-          guardrail_metrics: An array of guardrail metrics that the model input and output pair will be
-              evaluated on. For non-enterprise users, these will be limited to the allowed
-              guardrail metrics.
-
-          model_used: Model ID used to generate the output, like `gpt-4o` or `o3`.
-
-          nametag: An optional, user-defined tag for the evaluation.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return await self._post(
-            "/evaluate",
-            body=await async_maybe_transform(
-                {
-                    "model_input": model_input,
-                    "model_output": model_output,
-                    "run_mode": run_mode,
-                    "guardrail_metrics": guardrail_metrics,
-                    "model_used": model_used,
-                    "nametag": nametag,
-                },
-                evaluate_create_params.EvaluateCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Evaluation,
-        )
-
-    async def retrieve(
-        self,
-        eval_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = not_given,
-    ) -> Evaluation:
-        """
-        Use this endpoint to retrieve the evaluation record for a given evaluation ID
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not eval_id:
-            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
-        return await self._get(
-            f"/evaluate/{eval_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Evaluation,
-        )
-
-
-class EvaluateResourceWithRawResponse:
-    def __init__(self, evaluate: EvaluateResource) -> None:
-        self._evaluate = evaluate
-
-        self.create = to_raw_response_wrapper(
-            evaluate.create,
-        )
-        self.retrieve = to_raw_response_wrapper(
-            evaluate.retrieve,
-        )
-
-
-class AsyncEvaluateResourceWithRawResponse:
-    def __init__(self, evaluate: AsyncEvaluateResource) -> None:
-        self._evaluate = evaluate
-
-        self.create = async_to_raw_response_wrapper(
-            evaluate.create,
-        )
-        self.retrieve = async_to_raw_response_wrapper(
-            evaluate.retrieve,
-        )
-
-
-class EvaluateResourceWithStreamingResponse:
-    def __init__(self, evaluate: EvaluateResource) -> None:
-        self._evaluate = evaluate
-
-        self.create = to_streamed_response_wrapper(
-            evaluate.create,
-        )
-        self.retrieve = to_streamed_response_wrapper(
-            evaluate.retrieve,
-        )
-
-
-class AsyncEvaluateResourceWithStreamingResponse:
-    def __init__(self, evaluate: AsyncEvaluateResource) -> None:
-        self._evaluate = evaluate
-
-        self.create = async_to_streamed_response_wrapper(
-            evaluate.create,
-        )
-        self.retrieve = async_to_streamed_response_wrapper(
-            evaluate.retrieve,
-        )
deeprails-1.9.0/src/deeprails/types/evaluate_create_params.py
@@ -1,63 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["EvaluateCreateParams", "ModelInput"]
-
-
-class EvaluateCreateParams(TypedDict, total=False):
-    model_input: Required[ModelInput]
-    """A dictionary of inputs sent to the LLM to generate output.
-
-    The dictionary must contain at least a `user_prompt` field or a `system_prompt`
-    field. For ground_truth_adherence guardrail metric, `ground_truth` should be
-    provided.
-    """
-
-    model_output: Required[str]
-    """Output generated by the LLM to be evaluated."""
-
-    run_mode: Required[Literal["precision_plus", "precision", "smart", "economy"]]
-    """Run mode for the evaluation.
-
-    The run mode allows the user to optimize for speed, accuracy, and cost by
-    determining which models are used to evaluate the event. Available run modes
-    include `precision_plus`, `precision`, `smart`, and `economy`. Defaults to
-    `smart`.
-    """
-
-    guardrail_metrics: List[
-        Literal[
-            "correctness",
-            "completeness",
-            "instruction_adherence",
-            "context_adherence",
-            "ground_truth_adherence",
-            "comprehensive_safety",
-        ]
-    ]
-    """
-    An array of guardrail metrics that the model input and output pair will be
-    evaluated on. For non-enterprise users, these will be limited to the allowed
-    guardrail metrics.
-    """
-
-    model_used: str
-    """Model ID used to generate the output, like `gpt-4o` or `o3`."""
-
-    nametag: str
-    """An optional, user-defined tag for the evaluation."""
-
-
-class ModelInput(TypedDict, total=False):
-    ground_truth: str
-    """The ground truth for evaluating Ground Truth Adherence guardrail."""
-
-    system_prompt: str
-    """The system prompt used to generate the output."""
-
-    user_prompt: str
-    """The user prompt used to generate the output."""
deeprails-1.9.0/src/deeprails/types/monitor_detail_response.py
@@ -1,67 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from datetime import datetime
-from typing_extensions import Literal
-
-from .._models import BaseModel
-from .evaluation import Evaluation
-
-__all__ = ["MonitorDetailResponse", "Stats"]
-
-
-class Stats(BaseModel):
-    completed_evaluations: Optional[int] = None
-    """Number of evaluations that completed successfully."""
-
-    failed_evaluations: Optional[int] = None
-    """Number of evaluations that failed."""
-
-    in_progress_evaluations: Optional[int] = None
-    """Number of evaluations currently in progress."""
-
-    queued_evaluations: Optional[int] = None
-    """Number of evaluations currently queued."""
-
-    total_evaluations: Optional[int] = None
-    """Total number of evaluations performed by this monitor."""
-
-
-class MonitorDetailResponse(BaseModel):
-    monitor_id: str
-    """A unique monitor ID."""
-
-    monitor_status: Literal["active", "inactive"]
-    """Status of the monitor.
-
-    Can be `active` or `inactive`. Inactive monitors no longer record and evaluate
-    events.
-    """
-
-    name: str
-    """Name of this monitor."""
-
-    created_at: Optional[datetime] = None
-    """The time the monitor was created in UTC."""
-
-    description: Optional[str] = None
-    """Description of this monitor."""
-
-    evaluations: Optional[List[Evaluation]] = None
-    """An array of all evaluations performed by this monitor.
-
-    Each one corresponds to a separate monitor event.
-    """
-
-    stats: Optional[Stats] = None
-    """
-    Contains five fields used for stats of this monitor: total evaluations,
-    completed evaluations, failed evaluations, queued evaluations, and in progress
-    evaluations.
-    """
-
-    updated_at: Optional[datetime] = None
-    """The most recent time the monitor was modified in UTC."""
-
-    user_id: Optional[str] = None
-    """User ID of the user who created the monitor."""
deeprails-1.9.0/tests/api_resources/test_evaluate.py
@@ -1,222 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from deeprails import Deeprails, AsyncDeeprails
-from tests.utils import assert_matches_type
-from deeprails.types import Evaluation
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestEvaluate:
-    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    def test_method_create(self, client: Deeprails) -> None:
-        evaluate = client.evaluate.create(
-            model_input={},
-            model_output="model_output",
-            run_mode="precision_plus",
-        )
-        assert_matches_type(Evaluation, evaluate, path=["response"])
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    def test_method_create_with_all_params(self, client: Deeprails) -> None:
-        evaluate = client.evaluate.create(
-            model_input={
-                "ground_truth": "ground_truth",
-                "system_prompt": "system_prompt",
-                "user_prompt": "user_prompt",
-            },
-            model_output="model_output",
-            run_mode="precision_plus",
-            guardrail_metrics=["correctness"],
-            model_used="model_used",
-            nametag="nametag",
-        )
-        assert_matches_type(Evaluation, evaluate, path=["response"])
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    def test_raw_response_create(self, client: Deeprails) -> None:
-        response = client.evaluate.with_raw_response.create(
-            model_input={},
-            model_output="model_output",
-            run_mode="precision_plus",
-        )
-
-        assert response.is_closed is True
-        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        evaluate = response.parse()
-        assert_matches_type(Evaluation, evaluate, path=["response"])
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    def test_streaming_response_create(self, client: Deeprails) -> None:
-        with client.evaluate.with_streaming_response.create(
-            model_input={},
-            model_output="model_output",
-            run_mode="precision_plus",
-        ) as response:
-            assert not response.is_closed
-            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
-            evaluate = response.parse()
-            assert_matches_type(Evaluation, evaluate, path=["response"])
-
-        assert cast(Any, response.is_closed) is True
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    def test_method_retrieve(self, client: Deeprails) -> None:
-        evaluate = client.evaluate.retrieve(
-            "eval_id",
-        )
-        assert_matches_type(Evaluation, evaluate, path=["response"])
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    def test_raw_response_retrieve(self, client: Deeprails) -> None:
-        response = client.evaluate.with_raw_response.retrieve(
-            "eval_id",
-        )
-
-        assert response.is_closed is True
-        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        evaluate = response.parse()
-        assert_matches_type(Evaluation, evaluate, path=["response"])
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    def test_streaming_response_retrieve(self, client: Deeprails) -> None:
-        with client.evaluate.with_streaming_response.retrieve(
-            "eval_id",
-        ) as response:
-            assert not response.is_closed
-            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
-            evaluate = response.parse()
-            assert_matches_type(Evaluation, evaluate, path=["response"])
-
-        assert cast(Any, response.is_closed) is True
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    def test_path_params_retrieve(self, client: Deeprails) -> None:
-        with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
-            client.evaluate.with_raw_response.retrieve(
-                "",
-            )
-
-
-class TestAsyncEvaluate:
-    parametrize = pytest.mark.parametrize(
-        "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
-    )
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    async def test_method_create(self, async_client: AsyncDeeprails) -> None:
-        evaluate = await async_client.evaluate.create(
-            model_input={},
-            model_output="model_output",
-            run_mode="precision_plus",
-        )
-        assert_matches_type(Evaluation, evaluate, path=["response"])
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    async def test_method_create_with_all_params(self, async_client: AsyncDeeprails) -> None:
-        evaluate = await async_client.evaluate.create(
-            model_input={
-                "ground_truth": "ground_truth",
-                "system_prompt": "system_prompt",
-                "user_prompt": "user_prompt",
-            },
-            model_output="model_output",
-            run_mode="precision_plus",
-            guardrail_metrics=["correctness"],
-            model_used="model_used",
-            nametag="nametag",
-        )
-        assert_matches_type(Evaluation, evaluate, path=["response"])
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    async def test_raw_response_create(self, async_client: AsyncDeeprails) -> None:
-        response = await async_client.evaluate.with_raw_response.create(
-            model_input={},
-            model_output="model_output",
-            run_mode="precision_plus",
-        )
-
-        assert response.is_closed is True
-        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        evaluate = await response.parse()
-        assert_matches_type(Evaluation, evaluate, path=["response"])
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    async def test_streaming_response_create(self, async_client: AsyncDeeprails) -> None:
-        async with async_client.evaluate.with_streaming_response.create(
-            model_input={},
-            model_output="model_output",
-            run_mode="precision_plus",
-        ) as response:
-            assert not response.is_closed
-            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
-            evaluate = await response.parse()
-            assert_matches_type(Evaluation, evaluate, path=["response"])
-
-        assert cast(Any, response.is_closed) is True
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    async def test_method_retrieve(self, async_client: AsyncDeeprails) -> None:
-        evaluate = await async_client.evaluate.retrieve(
-            "eval_id",
-        )
-        assert_matches_type(Evaluation, evaluate, path=["response"])
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    async def test_raw_response_retrieve(self, async_client: AsyncDeeprails) -> None:
-        response = await async_client.evaluate.with_raw_response.retrieve(
-            "eval_id",
-        )
-
-        assert response.is_closed is True
-        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        evaluate = await response.parse()
-        assert_matches_type(Evaluation, evaluate, path=["response"])
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    async def test_streaming_response_retrieve(self, async_client: AsyncDeeprails) -> None:
-        async with async_client.evaluate.with_streaming_response.retrieve(
-            "eval_id",
-        ) as response:
-            assert not response.is_closed
-            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
-            evaluate = await response.parse()
-            assert_matches_type(Evaluation, evaluate, path=["response"])
-
-        assert cast(Any, response.is_closed) is True
-
-    @pytest.mark.skip(reason="Prism tests are disabled")
-    @parametrize
-    async def test_path_params_retrieve(self, async_client: AsyncDeeprails) -> None:
-        with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
-            await async_client.evaluate.with_raw_response.retrieve(
-                "",
-            )