deeprails-1.9.0-py3-none-any.whl → deeprails-1.10.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deeprails/_client.py +1 -9
- deeprails/_version.py +1 -1
- deeprails/resources/__init__.py +0 -14
- deeprails/types/__init__.py +0 -2
- deeprails/types/monitor_detail_response.py +98 -3
- {deeprails-1.9.0.dist-info → deeprails-1.10.0.dist-info}/METADATA +1 -1
- {deeprails-1.9.0.dist-info → deeprails-1.10.0.dist-info}/RECORD +9 -12
- deeprails/resources/evaluate.py +0 -334
- deeprails/types/evaluate_create_params.py +0 -63
- deeprails/types/evaluation.py +0 -105
- {deeprails-1.9.0.dist-info → deeprails-1.10.0.dist-info}/WHEEL +0 -0
- {deeprails-1.9.0.dist-info → deeprails-1.10.0.dist-info}/licenses/LICENSE +0 -0
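In practical terms, this release removes the stand-alone `evaluate` resource from the client, and the `Evaluation` and model-input types it returned are now defined inline in `deeprails/types/monitor_detail_response.py`. A minimal before/after sketch of the caller-side impact (illustrative only; client construction and credential handling are assumed here, not taken from the package docs):

    from deeprails import Deeprails

    client = Deeprails()  # assumes the API key is picked up from the environment

    # deeprails 1.9.0: stand-alone evaluation via the now-removed resource
    evaluation = client.evaluate.create(
        model_input={"user_prompt": "Summarize the quarterly report."},
        model_output="Here is a summary ...",
        run_mode="smart",
    )

    # deeprails 1.10.0: `client.evaluate` no longer exists (AttributeError);
    # the `Evaluation` model only appears embedded in monitor detail responses.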
deeprails/_client.py
CHANGED
@@ -21,7 +21,7 @@ from ._types import (
 )
 from ._utils import is_given, get_async_library
 from ._version import __version__
-from .resources import defend, monitor
+from .resources import defend, monitor
 from ._streaming import Stream as Stream, AsyncStream as AsyncStream
 from ._exceptions import APIStatusError, DeeprailsError
 from ._base_client import (
@@ -45,7 +45,6 @@ __all__ = [
 class Deeprails(SyncAPIClient):
     defend: defend.DefendResource
     monitor: monitor.MonitorResource
-    evaluate: evaluate.EvaluateResource
     with_raw_response: DeeprailsWithRawResponse
     with_streaming_response: DeeprailsWithStreamedResponse

@@ -105,7 +104,6 @@ class Deeprails(SyncAPIClient):

         self.defend = defend.DefendResource(self)
         self.monitor = monitor.MonitorResource(self)
-        self.evaluate = evaluate.EvaluateResource(self)
         self.with_raw_response = DeeprailsWithRawResponse(self)
         self.with_streaming_response = DeeprailsWithStreamedResponse(self)

@@ -217,7 +215,6 @@ class Deeprails(SyncAPIClient):
 class AsyncDeeprails(AsyncAPIClient):
     defend: defend.AsyncDefendResource
     monitor: monitor.AsyncMonitorResource
-    evaluate: evaluate.AsyncEvaluateResource
     with_raw_response: AsyncDeeprailsWithRawResponse
     with_streaming_response: AsyncDeeprailsWithStreamedResponse

@@ -277,7 +274,6 @@ class AsyncDeeprails(AsyncAPIClient):

         self.defend = defend.AsyncDefendResource(self)
         self.monitor = monitor.AsyncMonitorResource(self)
-        self.evaluate = evaluate.AsyncEvaluateResource(self)
         self.with_raw_response = AsyncDeeprailsWithRawResponse(self)
         self.with_streaming_response = AsyncDeeprailsWithStreamedResponse(self)

@@ -390,28 +386,24 @@ class DeeprailsWithRawResponse:
     def __init__(self, client: Deeprails) -> None:
         self.defend = defend.DefendResourceWithRawResponse(client.defend)
         self.monitor = monitor.MonitorResourceWithRawResponse(client.monitor)
-        self.evaluate = evaluate.EvaluateResourceWithRawResponse(client.evaluate)


 class AsyncDeeprailsWithRawResponse:
     def __init__(self, client: AsyncDeeprails) -> None:
         self.defend = defend.AsyncDefendResourceWithRawResponse(client.defend)
         self.monitor = monitor.AsyncMonitorResourceWithRawResponse(client.monitor)
-        self.evaluate = evaluate.AsyncEvaluateResourceWithRawResponse(client.evaluate)


 class DeeprailsWithStreamedResponse:
     def __init__(self, client: Deeprails) -> None:
         self.defend = defend.DefendResourceWithStreamingResponse(client.defend)
         self.monitor = monitor.MonitorResourceWithStreamingResponse(client.monitor)
-        self.evaluate = evaluate.EvaluateResourceWithStreamingResponse(client.evaluate)


 class AsyncDeeprailsWithStreamedResponse:
     def __init__(self, client: AsyncDeeprails) -> None:
         self.defend = defend.AsyncDefendResourceWithStreamingResponse(client.defend)
         self.monitor = monitor.AsyncMonitorResourceWithStreamingResponse(client.monitor)
-        self.evaluate = evaluate.AsyncEvaluateResourceWithStreamingResponse(client.evaluate)


 Client = Deeprails
deeprails/_version.py
CHANGED
deeprails/resources/__init__.py
CHANGED
@@ -16,14 +16,6 @@ from .monitor import (
     MonitorResourceWithStreamingResponse,
     AsyncMonitorResourceWithStreamingResponse,
 )
-from .evaluate import (
-    EvaluateResource,
-    AsyncEvaluateResource,
-    EvaluateResourceWithRawResponse,
-    AsyncEvaluateResourceWithRawResponse,
-    EvaluateResourceWithStreamingResponse,
-    AsyncEvaluateResourceWithStreamingResponse,
-)

 __all__ = [
     "DefendResource",
@@ -38,10 +30,4 @@ __all__ = [
     "AsyncMonitorResourceWithRawResponse",
     "MonitorResourceWithStreamingResponse",
     "AsyncMonitorResourceWithStreamingResponse",
-    "EvaluateResource",
-    "AsyncEvaluateResource",
-    "EvaluateResourceWithRawResponse",
-    "AsyncEvaluateResourceWithRawResponse",
-    "EvaluateResourceWithStreamingResponse",
-    "AsyncEvaluateResourceWithStreamingResponse",
 ]
deeprails/types/__init__.py
CHANGED
@@ -2,12 +2,10 @@

 from __future__ import annotations

-from .evaluation import Evaluation as Evaluation
 from .defend_response import DefendResponse as DefendResponse
 from .monitor_response import MonitorResponse as MonitorResponse
 from .monitor_create_params import MonitorCreateParams as MonitorCreateParams
 from .monitor_update_params import MonitorUpdateParams as MonitorUpdateParams
-from .evaluate_create_params import EvaluateCreateParams as EvaluateCreateParams
 from .monitor_event_response import MonitorEventResponse as MonitorEventResponse
 from .monitor_detail_response import MonitorDetailResponse as MonitorDetailResponse
 from .monitor_retrieve_params import MonitorRetrieveParams as MonitorRetrieveParams
deeprails/types/monitor_detail_response.py
CHANGED
@@ -1,13 +1,108 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

-from typing import List, Optional
+from typing import Dict, List, Optional
 from datetime import datetime
 from typing_extensions import Literal

+from pydantic import Field as FieldInfo
+
 from .._models import BaseModel
-from .evaluation import Evaluation

-__all__ = ["MonitorDetailResponse", "Stats"]
+__all__ = ["MonitorDetailResponse", "Evaluation", "EvaluationModelInput", "Stats"]
+
+
+class EvaluationModelInput(BaseModel):
+    ground_truth: Optional[str] = None
+    """The ground truth for evaluating Ground Truth Adherence guardrail."""
+
+    system_prompt: Optional[str] = None
+    """The system prompt used to generate the output."""
+
+    user_prompt: Optional[str] = None
+    """The user prompt used to generate the output."""
+
+
+class Evaluation(BaseModel):
+    eval_id: str
+    """A unique evaluation ID."""
+
+    evaluation_status: Literal["in_progress", "completed", "canceled", "queued", "failed"]
+    """Status of the evaluation."""
+
+    api_model_input: EvaluationModelInput = FieldInfo(alias="model_input")
+    """A dictionary of inputs sent to the LLM to generate output.
+
+    The dictionary must contain at least a `user_prompt` field or a `system_prompt`
+    field. For ground_truth_adherence guardrail metric, `ground_truth` should be
+    provided.
+    """
+
+    api_model_output: str = FieldInfo(alias="model_output")
+    """Output generated by the LLM to be evaluated."""
+
+    run_mode: Literal["precision_plus", "precision", "smart", "economy"]
+    """Run mode for the evaluation.
+
+    The run mode allows the user to optimize for speed, accuracy, and cost by
+    determining which models are used to evaluate the event.
+    """
+
+    created_at: Optional[datetime] = None
+    """The time the evaluation was created in UTC."""
+
+    end_timestamp: Optional[datetime] = None
+    """The time the evaluation completed in UTC."""
+
+    error_message: Optional[str] = None
+    """Description of the error causing the evaluation to fail, if any."""
+
+    error_timestamp: Optional[datetime] = None
+    """The time the error causing the evaluation to fail was recorded."""
+
+    evaluation_result: Optional[Dict[str, object]] = None
+    """
+    Evaluation result consisting of average scores and rationales for each of the
+    evaluated guardrail metrics.
+    """
+
+    evaluation_total_cost: Optional[float] = None
+    """Total cost of the evaluation."""
+
+    guardrail_metrics: Optional[
+        List[
+            Literal[
+                "correctness",
+                "completeness",
+                "instruction_adherence",
+                "context_adherence",
+                "ground_truth_adherence",
+                "comprehensive_safety",
+            ]
+        ]
+    ] = None
+    """
+    An array of guardrail metrics that the model input and output pair will be
+    evaluated on.
+    """
+
+    api_model_used: Optional[str] = FieldInfo(alias="model_used", default=None)
+    """Model ID used to generate the output, like `gpt-4o` or `o3`."""
+
+    modified_at: Optional[datetime] = None
+    """The most recent time the evaluation was modified in UTC."""
+
+    nametag: Optional[str] = None
+    """An optional, user-defined tag for the evaluation."""
+
+    progress: Optional[int] = None
+    """Evaluation progress.
+
+    Values range between 0 and 100; 100 corresponds to a completed
+    `evaluation_status`.
+    """
+
+    start_timestamp: Optional[datetime] = None
+    """The time the evaluation started in UTC."""


 class Stats(BaseModel):
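Note the `api_model_*` attribute names above: the SDK maps the wire fields `model_input`, `model_output`, and `model_used` onto prefixed attributes via pydantic field aliases. A minimal standalone sketch of that aliasing behavior (plain pydantic v2 models rather than the SDK's internal `BaseModel`; the payload is made up):

    from typing import Optional

    from pydantic import BaseModel, Field


    class EvaluationModelInput(BaseModel):
        user_prompt: Optional[str] = None
        system_prompt: Optional[str] = None


    class Evaluation(BaseModel):
        eval_id: str
        # populated from the wire key "model_input", exposed as api_model_input
        api_model_input: EvaluationModelInput = Field(alias="model_input")
        api_model_output: str = Field(alias="model_output")


    payload = {  # hypothetical API response fragment
        "eval_id": "eval_123",
        "model_input": {"user_prompt": "Summarize the quarterly report."},
        "model_output": "Here is a summary ...",
    }

    evaluation = Evaluation.model_validate(payload)
    print(evaluation.api_model_output)  # read back through the aliased attribute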
{deeprails-1.9.0.dist-info → deeprails-1.10.0.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 deeprails/__init__.py,sha256=Rz38DnPkRJzvTJjgsLPjYyEId-pJndorgdE4Yhcq0J4,2692
 deeprails/_base_client.py,sha256=3bTH_cn_59gEjfZdcRS6g6Er4ZobAgTCRsnQmD_viNQ,67050
-deeprails/_client.py,sha256=
+deeprails/_client.py,sha256=BzJOTEiC2RszdX3GCBEk_R5k6e48rzP-XlbCN1Cplr4,15563
 deeprails/_compat.py,sha256=DQBVORjFb33zch24jzkhM14msvnzY7mmSmgDLaVFUM8,6562
 deeprails/_constants.py,sha256=S14PFzyN9-I31wiV7SmIlL5Ga0MLHxdvegInGdXH7tM,462
 deeprails/_exceptions.py,sha256=N99bKrZNjDzgEDrvdw99WO-zpuPeZySaUUKNLEUz8mU,3226
@@ -11,7 +11,7 @@ deeprails/_resource.py,sha256=7RXX5KZr4j0TIE66vnduHp7p9Yf9X0FyDDECuvRHARg,1118
 deeprails/_response.py,sha256=yj0HJDU91WPpiczwi6CBOLAl_bqf4I_I96vWMAwx6Fg,28806
 deeprails/_streaming.py,sha256=hCp5bK9dyw2TyrVL69m-6qGC-QtGYwhXmFzITCWPgAs,10112
 deeprails/_types.py,sha256=XR3mad9NsGqZsjrd1VVJ657-4O4kwyw9Qzg4M3i6Vh0,7239
-deeprails/_version.py,sha256=
+deeprails/_version.py,sha256=dE6ROKmhSXt4B73GxgSswrtaxSRJBdQihEVLUUaQ-M0,162
 deeprails/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deeprails/_utils/__init__.py,sha256=7fch0GT9zpNnErbciSpUNa-SjTxxjY6kxHxKMOM4AGs,2305
 deeprails/_utils/_compat.py,sha256=D8gtAvjJQrDWt9upS0XaG9Rr5l1QhiAx_I_1utT_tt0,1195
@@ -26,26 +26,23 @@ deeprails/_utils/_transform.py,sha256=NjCzmnfqYrsAikUHQig6N9QfuTVbKipuP3ur9mcNF-
 deeprails/_utils/_typing.py,sha256=N_5PPuFNsaygbtA_npZd98SVN1LQQvFTKL6bkWPBZGU,4786
 deeprails/_utils/_utils.py,sha256=0dDqauUbVZEXV0NVl7Bwu904Wwo5eyFCZpQThhFNhyA,12253
 deeprails/lib/.keep,sha256=wuNrz-5SXo3jJaJOJgz4vFHM41YH_g20F5cRQo0vLes,224
-deeprails/resources/__init__.py,sha256=
+deeprails/resources/__init__.py,sha256=JBzEBVMLg2n1w2QJGYCsyjPHY6zHZ0G3xaWuuFoa5Pg,1015
 deeprails/resources/defend.py,sha256=e6NzUJziYx__FkGSVNeaqzA8sZU0L5dY5Nob1IPCxc4,28186
-deeprails/resources/evaluate.py,sha256=6SJ3iOH3uYPz3k3MS_G_P75zgSu4bBIurazHQ8byEl0,13025
 deeprails/resources/monitor.py,sha256=bW7HCn8lDpWqaPd11J_tVnGSduWTX3sfXlDYYycvk8k,22524
-deeprails/types/__init__.py,sha256
+deeprails/types/__init__.py,sha256=PlF7idbYiAj3xyJYstHDaBer-rLUHe6eUEfRfJKisvk,1124
 deeprails/types/defend_create_workflow_params.py,sha256=AmyoU1z3a0U39pqVtVnFsM86ysK1nYIx15Tolwb4PKA,2102
 deeprails/types/defend_response.py,sha256=VoePIT9RKN795y-3ZvoFmzSTCetqkZZh6iQLYjJEFoY,1603
 deeprails/types/defend_submit_event_params.py,sha256=yL_rLUGKlZjXHGbdi8h9ZItb4sICQ2POO_o0VbUtans,1539
 deeprails/types/defend_update_workflow_params.py,sha256=QH2k7EDMLub3mW1lPV5SUoKDHW_T2arSo-RGHLterwo,373
-deeprails/types/evaluate_create_params.py,sha256=GGdFXjYjyLD5O1fn4bwDwDKI0EwvciJNF-uYmvCUBwA,2001
-deeprails/types/evaluation.py,sha256=3ajWcd7oYRtYZDOe0fVQHeVRN6-ymjA7F47VduwTfP4,3328
 deeprails/types/monitor_create_params.py,sha256=kTSj-PhuzpT-HPDTQJemRWfd8w32foUMH9FQZj8symk,384
-deeprails/types/monitor_detail_response.py,sha256=
+deeprails/types/monitor_detail_response.py,sha256=s8dzFFaU9uI-GHCfUxCN9yLhpmq4LTu7CSww9z0SLvk,5028
 deeprails/types/monitor_event_response.py,sha256=-cnugHD_3QeeZRMbo6aQBirqSPgKIKpaD2qNkgxCeCA,565
 deeprails/types/monitor_response.py,sha256=LjnJVYniee1hgvZu8RT-9jX4xd0Ob_yvq4NBOxVn59c,950
 deeprails/types/monitor_retrieve_params.py,sha256=PEsRmbd-81z4pJvhfi4JbrQWNzmeiLkoNsTUoPZ6kFY,352
 deeprails/types/monitor_submit_event_params.py,sha256=YetTV8HzmDGNnSYoKZp8tv3u9L6oYn4tnvWOppTRfOQ,2164
 deeprails/types/monitor_update_params.py,sha256=gJyFFxT_u_iWABknuKnLpPl9r-VPfCcGtOAmh6sPwUw,550
 deeprails/types/workflow_event_response.py,sha256=mIzOCnYJg4TDSq_tG_0WfA0_Gmc9-0q-befyookfUFM,867
-deeprails-1.
-deeprails-1.
-deeprails-1.
-deeprails-1.
+deeprails-1.10.0.dist-info/METADATA,sha256=oMlAwTn3V1FCFTw1ErsGTS_clu3q1dtpn3vQAssiJv0,12081
+deeprails-1.10.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+deeprails-1.10.0.dist-info/licenses/LICENSE,sha256=rFTxPcYE516UQLju2SCY1r2pSDDfodL0-ZvxF_fgueg,11339
+deeprails-1.10.0.dist-info/RECORD,,
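For context on the RECORD rows above: each entry is `path,hash,size`, and the hash is the urlsafe-base64 SHA-256 digest of the file with the trailing `=` padding stripped (the standard wheel RECORD format). A small sketch of how such a row could be recomputed for verification (hypothetical helper, not part of this package):

    import base64
    import hashlib
    from pathlib import Path


    def record_entry(path: str) -> str:
        """Build a RECORD-style "path,sha256=...,size" row for one file."""
        data = Path(path).read_bytes()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
        return f"{path},sha256={digest.decode('ascii')},{len(data)}"


    # e.g. record_entry("deeprails/_version.py") should reproduce the row shown above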
deeprails/resources/evaluate.py
DELETED
@@ -1,334 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List
-from typing_extensions import Literal
-
-import httpx
-
-from ..types import evaluate_create_params
-from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
-from .._utils import maybe_transform, async_maybe_transform
-from .._compat import cached_property
-from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import (
-    to_raw_response_wrapper,
-    to_streamed_response_wrapper,
-    async_to_raw_response_wrapper,
-    async_to_streamed_response_wrapper,
-)
-from .._base_client import make_request_options
-from ..types.evaluation import Evaluation
-
-__all__ = ["EvaluateResource", "AsyncEvaluateResource"]
-
-
-class EvaluateResource(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> EvaluateResourceWithRawResponse:
-        """
-        This property can be used as a prefix for any HTTP method call to return
-        the raw response object instead of the parsed content.
-
-        For more information, see https://www.github.com/deeprails/deeprails-sdk-python#accessing-raw-response-data-eg-headers
-        """
-        return EvaluateResourceWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> EvaluateResourceWithStreamingResponse:
-        """
-        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
-        For more information, see https://www.github.com/deeprails/deeprails-sdk-python#with_streaming_response
-        """
-        return EvaluateResourceWithStreamingResponse(self)
-
-    def create(
-        self,
-        *,
-        model_input: evaluate_create_params.ModelInput,
-        model_output: str,
-        run_mode: Literal["precision_plus", "precision", "smart", "economy"],
-        guardrail_metrics: List[
-            Literal[
-                "correctness",
-                "completeness",
-                "instruction_adherence",
-                "context_adherence",
-                "ground_truth_adherence",
-                "comprehensive_safety",
-            ]
-        ]
-        | Omit = omit,
-        model_used: str | Omit = omit,
-        nametag: str | Omit = omit,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = not_given,
-    ) -> Evaluation:
-        """
-        Use this endpoint to evaluate a model's input and output pair against selected
-        guardrail metrics
-
-        Args:
-          model_input: A dictionary of inputs sent to the LLM to generate output. The dictionary must
-              contain at least a `user_prompt` field or a `system_prompt` field. For
-              ground_truth_adherence guardrail metric, `ground_truth` should be provided.
-
-          model_output: Output generated by the LLM to be evaluated.
-
-          run_mode: Run mode for the evaluation. The run mode allows the user to optimize for speed,
-              accuracy, and cost by determining which models are used to evaluate the event.
-              Available run modes include `precision_plus`, `precision`, `smart`, and
-              `economy`. Defaults to `smart`.
-
-          guardrail_metrics: An array of guardrail metrics that the model input and output pair will be
-              evaluated on. For non-enterprise users, these will be limited to the allowed
-              guardrail metrics.
-
-          model_used: Model ID used to generate the output, like `gpt-4o` or `o3`.
-
-          nametag: An optional, user-defined tag for the evaluation.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return self._post(
-            "/evaluate",
-            body=maybe_transform(
-                {
-                    "model_input": model_input,
-                    "model_output": model_output,
-                    "run_mode": run_mode,
-                    "guardrail_metrics": guardrail_metrics,
-                    "model_used": model_used,
-                    "nametag": nametag,
-                },
-                evaluate_create_params.EvaluateCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Evaluation,
-        )
-
-    def retrieve(
-        self,
-        eval_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = not_given,
-    ) -> Evaluation:
-        """
-        Use this endpoint to retrieve the evaluation record for a given evaluation ID
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not eval_id:
-            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
-        return self._get(
-            f"/evaluate/{eval_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Evaluation,
-        )
-
-
-class AsyncEvaluateResource(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncEvaluateResourceWithRawResponse:
-        """
-        This property can be used as a prefix for any HTTP method call to return
-        the raw response object instead of the parsed content.
-
-        For more information, see https://www.github.com/deeprails/deeprails-sdk-python#accessing-raw-response-data-eg-headers
-        """
-        return AsyncEvaluateResourceWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncEvaluateResourceWithStreamingResponse:
-        """
-        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
-        For more information, see https://www.github.com/deeprails/deeprails-sdk-python#with_streaming_response
-        """
-        return AsyncEvaluateResourceWithStreamingResponse(self)
-
-    async def create(
-        self,
-        *,
-        model_input: evaluate_create_params.ModelInput,
-        model_output: str,
-        run_mode: Literal["precision_plus", "precision", "smart", "economy"],
-        guardrail_metrics: List[
-            Literal[
-                "correctness",
-                "completeness",
-                "instruction_adherence",
-                "context_adherence",
-                "ground_truth_adherence",
-                "comprehensive_safety",
-            ]
-        ]
-        | Omit = omit,
-        model_used: str | Omit = omit,
-        nametag: str | Omit = omit,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = not_given,
-    ) -> Evaluation:
-        """
-        Use this endpoint to evaluate a model's input and output pair against selected
-        guardrail metrics
-
-        Args:
-          model_input: A dictionary of inputs sent to the LLM to generate output. The dictionary must
-              contain at least a `user_prompt` field or a `system_prompt` field. For
-              ground_truth_adherence guardrail metric, `ground_truth` should be provided.
-
-          model_output: Output generated by the LLM to be evaluated.
-
-          run_mode: Run mode for the evaluation. The run mode allows the user to optimize for speed,
-              accuracy, and cost by determining which models are used to evaluate the event.
-              Available run modes include `precision_plus`, `precision`, `smart`, and
-              `economy`. Defaults to `smart`.
-
-          guardrail_metrics: An array of guardrail metrics that the model input and output pair will be
-              evaluated on. For non-enterprise users, these will be limited to the allowed
-              guardrail metrics.
-
-          model_used: Model ID used to generate the output, like `gpt-4o` or `o3`.
-
-          nametag: An optional, user-defined tag for the evaluation.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return await self._post(
-            "/evaluate",
-            body=await async_maybe_transform(
-                {
-                    "model_input": model_input,
-                    "model_output": model_output,
-                    "run_mode": run_mode,
-                    "guardrail_metrics": guardrail_metrics,
-                    "model_used": model_used,
-                    "nametag": nametag,
-                },
-                evaluate_create_params.EvaluateCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Evaluation,
-        )
-
-    async def retrieve(
-        self,
-        eval_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = not_given,
-    ) -> Evaluation:
-        """
-        Use this endpoint to retrieve the evaluation record for a given evaluation ID
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not eval_id:
-            raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
-        return await self._get(
-            f"/evaluate/{eval_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Evaluation,
-        )
-
-
-class EvaluateResourceWithRawResponse:
-    def __init__(self, evaluate: EvaluateResource) -> None:
-        self._evaluate = evaluate
-
-        self.create = to_raw_response_wrapper(
-            evaluate.create,
-        )
-        self.retrieve = to_raw_response_wrapper(
-            evaluate.retrieve,
-        )
-
-
-class AsyncEvaluateResourceWithRawResponse:
-    def __init__(self, evaluate: AsyncEvaluateResource) -> None:
-        self._evaluate = evaluate
-
-        self.create = async_to_raw_response_wrapper(
-            evaluate.create,
-        )
-        self.retrieve = async_to_raw_response_wrapper(
-            evaluate.retrieve,
-        )
-
-
-class EvaluateResourceWithStreamingResponse:
-    def __init__(self, evaluate: EvaluateResource) -> None:
-        self._evaluate = evaluate
-
-        self.create = to_streamed_response_wrapper(
-            evaluate.create,
-        )
-        self.retrieve = to_streamed_response_wrapper(
-            evaluate.retrieve,
-        )
-
-
-class AsyncEvaluateResourceWithStreamingResponse:
-    def __init__(self, evaluate: AsyncEvaluateResource) -> None:
-        self._evaluate = evaluate
-
-        self.create = async_to_streamed_response_wrapper(
-            evaluate.create,
-        )
-        self.retrieve = async_to_streamed_response_wrapper(
-            evaluate.retrieve,
-        )
deeprails/types/evaluate_create_params.py
DELETED
@@ -1,63 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["EvaluateCreateParams", "ModelInput"]
-
-
-class EvaluateCreateParams(TypedDict, total=False):
-    model_input: Required[ModelInput]
-    """A dictionary of inputs sent to the LLM to generate output.
-
-    The dictionary must contain at least a `user_prompt` field or a `system_prompt`
-    field. For ground_truth_adherence guardrail metric, `ground_truth` should be
-    provided.
-    """
-
-    model_output: Required[str]
-    """Output generated by the LLM to be evaluated."""
-
-    run_mode: Required[Literal["precision_plus", "precision", "smart", "economy"]]
-    """Run mode for the evaluation.
-
-    The run mode allows the user to optimize for speed, accuracy, and cost by
-    determining which models are used to evaluate the event. Available run modes
-    include `precision_plus`, `precision`, `smart`, and `economy`. Defaults to
-    `smart`.
-    """
-
-    guardrail_metrics: List[
-        Literal[
-            "correctness",
-            "completeness",
-            "instruction_adherence",
-            "context_adherence",
-            "ground_truth_adherence",
-            "comprehensive_safety",
-        ]
-    ]
-    """
-    An array of guardrail metrics that the model input and output pair will be
-    evaluated on. For non-enterprise users, these will be limited to the allowed
-    guardrail metrics.
-    """
-
-    model_used: str
-    """Model ID used to generate the output, like `gpt-4o` or `o3`."""
-
-    nametag: str
-    """An optional, user-defined tag for the evaluation."""
-
-
-class ModelInput(TypedDict, total=False):
-    ground_truth: str
-    """The ground truth for evaluating Ground Truth Adherence guardrail."""
-
-    system_prompt: str
-    """The system prompt used to generate the output."""
-
-    user_prompt: str
-    """The user prompt used to generate the output."""
deeprails/types/evaluation.py
DELETED
@@ -1,105 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Optional
-from datetime import datetime
-from typing_extensions import Literal
-
-from pydantic import Field as FieldInfo
-
-from .._models import BaseModel
-
-__all__ = ["Evaluation", "ModelInput"]
-
-
-class ModelInput(BaseModel):
-    ground_truth: Optional[str] = None
-    """The ground truth for evaluating Ground Truth Adherence guardrail."""
-
-    system_prompt: Optional[str] = None
-    """The system prompt used to generate the output."""
-
-    user_prompt: Optional[str] = None
-    """The user prompt used to generate the output."""
-
-
-class Evaluation(BaseModel):
-    eval_id: str
-    """A unique evaluation ID."""
-
-    evaluation_status: Literal["in_progress", "completed", "canceled", "queued", "failed"]
-    """Status of the evaluation."""
-
-    api_model_input: ModelInput = FieldInfo(alias="model_input")
-    """A dictionary of inputs sent to the LLM to generate output.
-
-    The dictionary must contain at least a `user_prompt` field or a `system_prompt`
-    field. For ground_truth_adherence guardrail metric, `ground_truth` should be
-    provided.
-    """
-
-    api_model_output: str = FieldInfo(alias="model_output")
-    """Output generated by the LLM to be evaluated."""
-
-    run_mode: Literal["precision_plus", "precision", "smart", "economy"]
-    """Run mode for the evaluation.
-
-    The run mode allows the user to optimize for speed, accuracy, and cost by
-    determining which models are used to evaluate the event.
-    """
-
-    created_at: Optional[datetime] = None
-    """The time the evaluation was created in UTC."""
-
-    end_timestamp: Optional[datetime] = None
-    """The time the evaluation completed in UTC."""
-
-    error_message: Optional[str] = None
-    """Description of the error causing the evaluation to fail, if any."""
-
-    error_timestamp: Optional[datetime] = None
-    """The time the error causing the evaluation to fail was recorded."""
-
-    evaluation_result: Optional[Dict[str, object]] = None
-    """
-    Evaluation result consisting of average scores and rationales for each of the
-    evaluated guardrail metrics.
-    """
-
-    evaluation_total_cost: Optional[float] = None
-    """Total cost of the evaluation."""
-
-    guardrail_metrics: Optional[
-        List[
-            Literal[
-                "correctness",
-                "completeness",
-                "instruction_adherence",
-                "context_adherence",
-                "ground_truth_adherence",
-                "comprehensive_safety",
-            ]
-        ]
-    ] = None
-    """
-    An array of guardrail metrics that the model input and output pair will be
-    evaluated on.
-    """
-
-    api_model_used: Optional[str] = FieldInfo(alias="model_used", default=None)
-    """Model ID used to generate the output, like `gpt-4o` or `o3`."""
-
-    modified_at: Optional[datetime] = None
-    """The most recent time the evaluation was modified in UTC."""
-
-    nametag: Optional[str] = None
-    """An optional, user-defined tag for the evaluation."""
-
-    progress: Optional[int] = None
-    """Evaluation progress.
-
-    Values range between 0 and 100; 100 corresponds to a completed
-    `evaluation_status`.
-    """
-
-    start_timestamp: Optional[datetime] = None
-    """The time the evaluation started in UTC."""
{deeprails-1.9.0.dist-info → deeprails-1.10.0.dist-info}/WHEEL
File without changes
{deeprails-1.9.0.dist-info → deeprails-1.10.0.dist-info}/licenses/LICENSE
File without changes