deeprails 1.9.0__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deeprails/_client.py CHANGED
@@ -21,7 +21,7 @@ from ._types import (
21
21
  )
22
22
  from ._utils import is_given, get_async_library
23
23
  from ._version import __version__
24
- from .resources import defend, monitor, evaluate
24
+ from .resources import defend, monitor
25
25
  from ._streaming import Stream as Stream, AsyncStream as AsyncStream
26
26
  from ._exceptions import APIStatusError, DeeprailsError
27
27
  from ._base_client import (
@@ -45,7 +45,6 @@ __all__ = [
45
45
  class Deeprails(SyncAPIClient):
46
46
  defend: defend.DefendResource
47
47
  monitor: monitor.MonitorResource
48
- evaluate: evaluate.EvaluateResource
49
48
  with_raw_response: DeeprailsWithRawResponse
50
49
  with_streaming_response: DeeprailsWithStreamedResponse
51
50
 
@@ -105,7 +104,6 @@ class Deeprails(SyncAPIClient):
105
104
 
106
105
  self.defend = defend.DefendResource(self)
107
106
  self.monitor = monitor.MonitorResource(self)
108
- self.evaluate = evaluate.EvaluateResource(self)
109
107
  self.with_raw_response = DeeprailsWithRawResponse(self)
110
108
  self.with_streaming_response = DeeprailsWithStreamedResponse(self)
111
109
 
@@ -217,7 +215,6 @@ class Deeprails(SyncAPIClient):
217
215
  class AsyncDeeprails(AsyncAPIClient):
218
216
  defend: defend.AsyncDefendResource
219
217
  monitor: monitor.AsyncMonitorResource
220
- evaluate: evaluate.AsyncEvaluateResource
221
218
  with_raw_response: AsyncDeeprailsWithRawResponse
222
219
  with_streaming_response: AsyncDeeprailsWithStreamedResponse
223
220
 
@@ -277,7 +274,6 @@ class AsyncDeeprails(AsyncAPIClient):
277
274
 
278
275
  self.defend = defend.AsyncDefendResource(self)
279
276
  self.monitor = monitor.AsyncMonitorResource(self)
280
- self.evaluate = evaluate.AsyncEvaluateResource(self)
281
277
  self.with_raw_response = AsyncDeeprailsWithRawResponse(self)
282
278
  self.with_streaming_response = AsyncDeeprailsWithStreamedResponse(self)
283
279
 
@@ -390,28 +386,24 @@ class DeeprailsWithRawResponse:
390
386
  def __init__(self, client: Deeprails) -> None:
391
387
  self.defend = defend.DefendResourceWithRawResponse(client.defend)
392
388
  self.monitor = monitor.MonitorResourceWithRawResponse(client.monitor)
393
- self.evaluate = evaluate.EvaluateResourceWithRawResponse(client.evaluate)
394
389
 
395
390
 
396
391
  class AsyncDeeprailsWithRawResponse:
397
392
  def __init__(self, client: AsyncDeeprails) -> None:
398
393
  self.defend = defend.AsyncDefendResourceWithRawResponse(client.defend)
399
394
  self.monitor = monitor.AsyncMonitorResourceWithRawResponse(client.monitor)
400
- self.evaluate = evaluate.AsyncEvaluateResourceWithRawResponse(client.evaluate)
401
395
 
402
396
 
403
397
  class DeeprailsWithStreamedResponse:
404
398
  def __init__(self, client: Deeprails) -> None:
405
399
  self.defend = defend.DefendResourceWithStreamingResponse(client.defend)
406
400
  self.monitor = monitor.MonitorResourceWithStreamingResponse(client.monitor)
407
- self.evaluate = evaluate.EvaluateResourceWithStreamingResponse(client.evaluate)
408
401
 
409
402
 
410
403
  class AsyncDeeprailsWithStreamedResponse:
411
404
  def __init__(self, client: AsyncDeeprails) -> None:
412
405
  self.defend = defend.AsyncDefendResourceWithStreamingResponse(client.defend)
413
406
  self.monitor = monitor.AsyncMonitorResourceWithStreamingResponse(client.monitor)
414
- self.evaluate = evaluate.AsyncEvaluateResourceWithStreamingResponse(client.evaluate)
415
407
 
416
408
 
417
409
  Client = Deeprails
deeprails/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  __title__ = "deeprails"
4
- __version__ = "1.9.0" # x-release-please-version
4
+ __version__ = "1.10.0" # x-release-please-version
@@ -16,14 +16,6 @@ from .monitor import (
16
16
  MonitorResourceWithStreamingResponse,
17
17
  AsyncMonitorResourceWithStreamingResponse,
18
18
  )
19
- from .evaluate import (
20
- EvaluateResource,
21
- AsyncEvaluateResource,
22
- EvaluateResourceWithRawResponse,
23
- AsyncEvaluateResourceWithRawResponse,
24
- EvaluateResourceWithStreamingResponse,
25
- AsyncEvaluateResourceWithStreamingResponse,
26
- )
27
19
 
28
20
  __all__ = [
29
21
  "DefendResource",
@@ -38,10 +30,4 @@ __all__ = [
38
30
  "AsyncMonitorResourceWithRawResponse",
39
31
  "MonitorResourceWithStreamingResponse",
40
32
  "AsyncMonitorResourceWithStreamingResponse",
41
- "EvaluateResource",
42
- "AsyncEvaluateResource",
43
- "EvaluateResourceWithRawResponse",
44
- "AsyncEvaluateResourceWithRawResponse",
45
- "EvaluateResourceWithStreamingResponse",
46
- "AsyncEvaluateResourceWithStreamingResponse",
47
33
  ]
@@ -2,12 +2,10 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from .evaluation import Evaluation as Evaluation
6
5
  from .defend_response import DefendResponse as DefendResponse
7
6
  from .monitor_response import MonitorResponse as MonitorResponse
8
7
  from .monitor_create_params import MonitorCreateParams as MonitorCreateParams
9
8
  from .monitor_update_params import MonitorUpdateParams as MonitorUpdateParams
10
- from .evaluate_create_params import EvaluateCreateParams as EvaluateCreateParams
11
9
  from .monitor_event_response import MonitorEventResponse as MonitorEventResponse
12
10
  from .monitor_detail_response import MonitorDetailResponse as MonitorDetailResponse
13
11
  from .monitor_retrieve_params import MonitorRetrieveParams as MonitorRetrieveParams
@@ -1,13 +1,108 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
- from typing import List, Optional
3
+ from typing import Dict, List, Optional
4
4
  from datetime import datetime
5
5
  from typing_extensions import Literal
6
6
 
7
+ from pydantic import Field as FieldInfo
8
+
7
9
  from .._models import BaseModel
8
- from .evaluation import Evaluation
9
10
 
10
- __all__ = ["MonitorDetailResponse", "Stats"]
11
+ __all__ = ["MonitorDetailResponse", "Evaluation", "EvaluationModelInput", "Stats"]
12
+
13
+
14
+ class EvaluationModelInput(BaseModel):
15
+ ground_truth: Optional[str] = None
16
+ """The ground truth for evaluating Ground Truth Adherence guardrail."""
17
+
18
+ system_prompt: Optional[str] = None
19
+ """The system prompt used to generate the output."""
20
+
21
+ user_prompt: Optional[str] = None
22
+ """The user prompt used to generate the output."""
23
+
24
+
25
+ class Evaluation(BaseModel):
26
+ eval_id: str
27
+ """A unique evaluation ID."""
28
+
29
+ evaluation_status: Literal["in_progress", "completed", "canceled", "queued", "failed"]
30
+ """Status of the evaluation."""
31
+
32
+ api_model_input: EvaluationModelInput = FieldInfo(alias="model_input")
33
+ """A dictionary of inputs sent to the LLM to generate output.
34
+
35
+ The dictionary must contain at least a `user_prompt` field or a `system_prompt`
36
+ field. For ground_truth_adherence guardrail metric, `ground_truth` should be
37
+ provided.
38
+ """
39
+
40
+ api_model_output: str = FieldInfo(alias="model_output")
41
+ """Output generated by the LLM to be evaluated."""
42
+
43
+ run_mode: Literal["precision_plus", "precision", "smart", "economy"]
44
+ """Run mode for the evaluation.
45
+
46
+ The run mode allows the user to optimize for speed, accuracy, and cost by
47
+ determining which models are used to evaluate the event.
48
+ """
49
+
50
+ created_at: Optional[datetime] = None
51
+ """The time the evaluation was created in UTC."""
52
+
53
+ end_timestamp: Optional[datetime] = None
54
+ """The time the evaluation completed in UTC."""
55
+
56
+ error_message: Optional[str] = None
57
+ """Description of the error causing the evaluation to fail, if any."""
58
+
59
+ error_timestamp: Optional[datetime] = None
60
+ """The time the error causing the evaluation to fail was recorded."""
61
+
62
+ evaluation_result: Optional[Dict[str, object]] = None
63
+ """
64
+ Evaluation result consisting of average scores and rationales for each of the
65
+ evaluated guardrail metrics.
66
+ """
67
+
68
+ evaluation_total_cost: Optional[float] = None
69
+ """Total cost of the evaluation."""
70
+
71
+ guardrail_metrics: Optional[
72
+ List[
73
+ Literal[
74
+ "correctness",
75
+ "completeness",
76
+ "instruction_adherence",
77
+ "context_adherence",
78
+ "ground_truth_adherence",
79
+ "comprehensive_safety",
80
+ ]
81
+ ]
82
+ ] = None
83
+ """
84
+ An array of guardrail metrics that the model input and output pair will be
85
+ evaluated on.
86
+ """
87
+
88
+ api_model_used: Optional[str] = FieldInfo(alias="model_used", default=None)
89
+ """Model ID used to generate the output, like `gpt-4o` or `o3`."""
90
+
91
+ modified_at: Optional[datetime] = None
92
+ """The most recent time the evaluation was modified in UTC."""
93
+
94
+ nametag: Optional[str] = None
95
+ """An optional, user-defined tag for the evaluation."""
96
+
97
+ progress: Optional[int] = None
98
+ """Evaluation progress.
99
+
100
+ Values range between 0 and 100; 100 corresponds to a completed
101
+ `evaluation_status`.
102
+ """
103
+
104
+ start_timestamp: Optional[datetime] = None
105
+ """The time the evaluation started in UTC."""
11
106
 
12
107
 
13
108
  class Stats(BaseModel):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: deeprails
3
- Version: 1.9.0
3
+ Version: 1.10.0
4
4
  Summary: The official Python library for the deeprails API
5
5
  Project-URL: Homepage, https://docs.deeprails.com/
6
6
  Project-URL: Repository, https://github.com/deeprails/deeprails-sdk-python
@@ -1,6 +1,6 @@
1
1
  deeprails/__init__.py,sha256=Rz38DnPkRJzvTJjgsLPjYyEId-pJndorgdE4Yhcq0J4,2692
2
2
  deeprails/_base_client.py,sha256=3bTH_cn_59gEjfZdcRS6g6Er4ZobAgTCRsnQmD_viNQ,67050
3
- deeprails/_client.py,sha256=oinOakaCufsSKAjWC2JOwT4bCeEmRB-xQCT9FNAG1rM,16125
3
+ deeprails/_client.py,sha256=BzJOTEiC2RszdX3GCBEk_R5k6e48rzP-XlbCN1Cplr4,15563
4
4
  deeprails/_compat.py,sha256=DQBVORjFb33zch24jzkhM14msvnzY7mmSmgDLaVFUM8,6562
5
5
  deeprails/_constants.py,sha256=S14PFzyN9-I31wiV7SmIlL5Ga0MLHxdvegInGdXH7tM,462
6
6
  deeprails/_exceptions.py,sha256=N99bKrZNjDzgEDrvdw99WO-zpuPeZySaUUKNLEUz8mU,3226
@@ -11,7 +11,7 @@ deeprails/_resource.py,sha256=7RXX5KZr4j0TIE66vnduHp7p9Yf9X0FyDDECuvRHARg,1118
11
11
  deeprails/_response.py,sha256=yj0HJDU91WPpiczwi6CBOLAl_bqf4I_I96vWMAwx6Fg,28806
12
12
  deeprails/_streaming.py,sha256=hCp5bK9dyw2TyrVL69m-6qGC-QtGYwhXmFzITCWPgAs,10112
13
13
  deeprails/_types.py,sha256=XR3mad9NsGqZsjrd1VVJ657-4O4kwyw9Qzg4M3i6Vh0,7239
14
- deeprails/_version.py,sha256=wxi1GAD191IfMIMELfuuMOJr3v9EY7XoLbXOaNNw-dY,161
14
+ deeprails/_version.py,sha256=dE6ROKmhSXt4B73GxgSswrtaxSRJBdQihEVLUUaQ-M0,162
15
15
  deeprails/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  deeprails/_utils/__init__.py,sha256=7fch0GT9zpNnErbciSpUNa-SjTxxjY6kxHxKMOM4AGs,2305
17
17
  deeprails/_utils/_compat.py,sha256=D8gtAvjJQrDWt9upS0XaG9Rr5l1QhiAx_I_1utT_tt0,1195
@@ -26,26 +26,23 @@ deeprails/_utils/_transform.py,sha256=NjCzmnfqYrsAikUHQig6N9QfuTVbKipuP3ur9mcNF-
26
26
  deeprails/_utils/_typing.py,sha256=N_5PPuFNsaygbtA_npZd98SVN1LQQvFTKL6bkWPBZGU,4786
27
27
  deeprails/_utils/_utils.py,sha256=0dDqauUbVZEXV0NVl7Bwu904Wwo5eyFCZpQThhFNhyA,12253
28
28
  deeprails/lib/.keep,sha256=wuNrz-5SXo3jJaJOJgz4vFHM41YH_g20F5cRQo0vLes,224
29
- deeprails/resources/__init__.py,sha256=ha0jL9Et8fHzPdkTa7ecihYapOC4I6O-PHf2X9igprE,1491
29
+ deeprails/resources/__init__.py,sha256=JBzEBVMLg2n1w2QJGYCsyjPHY6zHZ0G3xaWuuFoa5Pg,1015
30
30
  deeprails/resources/defend.py,sha256=e6NzUJziYx__FkGSVNeaqzA8sZU0L5dY5Nob1IPCxc4,28186
31
- deeprails/resources/evaluate.py,sha256=6SJ3iOH3uYPz3k3MS_G_P75zgSu4bBIurazHQ8byEl0,13025
32
31
  deeprails/resources/monitor.py,sha256=bW7HCn8lDpWqaPd11J_tVnGSduWTX3sfXlDYYycvk8k,22524
33
- deeprails/types/__init__.py,sha256=-Dsqdk7bVrDKj3ryCVbwIfIqSLFfF8pOu3JU7O9-8TA,1254
32
+ deeprails/types/__init__.py,sha256=PlF7idbYiAj3xyJYstHDaBer-rLUHe6eUEfRfJKisvk,1124
34
33
  deeprails/types/defend_create_workflow_params.py,sha256=AmyoU1z3a0U39pqVtVnFsM86ysK1nYIx15Tolwb4PKA,2102
35
34
  deeprails/types/defend_response.py,sha256=VoePIT9RKN795y-3ZvoFmzSTCetqkZZh6iQLYjJEFoY,1603
36
35
  deeprails/types/defend_submit_event_params.py,sha256=yL_rLUGKlZjXHGbdi8h9ZItb4sICQ2POO_o0VbUtans,1539
37
36
  deeprails/types/defend_update_workflow_params.py,sha256=QH2k7EDMLub3mW1lPV5SUoKDHW_T2arSo-RGHLterwo,373
38
- deeprails/types/evaluate_create_params.py,sha256=GGdFXjYjyLD5O1fn4bwDwDKI0EwvciJNF-uYmvCUBwA,2001
39
- deeprails/types/evaluation.py,sha256=3ajWcd7oYRtYZDOe0fVQHeVRN6-ymjA7F47VduwTfP4,3328
40
37
  deeprails/types/monitor_create_params.py,sha256=kTSj-PhuzpT-HPDTQJemRWfd8w32foUMH9FQZj8symk,384
41
- deeprails/types/monitor_detail_response.py,sha256=pts1JThENDUQhF0xKsPvOY3C5fUU-z8HW6ZVCKwmq8g,1939
38
+ deeprails/types/monitor_detail_response.py,sha256=s8dzFFaU9uI-GHCfUxCN9yLhpmq4LTu7CSww9z0SLvk,5028
42
39
  deeprails/types/monitor_event_response.py,sha256=-cnugHD_3QeeZRMbo6aQBirqSPgKIKpaD2qNkgxCeCA,565
43
40
  deeprails/types/monitor_response.py,sha256=LjnJVYniee1hgvZu8RT-9jX4xd0Ob_yvq4NBOxVn59c,950
44
41
  deeprails/types/monitor_retrieve_params.py,sha256=PEsRmbd-81z4pJvhfi4JbrQWNzmeiLkoNsTUoPZ6kFY,352
45
42
  deeprails/types/monitor_submit_event_params.py,sha256=YetTV8HzmDGNnSYoKZp8tv3u9L6oYn4tnvWOppTRfOQ,2164
46
43
  deeprails/types/monitor_update_params.py,sha256=gJyFFxT_u_iWABknuKnLpPl9r-VPfCcGtOAmh6sPwUw,550
47
44
  deeprails/types/workflow_event_response.py,sha256=mIzOCnYJg4TDSq_tG_0WfA0_Gmc9-0q-befyookfUFM,867
48
- deeprails-1.9.0.dist-info/METADATA,sha256=XP4pijd9tcmRhIrw4j10axapTSyQ48VzYK_WCqiGkCc,12080
49
- deeprails-1.9.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
50
- deeprails-1.9.0.dist-info/licenses/LICENSE,sha256=rFTxPcYE516UQLju2SCY1r2pSDDfodL0-ZvxF_fgueg,11339
51
- deeprails-1.9.0.dist-info/RECORD,,
45
+ deeprails-1.10.0.dist-info/METADATA,sha256=oMlAwTn3V1FCFTw1ErsGTS_clu3q1dtpn3vQAssiJv0,12081
46
+ deeprails-1.10.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
47
+ deeprails-1.10.0.dist-info/licenses/LICENSE,sha256=rFTxPcYE516UQLju2SCY1r2pSDDfodL0-ZvxF_fgueg,11339
48
+ deeprails-1.10.0.dist-info/RECORD,,
@@ -1,334 +0,0 @@
1
- # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
-
3
- from __future__ import annotations
4
-
5
- from typing import List
6
- from typing_extensions import Literal
7
-
8
- import httpx
9
-
10
- from ..types import evaluate_create_params
11
- from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
12
- from .._utils import maybe_transform, async_maybe_transform
13
- from .._compat import cached_property
14
- from .._resource import SyncAPIResource, AsyncAPIResource
15
- from .._response import (
16
- to_raw_response_wrapper,
17
- to_streamed_response_wrapper,
18
- async_to_raw_response_wrapper,
19
- async_to_streamed_response_wrapper,
20
- )
21
- from .._base_client import make_request_options
22
- from ..types.evaluation import Evaluation
23
-
24
- __all__ = ["EvaluateResource", "AsyncEvaluateResource"]
25
-
26
-
27
- class EvaluateResource(SyncAPIResource):
28
- @cached_property
29
- def with_raw_response(self) -> EvaluateResourceWithRawResponse:
30
- """
31
- This property can be used as a prefix for any HTTP method call to return
32
- the raw response object instead of the parsed content.
33
-
34
- For more information, see https://www.github.com/deeprails/deeprails-sdk-python#accessing-raw-response-data-eg-headers
35
- """
36
- return EvaluateResourceWithRawResponse(self)
37
-
38
- @cached_property
39
- def with_streaming_response(self) -> EvaluateResourceWithStreamingResponse:
40
- """
41
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
42
-
43
- For more information, see https://www.github.com/deeprails/deeprails-sdk-python#with_streaming_response
44
- """
45
- return EvaluateResourceWithStreamingResponse(self)
46
-
47
- def create(
48
- self,
49
- *,
50
- model_input: evaluate_create_params.ModelInput,
51
- model_output: str,
52
- run_mode: Literal["precision_plus", "precision", "smart", "economy"],
53
- guardrail_metrics: List[
54
- Literal[
55
- "correctness",
56
- "completeness",
57
- "instruction_adherence",
58
- "context_adherence",
59
- "ground_truth_adherence",
60
- "comprehensive_safety",
61
- ]
62
- ]
63
- | Omit = omit,
64
- model_used: str | Omit = omit,
65
- nametag: str | Omit = omit,
66
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
67
- # The extra values given here take precedence over values defined on the client or passed to this method.
68
- extra_headers: Headers | None = None,
69
- extra_query: Query | None = None,
70
- extra_body: Body | None = None,
71
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
72
- ) -> Evaluation:
73
- """
74
- Use this endpoint to evaluate a model's input and output pair against selected
75
- guardrail metrics
76
-
77
- Args:
78
- model_input: A dictionary of inputs sent to the LLM to generate output. The dictionary must
79
- contain at least a `user_prompt` field or a `system_prompt` field. For
80
- ground_truth_adherence guardrail metric, `ground_truth` should be provided.
81
-
82
- model_output: Output generated by the LLM to be evaluated.
83
-
84
- run_mode: Run mode for the evaluation. The run mode allows the user to optimize for speed,
85
- accuracy, and cost by determining which models are used to evaluate the event.
86
- Available run modes include `precision_plus`, `precision`, `smart`, and
87
- `economy`. Defaults to `smart`.
88
-
89
- guardrail_metrics: An array of guardrail metrics that the model input and output pair will be
90
- evaluated on. For non-enterprise users, these will be limited to the allowed
91
- guardrail metrics.
92
-
93
- model_used: Model ID used to generate the output, like `gpt-4o` or `o3`.
94
-
95
- nametag: An optional, user-defined tag for the evaluation.
96
-
97
- extra_headers: Send extra headers
98
-
99
- extra_query: Add additional query parameters to the request
100
-
101
- extra_body: Add additional JSON properties to the request
102
-
103
- timeout: Override the client-level default timeout for this request, in seconds
104
- """
105
- return self._post(
106
- "/evaluate",
107
- body=maybe_transform(
108
- {
109
- "model_input": model_input,
110
- "model_output": model_output,
111
- "run_mode": run_mode,
112
- "guardrail_metrics": guardrail_metrics,
113
- "model_used": model_used,
114
- "nametag": nametag,
115
- },
116
- evaluate_create_params.EvaluateCreateParams,
117
- ),
118
- options=make_request_options(
119
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
120
- ),
121
- cast_to=Evaluation,
122
- )
123
-
124
- def retrieve(
125
- self,
126
- eval_id: str,
127
- *,
128
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
129
- # The extra values given here take precedence over values defined on the client or passed to this method.
130
- extra_headers: Headers | None = None,
131
- extra_query: Query | None = None,
132
- extra_body: Body | None = None,
133
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
134
- ) -> Evaluation:
135
- """
136
- Use this endpoint to retrieve the evaluation record for a given evaluation ID
137
-
138
- Args:
139
- extra_headers: Send extra headers
140
-
141
- extra_query: Add additional query parameters to the request
142
-
143
- extra_body: Add additional JSON properties to the request
144
-
145
- timeout: Override the client-level default timeout for this request, in seconds
146
- """
147
- if not eval_id:
148
- raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
149
- return self._get(
150
- f"/evaluate/{eval_id}",
151
- options=make_request_options(
152
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
153
- ),
154
- cast_to=Evaluation,
155
- )
156
-
157
-
158
- class AsyncEvaluateResource(AsyncAPIResource):
159
- @cached_property
160
- def with_raw_response(self) -> AsyncEvaluateResourceWithRawResponse:
161
- """
162
- This property can be used as a prefix for any HTTP method call to return
163
- the raw response object instead of the parsed content.
164
-
165
- For more information, see https://www.github.com/deeprails/deeprails-sdk-python#accessing-raw-response-data-eg-headers
166
- """
167
- return AsyncEvaluateResourceWithRawResponse(self)
168
-
169
- @cached_property
170
- def with_streaming_response(self) -> AsyncEvaluateResourceWithStreamingResponse:
171
- """
172
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
173
-
174
- For more information, see https://www.github.com/deeprails/deeprails-sdk-python#with_streaming_response
175
- """
176
- return AsyncEvaluateResourceWithStreamingResponse(self)
177
-
178
- async def create(
179
- self,
180
- *,
181
- model_input: evaluate_create_params.ModelInput,
182
- model_output: str,
183
- run_mode: Literal["precision_plus", "precision", "smart", "economy"],
184
- guardrail_metrics: List[
185
- Literal[
186
- "correctness",
187
- "completeness",
188
- "instruction_adherence",
189
- "context_adherence",
190
- "ground_truth_adherence",
191
- "comprehensive_safety",
192
- ]
193
- ]
194
- | Omit = omit,
195
- model_used: str | Omit = omit,
196
- nametag: str | Omit = omit,
197
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
198
- # The extra values given here take precedence over values defined on the client or passed to this method.
199
- extra_headers: Headers | None = None,
200
- extra_query: Query | None = None,
201
- extra_body: Body | None = None,
202
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
203
- ) -> Evaluation:
204
- """
205
- Use this endpoint to evaluate a model's input and output pair against selected
206
- guardrail metrics
207
-
208
- Args:
209
- model_input: A dictionary of inputs sent to the LLM to generate output. The dictionary must
210
- contain at least a `user_prompt` field or a `system_prompt` field. For
211
- ground_truth_adherence guardrail metric, `ground_truth` should be provided.
212
-
213
- model_output: Output generated by the LLM to be evaluated.
214
-
215
- run_mode: Run mode for the evaluation. The run mode allows the user to optimize for speed,
216
- accuracy, and cost by determining which models are used to evaluate the event.
217
- Available run modes include `precision_plus`, `precision`, `smart`, and
218
- `economy`. Defaults to `smart`.
219
-
220
- guardrail_metrics: An array of guardrail metrics that the model input and output pair will be
221
- evaluated on. For non-enterprise users, these will be limited to the allowed
222
- guardrail metrics.
223
-
224
- model_used: Model ID used to generate the output, like `gpt-4o` or `o3`.
225
-
226
- nametag: An optional, user-defined tag for the evaluation.
227
-
228
- extra_headers: Send extra headers
229
-
230
- extra_query: Add additional query parameters to the request
231
-
232
- extra_body: Add additional JSON properties to the request
233
-
234
- timeout: Override the client-level default timeout for this request, in seconds
235
- """
236
- return await self._post(
237
- "/evaluate",
238
- body=await async_maybe_transform(
239
- {
240
- "model_input": model_input,
241
- "model_output": model_output,
242
- "run_mode": run_mode,
243
- "guardrail_metrics": guardrail_metrics,
244
- "model_used": model_used,
245
- "nametag": nametag,
246
- },
247
- evaluate_create_params.EvaluateCreateParams,
248
- ),
249
- options=make_request_options(
250
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
251
- ),
252
- cast_to=Evaluation,
253
- )
254
-
255
- async def retrieve(
256
- self,
257
- eval_id: str,
258
- *,
259
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
260
- # The extra values given here take precedence over values defined on the client or passed to this method.
261
- extra_headers: Headers | None = None,
262
- extra_query: Query | None = None,
263
- extra_body: Body | None = None,
264
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
265
- ) -> Evaluation:
266
- """
267
- Use this endpoint to retrieve the evaluation record for a given evaluation ID
268
-
269
- Args:
270
- extra_headers: Send extra headers
271
-
272
- extra_query: Add additional query parameters to the request
273
-
274
- extra_body: Add additional JSON properties to the request
275
-
276
- timeout: Override the client-level default timeout for this request, in seconds
277
- """
278
- if not eval_id:
279
- raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
280
- return await self._get(
281
- f"/evaluate/{eval_id}",
282
- options=make_request_options(
283
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
284
- ),
285
- cast_to=Evaluation,
286
- )
287
-
288
-
289
- class EvaluateResourceWithRawResponse:
290
- def __init__(self, evaluate: EvaluateResource) -> None:
291
- self._evaluate = evaluate
292
-
293
- self.create = to_raw_response_wrapper(
294
- evaluate.create,
295
- )
296
- self.retrieve = to_raw_response_wrapper(
297
- evaluate.retrieve,
298
- )
299
-
300
-
301
- class AsyncEvaluateResourceWithRawResponse:
302
- def __init__(self, evaluate: AsyncEvaluateResource) -> None:
303
- self._evaluate = evaluate
304
-
305
- self.create = async_to_raw_response_wrapper(
306
- evaluate.create,
307
- )
308
- self.retrieve = async_to_raw_response_wrapper(
309
- evaluate.retrieve,
310
- )
311
-
312
-
313
- class EvaluateResourceWithStreamingResponse:
314
- def __init__(self, evaluate: EvaluateResource) -> None:
315
- self._evaluate = evaluate
316
-
317
- self.create = to_streamed_response_wrapper(
318
- evaluate.create,
319
- )
320
- self.retrieve = to_streamed_response_wrapper(
321
- evaluate.retrieve,
322
- )
323
-
324
-
325
- class AsyncEvaluateResourceWithStreamingResponse:
326
- def __init__(self, evaluate: AsyncEvaluateResource) -> None:
327
- self._evaluate = evaluate
328
-
329
- self.create = async_to_streamed_response_wrapper(
330
- evaluate.create,
331
- )
332
- self.retrieve = async_to_streamed_response_wrapper(
333
- evaluate.retrieve,
334
- )
@@ -1,63 +0,0 @@
1
- # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
-
3
- from __future__ import annotations
4
-
5
- from typing import List
6
- from typing_extensions import Literal, Required, TypedDict
7
-
8
- __all__ = ["EvaluateCreateParams", "ModelInput"]
9
-
10
-
11
- class EvaluateCreateParams(TypedDict, total=False):
12
- model_input: Required[ModelInput]
13
- """A dictionary of inputs sent to the LLM to generate output.
14
-
15
- The dictionary must contain at least a `user_prompt` field or a `system_prompt`
16
- field. For ground_truth_adherence guardrail metric, `ground_truth` should be
17
- provided.
18
- """
19
-
20
- model_output: Required[str]
21
- """Output generated by the LLM to be evaluated."""
22
-
23
- run_mode: Required[Literal["precision_plus", "precision", "smart", "economy"]]
24
- """Run mode for the evaluation.
25
-
26
- The run mode allows the user to optimize for speed, accuracy, and cost by
27
- determining which models are used to evaluate the event. Available run modes
28
- include `precision_plus`, `precision`, `smart`, and `economy`. Defaults to
29
- `smart`.
30
- """
31
-
32
- guardrail_metrics: List[
33
- Literal[
34
- "correctness",
35
- "completeness",
36
- "instruction_adherence",
37
- "context_adherence",
38
- "ground_truth_adherence",
39
- "comprehensive_safety",
40
- ]
41
- ]
42
- """
43
- An array of guardrail metrics that the model input and output pair will be
44
- evaluated on. For non-enterprise users, these will be limited to the allowed
45
- guardrail metrics.
46
- """
47
-
48
- model_used: str
49
- """Model ID used to generate the output, like `gpt-4o` or `o3`."""
50
-
51
- nametag: str
52
- """An optional, user-defined tag for the evaluation."""
53
-
54
-
55
- class ModelInput(TypedDict, total=False):
56
- ground_truth: str
57
- """The ground truth for evaluating Ground Truth Adherence guardrail."""
58
-
59
- system_prompt: str
60
- """The system prompt used to generate the output."""
61
-
62
- user_prompt: str
63
- """The user prompt used to generate the output."""
@@ -1,105 +0,0 @@
1
- # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
-
3
- from typing import Dict, List, Optional
4
- from datetime import datetime
5
- from typing_extensions import Literal
6
-
7
- from pydantic import Field as FieldInfo
8
-
9
- from .._models import BaseModel
10
-
11
- __all__ = ["Evaluation", "ModelInput"]
12
-
13
-
14
- class ModelInput(BaseModel):
15
- ground_truth: Optional[str] = None
16
- """The ground truth for evaluating Ground Truth Adherence guardrail."""
17
-
18
- system_prompt: Optional[str] = None
19
- """The system prompt used to generate the output."""
20
-
21
- user_prompt: Optional[str] = None
22
- """The user prompt used to generate the output."""
23
-
24
-
25
- class Evaluation(BaseModel):
26
- eval_id: str
27
- """A unique evaluation ID."""
28
-
29
- evaluation_status: Literal["in_progress", "completed", "canceled", "queued", "failed"]
30
- """Status of the evaluation."""
31
-
32
- api_model_input: ModelInput = FieldInfo(alias="model_input")
33
- """A dictionary of inputs sent to the LLM to generate output.
34
-
35
- The dictionary must contain at least a `user_prompt` field or a `system_prompt`
36
- field. For ground_truth_adherence guardrail metric, `ground_truth` should be
37
- provided.
38
- """
39
-
40
- api_model_output: str = FieldInfo(alias="model_output")
41
- """Output generated by the LLM to be evaluated."""
42
-
43
- run_mode: Literal["precision_plus", "precision", "smart", "economy"]
44
- """Run mode for the evaluation.
45
-
46
- The run mode allows the user to optimize for speed, accuracy, and cost by
47
- determining which models are used to evaluate the event.
48
- """
49
-
50
- created_at: Optional[datetime] = None
51
- """The time the evaluation was created in UTC."""
52
-
53
- end_timestamp: Optional[datetime] = None
54
- """The time the evaluation completed in UTC."""
55
-
56
- error_message: Optional[str] = None
57
- """Description of the error causing the evaluation to fail, if any."""
58
-
59
- error_timestamp: Optional[datetime] = None
60
- """The time the error causing the evaluation to fail was recorded."""
61
-
62
- evaluation_result: Optional[Dict[str, object]] = None
63
- """
64
- Evaluation result consisting of average scores and rationales for each of the
65
- evaluated guardrail metrics.
66
- """
67
-
68
- evaluation_total_cost: Optional[float] = None
69
- """Total cost of the evaluation."""
70
-
71
- guardrail_metrics: Optional[
72
- List[
73
- Literal[
74
- "correctness",
75
- "completeness",
76
- "instruction_adherence",
77
- "context_adherence",
78
- "ground_truth_adherence",
79
- "comprehensive_safety",
80
- ]
81
- ]
82
- ] = None
83
- """
84
- An array of guardrail metrics that the model input and output pair will be
85
- evaluated on.
86
- """
87
-
88
- api_model_used: Optional[str] = FieldInfo(alias="model_used", default=None)
89
- """Model ID used to generate the output, like `gpt-4o` or `o3`."""
90
-
91
- modified_at: Optional[datetime] = None
92
- """The most recent time the evaluation was modified in UTC."""
93
-
94
- nametag: Optional[str] = None
95
- """An optional, user-defined tag for the evaluation."""
96
-
97
- progress: Optional[int] = None
98
- """Evaluation progress.
99
-
100
- Values range between 0 and 100; 100 corresponds to a completed
101
- `evaluation_status`.
102
- """
103
-
104
- start_timestamp: Optional[datetime] = None
105
- """The time the evaluation started in UTC."""