deeprails-0.3.2-py3-none-any.whl → deeprails-1.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deeprails/__init__.py +102 -1
- deeprails/_base_client.py +1995 -0
- deeprails/_client.py +419 -0
- deeprails/_compat.py +219 -0
- deeprails/_constants.py +14 -0
- deeprails/_exceptions.py +108 -0
- deeprails/_files.py +123 -0
- deeprails/_models.py +835 -0
- deeprails/_qs.py +150 -0
- deeprails/_resource.py +43 -0
- deeprails/_response.py +830 -0
- deeprails/_streaming.py +333 -0
- deeprails/_types.py +260 -0
- deeprails/_utils/__init__.py +64 -0
- deeprails/_utils/_compat.py +45 -0
- deeprails/_utils/_datetime_parse.py +136 -0
- deeprails/_utils/_logs.py +25 -0
- deeprails/_utils/_proxy.py +65 -0
- deeprails/_utils/_reflection.py +42 -0
- deeprails/_utils/_resources_proxy.py +24 -0
- deeprails/_utils/_streams.py +12 -0
- deeprails/_utils/_sync.py +86 -0
- deeprails/_utils/_transform.py +457 -0
- deeprails/_utils/_typing.py +156 -0
- deeprails/_utils/_utils.py +421 -0
- deeprails/_version.py +4 -0
- deeprails/lib/.keep +4 -0
- deeprails/py.typed +0 -0
- deeprails/resources/__init__.py +47 -0
- deeprails/resources/defend.py +671 -0
- deeprails/resources/evaluate.py +334 -0
- deeprails/resources/monitor.py +566 -0
- deeprails/types/__init__.py +18 -0
- deeprails/types/api_response.py +50 -0
- deeprails/types/defend_create_workflow_params.py +56 -0
- deeprails/types/defend_response.py +50 -0
- deeprails/types/defend_submit_event_params.py +44 -0
- deeprails/types/defend_update_workflow_params.py +18 -0
- deeprails/types/evaluate_create_params.py +60 -0
- deeprails/types/evaluation.py +113 -0
- deeprails/types/monitor_create_params.py +15 -0
- deeprails/types/monitor_retrieve_params.py +12 -0
- deeprails/types/monitor_retrieve_response.py +81 -0
- deeprails/types/monitor_submit_event_params.py +63 -0
- deeprails/types/monitor_submit_event_response.py +36 -0
- deeprails/types/monitor_update_params.py +22 -0
- deeprails/types/workflow_event_response.py +33 -0
- deeprails-1.2.0.dist-info/METADATA +377 -0
- deeprails-1.2.0.dist-info/RECORD +51 -0
- {deeprails-0.3.2.dist-info → deeprails-1.2.0.dist-info}/WHEEL +1 -1
- deeprails-1.2.0.dist-info/licenses/LICENSE +201 -0
- deeprails/client.py +0 -285
- deeprails/exceptions.py +0 -10
- deeprails/schemas.py +0 -92
- deeprails-0.3.2.dist-info/METADATA +0 -235
- deeprails-0.3.2.dist-info/RECORD +0 -8
- deeprails-0.3.2.dist-info/licenses/LICENSE +0 -11

deeprails/types/defend_response.py
@@ -0,0 +1,50 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["DefendResponse"]
+
+
+class DefendResponse(BaseModel):
+    name: str
+    """Name of the workflow."""
+
+    workflow_id: str
+    """A unique workflow ID."""
+
+    created_at: Optional[datetime] = None
+    """The time the workflow was created in UTC."""
+
+    description: Optional[str] = None
+    """Description for the workflow."""
+
+    improvement_action: Optional[Literal["regenerate", "fixit"]] = None
+    """
+    The action used to improve outputs that fail one or more guardrail metrics for
+    the workflow events. May be `regenerate`, `fixit`, or null which represents “do
+    nothing”. ReGen runs the user's exact input prompt with minor induced variance.
+    Fixit attempts to directly address the shortcomings of the output using the
+    guardrail failure rationale. Do nothing does not attempt any improvement.
+    """
+
+    max_retries: Optional[int] = None
+    """Max.
+
+    number of improvement action retries until a given event passes the guardrails.
+    """
+
+    modified_at: Optional[datetime] = None
+    """The most recent time the workflow was modified in UTC."""
+
+    status: Optional[Literal["archived", "active"]] = None
+    """Status of the selected workflow.
+
+    May be `archived` or `active`. Archived workflows will not accept events.
+    """
+
+    success_rate: Optional[float] = None
+    """Rate of events associated with this workflow that passed evaluation."""
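
The workflow fields above are plain pydantic-style attributes, so consuming code can inspect a parsed response directly. A minimal sketch follows; the helper name and the idea of receiving a `DefendResponse` from a client call are illustrative, not part of the published API surface shown in this diff.

from deeprails.types.defend_response import DefendResponse

def describe_workflow(workflow: DefendResponse) -> str:
    # `status` and `improvement_action` are Optional, so guard against None
    # before formatting them.
    status = workflow.status or "unknown"
    action = workflow.improvement_action or "do nothing"
    return f"{workflow.name} ({workflow.workflow_id}): status={status}, improvement={action}"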

deeprails/types/defend_submit_event_params.py
@@ -0,0 +1,44 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = ["DefendSubmitEventParams", "ModelInput"]
+
+
+class DefendSubmitEventParams(TypedDict, total=False):
+    model_input: Required[ModelInput]
+    """A dictionary of inputs sent to the LLM to generate output.
+
+    This must contain a `user_prompt` field and an optional `context` field.
+    Additional properties are allowed.
+    """
+
+    model_output: Required[str]
+    """Output generated by the LLM to be evaluated."""
+
+    model_used: Required[str]
+    """Model ID used to generate the output, like `gpt-4o` or `o3`."""
+
+    nametag: Required[str]
+    """An optional, user-defined tag for the event."""
+
+    run_mode: Required[Literal["precision_plus", "precision", "smart", "economy"]]
+    """Run mode for the workflow event.
+
+    The run mode allows the user to optimize for speed, accuracy, and cost by
+    determining which models are used to evaluate the event. Available run modes
+    include `precision_plus`, `precision`, `smart`, and `economy`. Defaults to
+    `smart`.
+    """
+
+
+class ModelInputTyped(TypedDict, total=False):
+    user_prompt: Required[str]
+
+    context: str
+
+
+ModelInput: TypeAlias = Union[ModelInputTyped, Dict[str, object]]
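
Because the params class is a `total=False` TypedDict, callers assemble an ordinary dictionary; the `Required[...]` markers above are enforced by type checkers rather than at runtime. A hedged sketch of building the event payload follows; the `client.defend.submit_event(...)` call in the trailing comment is an assumed mapping from the params name, following the usual Stainless convention, and is not confirmed by this diff.

from deeprails.types.defend_submit_event_params import DefendSubmitEventParams

# Keys mirror the fields defined above; `model_input` must include `user_prompt`
# and may include `context` plus arbitrary extra properties.
event: DefendSubmitEventParams = {
    "model_input": {
        "user_prompt": "Summarize the refund policy for the customer.",
        "context": "Refunds are available within 30 days of purchase.",
    },
    "model_output": "You can request a refund within 30 days of purchase.",
    "model_used": "gpt-4o",
    "nametag": "refund-faq-001",
    "run_mode": "smart",
}

# Presumably passed as keyword arguments to something like
# client.defend.submit_event(workflow_id="...", **event) -- method name assumed.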

deeprails/types/defend_update_workflow_params.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["DefendUpdateWorkflowParams"]
+
+
+class DefendUpdateWorkflowParams(TypedDict, total=False):
+    description: str
+    """Description for the workflow."""
+
+    name: str
+    """Name of the workflow."""
+
+    type: Literal["automatic", "custom"]
+    """Type of thresholds to use for the workflow, either `automatic` or `custom`."""
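
Since every key here is optional (`total=False` with no `Required[...]`), an update payload only needs the fields being changed; a one-line sketch:

from deeprails.types.defend_update_workflow_params import DefendUpdateWorkflowParams

# Rename the workflow without touching its description or threshold type.
update: DefendUpdateWorkflowParams = {"name": "support-bot-v2"}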

deeprails/types/evaluate_create_params.py
@@ -0,0 +1,60 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = ["EvaluateCreateParams", "ModelInput"]
+
+
+class EvaluateCreateParams(TypedDict, total=False):
+    model_input: Required[ModelInput]
+    """A dictionary of inputs sent to the LLM to generate output.
+
+    This must contain a `user_prompt` field and an optional `context` field.
+    Additional properties are allowed.
+    """
+
+    model_output: Required[str]
+    """Output generated by the LLM to be evaluated."""
+
+    run_mode: Required[Literal["precision_plus", "precision", "smart", "economy"]]
+    """Run mode for the evaluation.
+
+    The run mode allows the user to optimize for speed, accuracy, and cost by
+    determining which models are used to evaluate the event. Available run modes
+    include `precision_plus`, `precision`, `smart`, and `economy`. Defaults to
+    `smart`.
+    """
+
+    guardrail_metrics: List[
+        Literal[
+            "correctness",
+            "completeness",
+            "instruction_adherence",
+            "context_adherence",
+            "ground_truth_adherence",
+            "comprehensive_safety",
+        ]
+    ]
+    """
+    An array of guardrail metrics that the model input and output pair will be
+    evaluated on. For non-enterprise users, these will be limited to the allowed
+    guardrail metrics.
+    """
+
+    model_used: str
+    """Model ID used to generate the output, like `gpt-4o` or `o3`."""
+
+    nametag: str
+    """An optional, user-defined tag for the evaluation."""
+
+
+class ModelInputTyped(TypedDict, total=False):
+    user_prompt: Required[str]
+
+    context: str
+
+
+ModelInput: TypeAlias = Union[ModelInputTyped, Dict[str, object]]
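
A standalone evaluation request carries the same `model_input`/`model_output` pair plus the guardrail metrics to score it against. A sketch of the payload follows; the `client.evaluate.create(...)` mapping mentioned in the comment is assumed from the params name and is not shown in this diff.

from deeprails.types.evaluate_create_params import EvaluateCreateParams

request: EvaluateCreateParams = {
    "model_input": {"user_prompt": "What is the capital of France?"},
    "model_output": "The capital of France is Paris.",
    "run_mode": "precision",
    # Non-enterprise accounts are limited to the allowed guardrail metrics.
    "guardrail_metrics": ["correctness", "completeness"],
    "model_used": "gpt-4o",
    "nametag": "geo-smoke-test",
}

# Presumably sent via client.evaluate.create(**request) -- method name assumed.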

deeprails/types/evaluation.py
@@ -0,0 +1,113 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import TYPE_CHECKING, Dict, List, Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["Evaluation", "ModelInput"]
+
+
+class ModelInput(BaseModel):
+    user_prompt: str
+    """The user prompt used to generate the output."""
+
+    context: Optional[str] = None
+    """Optional context supplied to the LLM when generating the output."""
+
+    if TYPE_CHECKING:
+        # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a
+        # value to this field, so for compatibility we avoid doing it at runtime.
+        __pydantic_extra__: Dict[str, object] = FieldInfo(init=False)  # pyright: ignore[reportIncompatibleVariableOverride]
+
+        # Stub to indicate that arbitrary properties are accepted.
+        # To access properties that are not valid identifiers you can use `getattr`, e.g.
+        # `getattr(obj, '$type')`
+        def __getattr__(self, attr: str) -> object: ...
+    else:
+        __pydantic_extra__: Dict[str, object]
+
+
+class Evaluation(BaseModel):
+    eval_id: str
+    """A unique evaluation ID."""
+
+    evaluation_status: Literal["in_progress", "completed", "canceled", "queued", "failed"]
+    """Status of the evaluation."""
+
+    api_model_input: ModelInput = FieldInfo(alias="model_input")
+    """A dictionary of inputs sent to the LLM to generate output.
+
+    The dictionary must contain a `user_prompt` field and an optional `context`
+    field. Additional properties are allowed.
+    """
+
+    api_model_output: str = FieldInfo(alias="model_output")
+    """Output generated by the LLM to be evaluated."""
+
+    run_mode: Literal["precision_plus", "precision", "smart", "economy"]
+    """Run mode for the evaluation.
+
+    The run mode allows the user to optimize for speed, accuracy, and cost by
+    determining which models are used to evaluate the event.
+    """
+
+    created_at: Optional[datetime] = None
+    """The time the evaluation was created in UTC."""
+
+    end_timestamp: Optional[datetime] = None
+    """The time the evaluation completed in UTC."""
+
+    error_message: Optional[str] = None
+    """Description of the error causing the evaluation to fail, if any."""
+
+    error_timestamp: Optional[datetime] = None
+    """The time the error causing the evaluation to fail was recorded."""
+
+    evaluation_result: Optional[Dict[str, object]] = None
+    """
+    Evaluation result consisting of average scores and rationales for each of the
+    evaluated guardrail metrics.
+    """
+
+    evaluation_total_cost: Optional[float] = None
+    """Total cost of the evaluation."""
+
+    guardrail_metrics: Optional[
+        List[
+            Literal[
+                "correctness",
+                "completeness",
+                "instruction_adherence",
+                "context_adherence",
+                "ground_truth_adherence",
+                "comprehensive_safety",
+            ]
+        ]
+    ] = None
+    """
+    An array of guardrail metrics that the model input and output pair will be
+    evaluated on.
+    """
+
+    api_model_used: Optional[str] = FieldInfo(alias="model_used", default=None)
+    """Model ID used to generate the output, like `gpt-4o` or `o3`."""
+
+    modified_at: Optional[datetime] = None
+    """The most recent time the evaluation was modified in UTC."""
+
+    nametag: Optional[str] = None
+    """An optional, user-defined tag for the evaluation."""
+
+    progress: Optional[int] = None
+    """Evaluation progress.
+
+    Values range between 0 and 100; 100 corresponds to a completed
+    `evaluation_status`.
+    """
+
+    start_timestamp: Optional[datetime] = None
+    """The time the evaluation started in UTC."""
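
Note that the wire fields `model_input`, `model_output`, and `model_used` are exposed on the Python object under `api_`-prefixed attribute names, with `FieldInfo(alias=...)` mapping them back to the JSON names, presumably to stay clear of pydantic's reserved `model_` attribute namespace. A small hedged sketch of consuming an `Evaluation` follows; how the instance is obtained (for example from a monitor's `evaluations` list) is outside this file.

from typing import Dict, Optional

from deeprails.types.evaluation import Evaluation

def summarize(ev: Evaluation) -> Optional[Dict[str, object]]:
    # `progress` runs from 0 to 100; 100 corresponds to a completed evaluation_status.
    if ev.evaluation_status != "completed":
        print(f"{ev.eval_id}: {ev.evaluation_status} ({ev.progress or 0}%)")
        return None
    # The wire field `model_used` is surfaced as `api_model_used` on the model.
    cost = ev.evaluation_total_cost or 0.0
    print(f"{ev.eval_id}: output from {ev.api_model_used} evaluated for ${cost:.4f}")
    # Average scores and rationales per evaluated guardrail metric, when present.
    return ev.evaluation_result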

deeprails/types/monitor_create_params.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["MonitorCreateParams"]
+
+
+class MonitorCreateParams(TypedDict, total=False):
+    name: Required[str]
+    """Name of the new monitor."""
+
+    description: str
+    """Description of the new monitor."""
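
Creating a monitor only needs a name; the description is optional. A minimal sketch of the payload, with the `client.monitor.create(...)` mapping assumed from the params name rather than shown in this diff:

from deeprails.types.monitor_create_params import MonitorCreateParams

new_monitor: MonitorCreateParams = {
    "name": "production-chatbot",
    "description": "Tracks hallucination and safety metrics for the prod assistant.",
}

# Presumably sent via client.monitor.create(**new_monitor) -- method name assumed.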

deeprails/types/monitor_retrieve_params.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["MonitorRetrieveParams"]
+
+
+class MonitorRetrieveParams(TypedDict, total=False):
+    limit: int
+    """Limit the returned events associated with this monitor. Defaults to 10."""

deeprails/types/monitor_retrieve_response.py
@@ -0,0 +1,81 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+from .evaluation import Evaluation
+
+__all__ = ["MonitorRetrieveResponse", "Data", "DataStats"]
+
+
+class DataStats(BaseModel):
+    completed_evaluations: Optional[int] = None
+    """Number of evaluations that completed successfully."""
+
+    failed_evaluations: Optional[int] = None
+    """Number of evaluations that failed."""
+
+    in_progress_evaluations: Optional[int] = None
+    """Number of evaluations currently in progress."""
+
+    queued_evaluations: Optional[int] = None
+    """Number of evaluations currently queued."""
+
+    total_evaluations: Optional[int] = None
+    """Total number of evaluations performed by this monitor."""
+
+
+class Data(BaseModel):
+    monitor_id: str
+    """A unique monitor ID."""
+
+    monitor_status: Literal["active", "inactive"]
+    """Status of the monitor.
+
+    Can be `active` or `inactive`. Inactive monitors no longer record and evaluate
+    events.
+    """
+
+    name: str
+    """Name of this monitor."""
+
+    created_at: Optional[datetime] = None
+    """The time the monitor was created in UTC."""
+
+    description: Optional[str] = None
+    """Description of this monitor."""
+
+    evaluations: Optional[List[Evaluation]] = None
+    """An array of all evaluations performed by this monitor.
+
+    Each one corresponds to a separate monitor event.
+    """
+
+    stats: Optional[DataStats] = None
+    """
+    Contains five fields used for stats of this monitor: total evaluations,
+    completed evaluations, failed evaluations, queued evaluations, and in progress
+    evaluations.
+    """
+
+    updated_at: Optional[datetime] = None
+    """The most recent time the monitor was modified in UTC."""
+
+    user_id: Optional[str] = None
+    """User ID of the user who created the monitor."""
+
+
+class MonitorRetrieveResponse(BaseModel):
+    success: bool
+    """Represents whether the request was completed successfully."""
+
+    data: Optional[Data] = None
+    """Detailed response payload for retrieving a monitor and its evaluations."""
+
+    message: Optional[str] = None
+    """The accompanying message for the request.
+
+    Includes error details when applicable.
+    """
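
The retrieve response wraps the monitor in a `success`/`data`/`message` envelope, and `data.evaluations` reuses the `Evaluation` model above (its length is capped by the `limit` field from `MonitorRetrieveParams`). A hedged sketch of walking the payload:

from deeprails.types.monitor_retrieve_response import MonitorRetrieveResponse

def report(resp: MonitorRetrieveResponse) -> None:
    if not resp.success or resp.data is None:
        # `message` carries error details when applicable.
        print(f"retrieve failed: {resp.message}")
        return
    monitor = resp.data
    print(f"{monitor.name} [{monitor.monitor_status}]")
    if monitor.stats is not None:
        print(f"  {monitor.stats.completed_evaluations}/{monitor.stats.total_evaluations} evaluations completed")
    for ev in monitor.evaluations or []:
        print(f"  {ev.eval_id}: {ev.evaluation_status}")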

deeprails/types/monitor_submit_event_params.py
@@ -0,0 +1,63 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = ["MonitorSubmitEventParams", "ModelInput"]
+
+
+class MonitorSubmitEventParams(TypedDict, total=False):
+    guardrail_metrics: Required[
+        List[
+            Literal[
+                "correctness",
+                "completeness",
+                "instruction_adherence",
+                "context_adherence",
+                "ground_truth_adherence",
+                "comprehensive_safety",
+            ]
+        ]
+    ]
+    """
+    An array of guardrail metrics that the model input and output pair will be
+    evaluated on. For non-enterprise users, these will be limited to `correctness`,
+    `completeness`, `instruction_adherence`, `context_adherence`,
+    `ground_truth_adherence`, and/or `comprehensive_safety`.
+    """
+
+    model_input: Required[ModelInput]
+    """A dictionary of inputs sent to the LLM to generate output.
+
+    This must contain a `user_prompt` field and an optional `context` field.
+    Additional properties are allowed.
+    """
+
+    model_output: Required[str]
+    """Output generated by the LLM to be evaluated."""
+
+    model_used: str
+    """Model ID used to generate the output, like `gpt-4o` or `o3`."""
+
+    nametag: str
+    """An optional, user-defined tag for the event."""
+
+    run_mode: Literal["precision_plus", "precision", "smart", "economy"]
+    """Run mode for the monitor event.
+
+    The run mode allows the user to optimize for speed, accuracy, and cost by
+    determining which models are used to evaluate the event. Available run modes
+    include `precision_plus`, `precision`, `smart`, and `economy`. Defaults to
+    `smart`.
+    """
+
+
+class ModelInputTyped(TypedDict, total=False):
+    user_prompt: Required[str]
+
+    context: str
+
+
+ModelInput: TypeAlias = Union[ModelInputTyped, Dict[str, object]]

deeprails/types/monitor_submit_event_response.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from datetime import datetime
+
+from .._models import BaseModel
+
+__all__ = ["MonitorSubmitEventResponse", "Data"]
+
+
+class Data(BaseModel):
+    evaluation_id: str
+    """A unique evaluation ID associated with this event."""
+
+    event_id: str
+    """A unique monitor event ID."""
+
+    monitor_id: str
+    """Monitor ID associated with this event."""
+
+    created_at: Optional[datetime] = None
+    """The time the monitor event was created in UTC."""
+
+
+class MonitorSubmitEventResponse(BaseModel):
+    success: bool
+    """Represents whether the request was completed successfully."""
+
+    data: Optional[Data] = None
+    """Response payload for monitor event operations."""
+
+    message: Optional[str] = None
+    """The accompanying message for the request.
+
+    Includes error details when applicable.
+    """
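
Unlike the defend variant, `guardrail_metrics` is required when submitting a monitor event, while `model_used`, `nametag`, and `run_mode` are optional. A sketch tying the params above to the response envelope in this file; the `client.monitor.submit_event(...)` call in the comment is assumed from the names and is not confirmed by this diff.

from deeprails.types.monitor_submit_event_params import MonitorSubmitEventParams
from deeprails.types.monitor_submit_event_response import MonitorSubmitEventResponse

event: MonitorSubmitEventParams = {
    "guardrail_metrics": ["context_adherence", "comprehensive_safety"],
    "model_input": {"user_prompt": "Draft a polite decline email.", "context": "Vendor pitch"},
    "model_output": "Thank you for reaching out; unfortunately we will pass for now.",
}

# Presumably: resp = client.monitor.submit_event(monitor_id="...", **event) -- assumed call.
def on_submitted(resp: MonitorSubmitEventResponse) -> None:
    if resp.success and resp.data is not None:
        # The event is queued as an evaluation; keep the IDs for later lookups.
        print(f"event {resp.data.event_id} -> evaluation {resp.data.evaluation_id}")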

deeprails/types/monitor_update_params.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["MonitorUpdateParams"]
+
+
+class MonitorUpdateParams(TypedDict, total=False):
+    description: str
+    """Description of the monitor."""
+
+    monitor_status: Literal["active", "inactive"]
+    """Status of the monitor.
+
+    Can be `active` or `inactive`. Inactive monitors no longer record and evaluate
+    events.
+    """
+
+    name: str
+    """Name of the monitor."""

deeprails/types/workflow_event_response.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from .._models import BaseModel
+
+__all__ = ["WorkflowEventResponse"]
+
+
+class WorkflowEventResponse(BaseModel):
+    event_id: str
+    """A unique workflow event ID."""
+
+    workflow_id: str
+    """Workflow ID associated with the event."""
+
+    attempt_number: Optional[int] = None
+    """Count of improvement attempts for the event.
+
+    If greater than one then all previous improvement attempts failed.
+    """
+
+    evaluation_id: Optional[str] = None
+    """A unique evaluation ID associated with this event.
+
+    Every event has one or more evaluation attempts.
+    """
+
+    filtered: Optional[bool] = None
+    """
+    `False` if evaluation passed all of the guardrail metrics, `True` if evaluation
+    failed any of the guardrail metrics.
+    """
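
A final small sketch of interpreting a defend workflow event: `filtered` flips to `True` when the evaluation failed any guardrail metric, and `attempt_number` counts improvement retries (bounded by the workflow's `max_retries`). The helper below is illustrative only.

from deeprails.types.workflow_event_response import WorkflowEventResponse

def passed_guardrails(event: WorkflowEventResponse) -> bool:
    # `filtered` is False when every guardrail metric passed; treat a missing
    # value as "not yet evaluated" rather than a pass.
    if event.filtered is None:
        return False
    if event.filtered and (event.attempt_number or 1) > 1:
        print(f"event {event.event_id}: previous improvement attempts failed")
    return not event.filtered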