deeprails 0.2.1__tar.gz → 0.3.0__tar.gz
This diff shows the content of publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between the two versions as published.
Potentially problematic release: this version of deeprails has been flagged as possibly problematic.
- {deeprails-0.2.1 → deeprails-0.3.0}/PKG-INFO +86 -9
- {deeprails-0.2.1 → deeprails-0.3.0}/README.md +85 -8
- deeprails-0.3.0/deeprails/client.py +285 -0
- deeprails-0.3.0/deeprails/schemas.py +92 -0
- {deeprails-0.2.1 → deeprails-0.3.0}/pyproject.toml +1 -1
- deeprails-0.2.1/deeprails/client.py +0 -101
- deeprails-0.2.1/deeprails/schemas.py +0 -30
- {deeprails-0.2.1 → deeprails-0.3.0}/LICENSE +0 -0
- {deeprails-0.2.1 → deeprails-0.3.0}/deeprails/__init__.py +0 -0
- {deeprails-0.2.1 → deeprails-0.3.0}/deeprails/exceptions.py +0 -0
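
The headline change in 0.3.0 is a monitor API (`create_monitor`, `create_monitor_event`, `get_monitor`, `get_monitor_events`, `get_monitors`) layered on top of the existing evaluation flow. A minimal usage sketch assembled from the README and `client.py` changes shown below; the top-level import path, environment-variable handling, and all literal values are illustrative rather than taken from this diff:

```python
import os

from deeprails import DeepRails  # assumed top-level export, mirroring the README usage

client = DeepRails(token=os.environ["DEEPRAILS_API_KEY"])

# Evaluation flow, unchanged from 0.2.1 apart from the README typo fix.
evaluation = client.create_evaluation(
    model_input={"user_prompt": "Prompt used to generate completion"},
    model_output="Generated output",
    model_used="gpt-4o-mini",
    guardrail_metrics=["correctness", "completeness"],
)
print(f"Evaluation created with ID: {evaluation.eval_id}")

# New in 0.3.0: create a monitor and log an event against it.
monitor = client.create_monitor(
    name="Production Assistant Monitor",
    description="Tracking our production assistant quality",
)
event = client.create_monitor_event(
    monitor_id=monitor.monitor_id,
    model_input={"user_prompt": "Tell me about renewable energy"},
    model_output="Renewable energy comes from natural sources...",
    model_used="gpt-4o-mini",
    guardrail_metrics=["correctness", "completeness", "comprehensive_safety"],
)
print(f"Monitor {monitor.monitor_id} logged event {event.event_id}")
```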
{deeprails-0.2.1 → deeprails-0.3.0}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deeprails
-Version: 0.2.1
+Version: 0.3.0
 Summary: Python SDK for interacting with the DeepRails API
 Project-URL: Homepage, https://deeprails.com
 Project-URL: Documentation, https://docs.deeprails.com
@@ -55,12 +55,17 @@ client = DeepRails(token="YOUR_API_KEY")
 evaluation = client.create_evaluation(
     model_input={"user_prompt": "Prompt used to generate completion"},
     model_output="Generated output",
-    model_used="gpt-4o-mini
+    model_used="gpt-4o-mini",
     guardrail_metrics=["correctness", "completeness"]
 )
-
-# Print evaluation ID
 print(f"Evaluation created with ID: {evaluation.eval_id}")
+
+# Create a monitor
+monitor = client.create_monitor(
+    name="Production Assistant Monitor",
+    description="Tracking our production assistant quality"
+)
+print(f"Monitor created with ID: {monitor.monitor_id}")
 ```
 
 ## Features
@@ -69,6 +74,7 @@ print(f"Evaluation created with ID: {evaluation.eval_id}")
 - **Comprehensive Metrics**: Evaluate outputs on correctness, completeness, and more
 - **Real-time Progress**: Track evaluation progress in real-time
 - **Detailed Results**: Get detailed scores and rationales for each metric
+- **Continuous Monitoring**: Create monitors to track AI system performance over time
 
 ## Authentication
 
@@ -81,14 +87,16 @@ token = os.environ.get("DEEPRAILS_API_KEY")
 client = DeepRails(token=token)
 ```
 
-##
+## Evaluation Service
+
+### Creating Evaluations
 
 ```python
 try:
     evaluation = client.create_evaluation(
         model_input={"user_prompt": "Prompt used to generate completion"},
         model_output="Generated output",
-        model_used="gpt-4o-mini
+        model_used="gpt-4o-mini",
         guardrail_metrics=["correctness", "completeness"]
     )
     print(f"ID: {evaluation.eval_id}")
@@ -98,7 +106,7 @@ except Exception as e:
     print(f"Error: {e}")
 ```
 
-
+#### Parameters
 
 - `model_input`: Dictionary containing the prompt and any context (must include `user_prompt`)
 - `model_output`: The generated output to evaluate
@@ -108,7 +116,7 @@ except Exception as e:
 - `nametag`: (Optional) Custom identifier for this evaluation
 - `webhook`: (Optional) URL to receive completion notifications
 
-
+### Retrieving Evaluations
 
 ```python
 try:
@@ -126,6 +134,76 @@ except Exception as e:
     print(f"Error: {e}")
 ```
 
+## Monitor Service
+
+### Creating Monitors
+
+```python
+try:
+    # Create a monitor
+    monitor = client.create_monitor(
+        name="Production Chat Assistant Monitor",
+        description="Monitoring our production chatbot responses"
+    )
+
+    print(f"Monitor created with ID: {monitor.monitor_id}")
+except Exception as e:
+    print(f"Error: {e}")
+```
+
+### Logging Monitor Events
+
+```python
+try:
+    # Add an event to the monitor
+    event = client.create_monitor_event(
+        monitor_id="mon-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
+        model_input={"user_prompt": "Tell me about renewable energy"},
+        model_output="Renewable energy comes from natural sources...",
+        model_used="gpt-4o-mini",
+        guardrail_metrics=["correctness", "completeness", "comprehensive_safety"]
+    )
+
+    print(f"Monitor event created with ID: {event.event_id}")
+    print(f"Associated evaluation ID: {event.evaluation_id}")
+except Exception as e:
+    print(f"Error: {e}")
+```
+
+### Retrieving Monitor Data
+
+```python
+try:
+    # Get monitor details
+    monitor = client.get_monitor("mon-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")
+    print(f"Monitor name: {monitor.name}")
+    print(f"Status: {monitor.monitor_status}")
+
+    # Get monitor events
+    events = client.get_monitor_events(
+        monitor_id="mon-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
+        limit=10
+    )
+
+    for event in events:
+        print(f"Event ID: {event.event_id}")
+        print(f"Evaluation ID: {event.evaluation_id}")
+
+    # List all monitors with filtering
+    monitors = client.get_monitors(
+        limit=5,
+        monitor_status=["active"],
+        sort_by="created_at",
+        sort_order="desc"
+    )
+
+    print(f"Total monitors: {monitors.pagination.total_count}")
+    for m in monitors.monitors:
+        print(f"{m.name}: {m.event_count} events")
+except Exception as e:
+    print(f"Error: {e}")
+```
+
 ## Available Metrics
 
 - `correctness`: Measures factual accuracy by evaluating whether each claim in the output is true and verifiable.
@@ -135,7 +213,6 @@ except Exception as e:
 - `ground_truth_adherence`: Measures how closely the output matches a known correct answer (gold standard).
 - `comprehensive_safety`: Detects and categorizes safety violations across areas like PII, CBRN, hate speech, self-harm, and more.
 
-
 ## Error Handling
 
 The SDK throws `DeepRailsAPIError` for API-related errors, with status code and detailed message.
````
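
The README (in both PKG-INFO above and README.md below) notes that the SDK raises `DeepRailsAPIError` for API-related failures. A hedged sketch of catching it; `deeprails/exceptions.py` is unchanged in this release and not shown in the diff, so the example only prints the exception rather than assuming specific attribute names:

```python
from deeprails import DeepRails  # assumed top-level export, mirroring the README usage
from deeprails.exceptions import DeepRailsAPIError

client = DeepRails(token="YOUR_API_KEY")

try:
    # client.py turns any non-2xx response into DeepRailsAPIError, built from
    # the HTTP status code and the response's "detail" field when present.
    evaluation = client.get_evaluation("eval-id-that-does-not-exist")  # placeholder ID
except DeepRailsAPIError as exc:
    print(f"DeepRails API error: {exc}")
```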
{deeprails-0.2.1 → deeprails-0.3.0}/README.md

````diff
@@ -20,12 +20,17 @@ client = DeepRails(token="YOUR_API_KEY")
 evaluation = client.create_evaluation(
     model_input={"user_prompt": "Prompt used to generate completion"},
     model_output="Generated output",
-    model_used="gpt-4o-mini
+    model_used="gpt-4o-mini",
     guardrail_metrics=["correctness", "completeness"]
 )
-
-# Print evaluation ID
 print(f"Evaluation created with ID: {evaluation.eval_id}")
+
+# Create a monitor
+monitor = client.create_monitor(
+    name="Production Assistant Monitor",
+    description="Tracking our production assistant quality"
+)
+print(f"Monitor created with ID: {monitor.monitor_id}")
 ```
 
 ## Features
@@ -34,6 +39,7 @@ print(f"Evaluation created with ID: {evaluation.eval_id}")
 - **Comprehensive Metrics**: Evaluate outputs on correctness, completeness, and more
 - **Real-time Progress**: Track evaluation progress in real-time
 - **Detailed Results**: Get detailed scores and rationales for each metric
+- **Continuous Monitoring**: Create monitors to track AI system performance over time
 
 ## Authentication
 
@@ -46,14 +52,16 @@ token = os.environ.get("DEEPRAILS_API_KEY")
 client = DeepRails(token=token)
 ```
 
-##
+## Evaluation Service
+
+### Creating Evaluations
 
 ```python
 try:
     evaluation = client.create_evaluation(
         model_input={"user_prompt": "Prompt used to generate completion"},
         model_output="Generated output",
-        model_used="gpt-4o-mini
+        model_used="gpt-4o-mini",
         guardrail_metrics=["correctness", "completeness"]
     )
     print(f"ID: {evaluation.eval_id}")
@@ -63,7 +71,7 @@ except Exception as e:
     print(f"Error: {e}")
 ```
 
-
+#### Parameters
 
 - `model_input`: Dictionary containing the prompt and any context (must include `user_prompt`)
 - `model_output`: The generated output to evaluate
@@ -73,7 +81,7 @@ except Exception as e:
 - `nametag`: (Optional) Custom identifier for this evaluation
 - `webhook`: (Optional) URL to receive completion notifications
 
-
+### Retrieving Evaluations
 
 ```python
 try:
@@ -91,6 +99,76 @@ except Exception as e:
     print(f"Error: {e}")
 ```
 
+## Monitor Service
+
+### Creating Monitors
+
+```python
+try:
+    # Create a monitor
+    monitor = client.create_monitor(
+        name="Production Chat Assistant Monitor",
+        description="Monitoring our production chatbot responses"
+    )
+
+    print(f"Monitor created with ID: {monitor.monitor_id}")
+except Exception as e:
+    print(f"Error: {e}")
+```
+
+### Logging Monitor Events
+
+```python
+try:
+    # Add an event to the monitor
+    event = client.create_monitor_event(
+        monitor_id="mon-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
+        model_input={"user_prompt": "Tell me about renewable energy"},
+        model_output="Renewable energy comes from natural sources...",
+        model_used="gpt-4o-mini",
+        guardrail_metrics=["correctness", "completeness", "comprehensive_safety"]
+    )
+
+    print(f"Monitor event created with ID: {event.event_id}")
+    print(f"Associated evaluation ID: {event.evaluation_id}")
+except Exception as e:
+    print(f"Error: {e}")
+```
+
+### Retrieving Monitor Data
+
+```python
+try:
+    # Get monitor details
+    monitor = client.get_monitor("mon-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")
+    print(f"Monitor name: {monitor.name}")
+    print(f"Status: {monitor.monitor_status}")
+
+    # Get monitor events
+    events = client.get_monitor_events(
+        monitor_id="mon-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
+        limit=10
+    )
+
+    for event in events:
+        print(f"Event ID: {event.event_id}")
+        print(f"Evaluation ID: {event.evaluation_id}")
+
+    # List all monitors with filtering
+    monitors = client.get_monitors(
+        limit=5,
+        monitor_status=["active"],
+        sort_by="created_at",
+        sort_order="desc"
+    )
+
+    print(f"Total monitors: {monitors.pagination.total_count}")
+    for m in monitors.monitors:
+        print(f"{m.name}: {m.event_count} events")
+except Exception as e:
+    print(f"Error: {e}")
+```
+
 ## Available Metrics
 
 - `correctness`: Measures factual accuracy by evaluating whether each claim in the output is true and verifiable.
@@ -100,7 +178,6 @@ except Exception as e:
 - `ground_truth_adherence`: Measures how closely the output matches a known correct answer (gold standard).
 - `comprehensive_safety`: Detects and categorizes safety violations across areas like PII, CBRN, hate speech, self-harm, and more.
 
-
 ## Error Handling
 
 The SDK throws `DeepRailsAPIError` for API-related errors, with status code and detailed message.
````
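
`get_monitors` is paginated via the `PaginationInfo` model added in `schemas.py` below. A sketch of walking every page of active monitors, with the same assumed import and token handling as the earlier sketches:

```python
import os

from deeprails import DeepRails  # assumed top-level export

client = DeepRails(token=os.environ["DEEPRAILS_API_KEY"])

# Page through active monitors; page/limit map to get_monitors() arguments
# and has_next comes from the PaginationInfo schema.
page = 1
while True:
    result = client.get_monitors(page=page, limit=20, monitor_status=["active"])
    for m in result.monitors:
        print(f"{m.name}: {m.event_count} events ({m.monitor_status})")
    if not result.pagination.has_next:
        break
    page += 1
```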
deeprails-0.3.0/deeprails/client.py (new file)

````diff
@@ -0,0 +1,285 @@
+import httpx
+from typing import List, Optional, Dict, Any
+
+from .schemas import EvaluationResponse, MonitorResponse, MonitorEventResponse, MonitorListResponse
+from .exceptions import DeepRailsAPIError
+
+class DeepRails:
+    """
+    Python SDK client for the DeepRails API.
+    """
+
+    def __init__(self, token: str, base_url: str = "https://api.deeprails.com"):
+        """
+        Initializes the DeepRails client.
+
+        Args:
+            token: Your DeepRails API key (starts with 'sk_').
+            base_url: The base URL of the DeepRails API.
+        """
+        if not token:
+            raise ValueError("A valid DeepRails API token is required.")
+
+        self._base_url = base_url
+        self._headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json",
+            "User-Agent": "deeprails-python-sdk/0.3.0"
+        }
+        self._client = httpx.Client(base_url=self._base_url, headers=self._headers, timeout=30.0)
+
+
+    def _request(self, method: str, endpoint: str, **kwargs) -> httpx.Response:
+        """Helper method to make requests and handle API errors."""
+        try:
+            response = self._client.request(method, endpoint, **kwargs)
+            response.raise_for_status()
+            return response
+        except httpx.HTTPStatusError as e:
+            error_detail = "No detail provided."
+            try:
+                error_detail = e.response.json().get("detail", error_detail)
+            except Exception:
+                error_detail = e.response.text
+            raise DeepRailsAPIError(status_code=e.response.status_code, error_detail=error_detail) from e
+        except httpx.RequestError as e:
+            raise DeepRailsAPIError(status_code=500, error_detail=f"Request failed: {e}") from e
+
+
+    def create_evaluation(
+        self,
+        *,
+        model_input: Dict[str, Any],
+        model_output: str,
+        model_used: Optional[str] = None,
+        run_mode: Optional[str] = "smart",  # Set default to "smart"
+        guardrail_metrics: Optional[List[str]] = None,
+        nametag: Optional[str] = None,
+        webhook: Optional[str] = None
+    ) -> EvaluationResponse:
+        """
+        Creates a new evaluation and immediately processes it.
+
+        Args:
+            model_input: A dictionary containing the inputs for the model.
+                Must contain a "user_prompt" key.
+            model_output: The response generated by the model you are evaluating.
+            model_used: The name or identifier of the model being evaluated.
+            run_mode: The evaluation mode (e.g., "smart", "dev").
+            guardrail_metrics: A list of metrics to evaluate.
+            nametag: A user-defined name or tag for the evaluation.
+            webhook: A URL to send a POST request to upon evaluation completion.
+
+        Returns:
+            An EvaluationResponse object with the details of the created evaluation.
+        """
+        if "user_prompt" not in model_input:
+            raise ValueError("`model_input` must contain a 'user_prompt' key.")
+
+        payload = {
+            "model_input": model_input,
+            "model_output": model_output,
+            "model_used": model_used,
+            "run_mode": run_mode,
+            "guardrail_metrics": guardrail_metrics,
+            "nametag": nametag,
+            "webhook": webhook,
+        }
+        json_payload = {k: v for k, v in payload.items() if v is not None}
+
+        response = self._request("POST", "/evaluate", json=json_payload)
+        return EvaluationResponse.parse_obj(response.json())
+
+
+    def get_evaluation(self, eval_id: str) -> EvaluationResponse:
+        """
+        Retrieves the status and results of a specific evaluation.
+
+        Args:
+            eval_id: The unique identifier of the evaluation.
+
+        Returns:
+            An EvaluationResponse object with the full, up-to-date details of the evaluation.
+        """
+        response = self._request("GET", f"/evaluate/{eval_id}")
+        return EvaluationResponse.parse_obj(response.json())
+
+
+    def create_monitor(
+        self,
+        *,
+        name: str,
+        description: Optional[str] = None
+    ) -> MonitorResponse:
+        """
+        Creates a new monitor for tracking AI responses.
+
+        Args:
+            name: A name for the monitor.
+            description: Optional description of the monitor's purpose.
+
+        Returns:
+            A MonitorResponse object with the details of the created monitor.
+        """
+        payload = {
+            "name": name,
+            "description": description
+        }
+
+        # Remove None values
+        json_payload = {k: v for k, v in payload.items() if v is not None}
+
+        response = self._request("POST", "/monitor", json=json_payload)
+        response_json = response.json()
+
+        # Handle DeepRails API response structure
+        if "data" in response_json:
+            return MonitorResponse.parse_obj(response_json["data"])
+        else:
+            return MonitorResponse.parse_obj(response_json)
+
+    def get_monitor(self, monitor_id: str) -> MonitorResponse:
+        """
+        Get details of a specific monitor.
+
+        Args:
+            monitor_id: The ID of the monitor to retrieve.
+
+        Returns:
+            A MonitorResponse object with the monitor details.
+        """
+        response = self._request("GET", f"/monitor/{monitor_id}")
+        response_json = response.json()
+
+        # Handle DeepRails API response structure
+        if "data" in response_json:
+            return MonitorResponse.parse_obj(response_json["data"])
+        else:
+            return MonitorResponse.parse_obj(response_json)
+
+    def create_monitor_event(
+        self,
+        *,
+        monitor_id: str,
+        model_input: Dict[str, Any],
+        model_output: str,
+        guardrail_metrics: List[str],
+        model_used: Optional[str] = None,
+        run_mode: Optional[str] = None,
+        nametag: Optional[str] = None,
+        webhook: Optional[str] = None
+    ) -> MonitorEventResponse:
+        """
+        Creates a new event for a monitor.
+
+        Args:
+            monitor_id: The ID of the monitor to create an event for.
+            model_input: A dictionary containing the inputs for the model.
+            model_output: The response generated by the model you are evaluating.
+            guardrail_metrics: A list of metrics to evaluate.
+            model_used: The name or identifier of the model being evaluated.
+            run_mode: The evaluation mode (e.g., "smart", "dev").
+            nametag: A user-defined name or tag for the event.
+            webhook: A URL to send a POST request to upon evaluation completion.
+
+        Returns:
+            A MonitorEventResponse object with the details of the created event.
+        """
+        payload = {
+            "model_input": model_input,
+            "model_output": model_output,
+            "model_used": model_used,
+            "run_mode": run_mode,
+            "guardrail_metrics": guardrail_metrics,
+            "nametag": nametag,
+            "webhook": webhook,
+        }
+
+        # Remove None values
+        json_payload = {k: v for k, v in payload.items() if v is not None}
+
+        response = self._request("POST", f"/monitor/{monitor_id}/events", json=json_payload)
+        response_json = response.json()
+
+        # Handle DeepRails API response structure
+        if "data" in response_json:
+            return MonitorEventResponse.parse_obj(response_json["data"])
+        else:
+            return MonitorEventResponse.parse_obj(response_json)
+
+    def get_monitor_events(
+        self,
+        monitor_id: str,
+        limit: int = 10,
+        offset: int = 0
+    ) -> List[MonitorEventResponse]:
+        """
+        Retrieves events for a specific monitor.
+
+        Args:
+            monitor_id: The ID of the monitor to get events for.
+            limit: Maximum number of events to return (default: 10).
+            offset: Offset for pagination (default: 0).
+
+        Returns:
+            A list of MonitorEventResponse objects with details of the monitor events.
+        """
+        params = {
+            "limit": limit,
+            "offset": offset
+        }
+
+        response = self._request("GET", f"/monitor/{monitor_id}/events", params=params)
+        response_json = response.json()
+
+        # Handle DeepRails API response structure
+        if "data" in response_json and isinstance(response_json["data"], list):
+            return [MonitorEventResponse.parse_obj(event) for event in response_json["data"]]
+        else:
+            # Fallback if the response structure is unexpected
+            return []
+
+    def get_monitors(
+        self,
+        *,
+        page: int = 1,
+        limit: int = 20,
+        search: Optional[List[str]] = None,
+        monitor_status: Optional[List[str]] = None,
+        date_from: Optional[str] = None,
+        date_to: Optional[str] = None,
+        sort_by: str = "created_at",
+        sort_order: str = "desc"
+    ) -> MonitorListResponse:
+        """
+        Get a paginated list of monitors with optional filtering.
+
+        Args:
+            page: Page number for pagination (default: 1)
+            limit: Number of items per page (default: 20, max: 100)
+            search: Optional list of free-text search terms
+            monitor_status: Optional list of monitor statuses ("active", "inactive", "all")
+            date_from: Optional filter for monitors from this date (ISO format)
+            date_to: Optional filter for monitors to this date (ISO format)
+            sort_by: Field to sort by (default: "created_at")
+            sort_order: Sort order (default: "desc")
+
+        Returns:
+            A MonitorListResponse object containing monitors, pagination info, and applied filters.
+        """
+        params = {
+            "page": page,
+            "limit": limit,
+            "sort_by": sort_by,
+            "sort_order": sort_order,
+            "search": search,
+            "monitor_status": monitor_status,
+            "date_from": date_from,
+            "date_to": date_to
+        }
+
+        # Remove None values
+        params = {k: v for k, v in params.items() if v is not None}
+
+        response = self._request("GET", "/monitor", params=params)
+        return MonitorListResponse.parse_obj(response.json())
````
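
Unlike `get_monitors`, the `get_monitor_events` method above paginates with plain `limit`/`offset` arguments and returns a list rather than a wrapper model. A sketch of pulling events in batches; the monitor ID is a placeholder and the stop condition assumes the API returns fewer than `limit` events once the collection is exhausted:

```python
import os

from deeprails import DeepRails  # assumed top-level export

client = DeepRails(token=os.environ["DEEPRAILS_API_KEY"])
monitor_id = "mon-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"  # placeholder

offset = 0
batch_size = 50
while True:
    # Each call returns at most `batch_size` MonitorEventResponse objects.
    events = client.get_monitor_events(monitor_id, limit=batch_size, offset=offset)
    for event in events:
        print(f"{event.event_id} -> evaluation {event.evaluation_id}")
    if len(events) < batch_size:  # assumption: a short batch means no more events
        break
    offset += batch_size
```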
deeprails-0.3.0/deeprails/schemas.py (new file)

````diff
@@ -0,0 +1,92 @@
+from typing import List, Optional, Dict, Any
+from pydantic import BaseModel, Field
+from datetime import datetime
+
+
+class EvaluationResponse(BaseModel):
+    """Represents the response for an evaluation from the DeepRails API."""
+    eval_id: str
+    evaluation_status: str
+    guardrail_metrics: Optional[List[str]] = None
+    model_used: Optional[str] = None
+    run_mode: Optional[str] = None
+    model_input: Optional[Dict[str, Any]] = None
+    model_output: Optional[str] = None
+    estimated_cost: Optional[float] = None
+    input_tokens: Optional[int] = None
+    output_tokens: Optional[int] = None
+    nametag: Optional[str] = None
+    progress: Optional[int] = Field(None, ge=0, le=100)
+    start_timestamp: Optional[datetime] = None
+    completion_timestamp: Optional[datetime] = None
+    error_message: Optional[str] = None
+    error_timestamp: Optional[datetime] = None
+    evaluation_result: Optional[Dict[str, Any]] = None
+    evaluation_total_cost: Optional[float] = None
+    created_at: Optional[datetime] = None
+    modified_at: Optional[datetime] = None
+
+    class Config:
+        extra = 'ignore'
+
+class MonitorResponse(BaseModel):
+    """Represents a monitor from the DeepRails API."""
+    monitor_id: str
+    user_id: str
+    name: str
+    description: Optional[str] = None
+    monitor_status: str
+    created_at: str
+    updated_at: str
+
+    class Config:
+        extra = 'ignore'
+
+class MonitorEventCreate(BaseModel):
+    """Model for creating a new monitor event."""
+    model_input: Dict[str, Any]
+    model_output: str
+    model_used: Optional[str] = None
+    run_mode: Optional[str] = None
+    guardrail_metrics: List[str]
+    nametag: Optional[str] = None
+    webhook: Optional[str] = None
+
+class MonitorEventResponse(BaseModel):
+    """Response model for a monitor event."""
+    event_id: str
+    monitor_id: str
+    evaluation_id: str
+    created_at: str
+
+    class Config:
+        extra = 'ignore'
+
+class PaginationInfo(BaseModel):
+    """Pagination information for list responses."""
+    page: int
+    limit: int
+    total_pages: int
+    total_count: int
+    has_next: bool
+    has_previous: bool
+
+class MonitorFiltersApplied(BaseModel):
+    """Information about which filters were applied to the monitor query."""
+    search: Optional[List[str]] = None
+    status: Optional[List[str]] = None
+    date_from: Optional[str] = None
+    date_to: Optional[str] = None
+    sort_by: Optional[str] = None
+    sort_order: Optional[str] = None
+
+class MonitorWithEventCountResponse(MonitorResponse):
+    """Monitor response with event count information."""
+    event_count: int
+    latest_event_modified_at: Optional[str] = None
+
+class MonitorListResponse(BaseModel):
+    """Response model for a paginated list of monitors."""
+    monitors: List[MonitorWithEventCountResponse]
+    pagination: PaginationInfo
+    filters_applied: MonitorFiltersApplied
````
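
These are pydantic v1-style models (`parse_obj`, nested `Config` classes with `extra = 'ignore'`). A small sketch that validates an illustrative payload into `MonitorListResponse`; the field values are made up to match the shapes declared above, not captured from the API:

```python
from deeprails.schemas import MonitorListResponse

# Illustrative payload only, shaped to the models above.
payload = {
    "monitors": [
        {
            "monitor_id": "mon-123",
            "user_id": "user-456",
            "name": "Production Assistant Monitor",
            "description": "Tracking our production assistant quality",
            "monitor_status": "active",
            "created_at": "2025-01-01T00:00:00Z",
            "updated_at": "2025-01-02T00:00:00Z",
            "event_count": 3,
            "latest_event_modified_at": "2025-01-02T00:00:00Z",
        }
    ],
    "pagination": {
        "page": 1,
        "limit": 20,
        "total_pages": 1,
        "total_count": 1,
        "has_next": False,
        "has_previous": False,
    },
    "filters_applied": {"status": ["active"], "sort_by": "created_at", "sort_order": "desc"},
}

monitors = MonitorListResponse.parse_obj(payload)
print(monitors.monitors[0].name, monitors.pagination.total_count)
```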
{deeprails-0.2.1 → deeprails-0.3.0}/pyproject.toml

````diff
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "deeprails"
-version = "0.2.1"
+version = "0.3.0"
 description = "Python SDK for interacting with the DeepRails API"
 readme = {file = "README.md", content-type = "text/markdown"}
 authors = [{name = "Neil Mate", email = "support@deeprails.ai"}]
````
deeprails-0.2.1/deeprails/client.py (removed)

````diff
@@ -1,101 +0,0 @@
-import httpx
-from typing import List, Optional, Dict, Any
-
-from .schemas import EvaluationResponse
-from .exceptions import DeepRailsAPIError
-
-class DeepRails:
-    """
-    Python SDK client for the DeepRails API.
-    """
-    def __init__(self, token: str, base_url: str = "https://api.deeprails.com"):
-        """
-        Initializes the DeepRails client.
-
-        Args:
-            token: Your DeepRails API key (starts with 'sk_').
-            base_url: The base URL of the DeepRails API.
-        """
-        if not token:
-            raise ValueError("A valid DeepRails API token is required.")
-
-        self._base_url = base_url
-        self._headers = {
-            "Authorization": f"Bearer {token}",
-            "Content-Type": "application/json",
-            "User-Agent": "deeprails-python-sdk/0.2.0"
-        }
-        self._client = httpx.Client(base_url=self._base_url, headers=self._headers, timeout=30.0)
-
-    def _request(self, method: str, endpoint: str, **kwargs) -> httpx.Response:
-        """Helper method to make requests and handle API errors."""
-        try:
-            response = self._client.request(method, endpoint, **kwargs)
-            response.raise_for_status()
-            return response
-        except httpx.HTTPStatusError as e:
-            error_detail = "No detail provided."
-            try:
-                error_detail = e.response.json().get("detail", error_detail)
-            except Exception:
-                error_detail = e.response.text
-            raise DeepRailsAPIError(status_code=e.response.status_code, error_detail=error_detail) from e
-        except httpx.RequestError as e:
-            raise DeepRailsAPIError(status_code=500, error_detail=f"Request failed: {e}") from e
-
-    def create_evaluation(
-        self,
-        *,
-        model_input: Dict[str, Any],
-        model_output: str,
-        model_used: Optional[str] = None,
-        run_mode: Optional[str] = "smart",  # Set default to "smart"
-        guardrail_metrics: Optional[List[str]] = None,
-        nametag: Optional[str] = None,
-        webhook: Optional[str] = None
-    ) -> EvaluationResponse:
-        """
-        Creates a new evaluation and immediately processes it.
-
-        Args:
-            model_input: A dictionary containing the inputs for the model.
-                Must contain a "user_prompt" key.
-            model_output: The response generated by the model you are evaluating.
-            model_used: The name or identifier of the model being evaluated.
-            run_mode: The evaluation mode (e.g., "smart", "dev").
-            guardrail_metrics: A list of metrics to evaluate.
-            nametag: A user-defined name or tag for the evaluation.
-            webhook: A URL to send a POST request to upon evaluation completion.
-
-        Returns:
-            An EvaluationResponse object with the details of the created evaluation.
-        """
-        if "user_prompt" not in model_input:
-            raise ValueError("`model_input` must contain a 'user_prompt' key.")
-
-        payload = {
-            "model_input": model_input,
-            "model_output": model_output,
-            "model_used": model_used,
-            "run_mode": run_mode,
-            "guardrail_metrics": guardrail_metrics,
-            "nametag": nametag,
-            "webhook": webhook,
-        }
-        json_payload = {k: v for k, v in payload.items() if v is not None}
-
-        response = self._request("POST", "/evaluate", json=json_payload)
-        return EvaluationResponse.parse_obj(response.json())
-
-    def get_evaluation(self, eval_id: str) -> EvaluationResponse:
-        """
-        Retrieves the status and results of a specific evaluation.
-
-        Args:
-            eval_id: The unique identifier of the evaluation.
-
-        Returns:
-            An EvaluationResponse object with the full, up-to-date details of the evaluation.
-        """
-        response = self._request("GET", f"/evaluate/{eval_id}")
-        return EvaluationResponse.parse_obj(response.json())
````
deeprails-0.2.1/deeprails/schemas.py (removed)

````diff
@@ -1,30 +0,0 @@
-from typing import List, Optional, Dict, Any
-from pydantic import BaseModel, Field
-from datetime import datetime
-
-
-class EvaluationResponse(BaseModel):
-    """Represents the response for an evaluation from the DeepRails API."""
-    eval_id: str
-    evaluation_status: str
-    guardrail_metrics: Optional[List[str]] = None
-    model_used: Optional[str] = None
-    run_mode: Optional[str] = None
-    model_input: Optional[Dict[str, Any]] = None
-    model_output: Optional[str] = None
-    estimated_cost: Optional[float] = None
-    input_tokens: Optional[int] = None
-    output_tokens: Optional[int] = None
-    nametag: Optional[str] = None
-    progress: Optional[int] = Field(None, ge=0, le=100)
-    start_timestamp: Optional[datetime] = None
-    completion_timestamp: Optional[datetime] = None
-    error_message: Optional[str] = None
-    error_timestamp: Optional[datetime] = None
-    evaluation_result: Optional[Dict[str, Any]] = None
-    evaluation_total_cost: Optional[float] = None
-    created_at: Optional[datetime] = None
-    modified_at: Optional[datetime] = None
-
-    class Config:
-        extra = 'ignore'
````
Files without changes: LICENSE, deeprails/__init__.py, deeprails/exceptions.py.