deeprails 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deeprails might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: deeprails
- Version: 0.2.0
+ Version: 0.3.0
  Summary: Python SDK for interacting with the DeepRails API
  Project-URL: Homepage, https://deeprails.com
  Project-URL: Documentation, https://docs.deeprails.com
@@ -55,12 +55,17 @@ client = DeepRails(token="YOUR_API_KEY")
  evaluation = client.create_evaluation(
      model_input={"user_prompt": "Prompt used to generate completion"},
      model_output="Generated output",
-     model_used="gpt-4o-mini (LLM used to generate completion)",
+     model_used="gpt-4o-mini",
      guardrail_metrics=["correctness", "completeness"]
  )
-
- # Print evaluation ID
  print(f"Evaluation created with ID: {evaluation.eval_id}")
+
+ # Create a monitor
+ monitor = client.create_monitor(
+     name="Production Assistant Monitor",
+     description="Tracking our production assistant quality"
+ )
+ print(f"Monitor created with ID: {monitor.monitor_id}")
  ```

  ## Features
@@ -69,6 +74,7 @@ print(f"Evaluation created with ID: {evaluation.eval_id}")
  - **Comprehensive Metrics**: Evaluate outputs on correctness, completeness, and more
  - **Real-time Progress**: Track evaluation progress in real-time
  - **Detailed Results**: Get detailed scores and rationales for each metric
+ - **Continuous Monitoring**: Create monitors to track AI system performance over time

  ## Authentication

@@ -81,14 +87,16 @@ token = os.environ.get("DEEPRAILS_API_KEY")
  client = DeepRails(token=token)
  ```

- ## Creating Evaluations
+ ## Evaluation Service
+
+ ### Creating Evaluations

  ```python
  try:
      evaluation = client.create_evaluation(
          model_input={"user_prompt": "Prompt used to generate completion"},
          model_output="Generated output",
-         model_used="gpt-4o-mini (LLM used to generate completion)",
+         model_used="gpt-4o-mini",
          guardrail_metrics=["correctness", "completeness"]
      )
      print(f"ID: {evaluation.eval_id}")
@@ -98,7 +106,7 @@ except Exception as e:
      print(f"Error: {e}")
  ```

- ### Parameters
+ #### Parameters

  - `model_input`: Dictionary containing the prompt and any context (must include `user_prompt`)
  - `model_output`: The generated output to evaluate
@@ -108,7 +116,7 @@ except Exception as e:
  - `nametag`: (Optional) Custom identifier for this evaluation
  - `webhook`: (Optional) URL to receive completion notifications

- ## Retrieving Evaluations
+ ### Retrieving Evaluations

  ```python
  try:
@@ -126,13 +134,84 @@ except Exception as e:
      print(f"Error: {e}")
  ```

+ ## Monitor Service
+
+ ### Creating Monitors
+
+ ```python
+ try:
+     # Create a monitor
+     monitor = client.create_monitor(
+         name="Production Chat Assistant Monitor",
+         description="Monitoring our production chatbot responses"
+     )
+
+     print(f"Monitor created with ID: {monitor.monitor_id}")
+ except Exception as e:
+     print(f"Error: {e}")
+ ```
+
+ ### Logging Monitor Events
+
+ ```python
+ try:
+     # Add an event to the monitor
+     event = client.create_monitor_event(
+         monitor_id="mon-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
+         model_input={"user_prompt": "Tell me about renewable energy"},
+         model_output="Renewable energy comes from natural sources...",
+         model_used="gpt-4o-mini",
+         guardrail_metrics=["correctness", "completeness", "comprehensive_safety"]
+     )
+
+     print(f"Monitor event created with ID: {event.event_id}")
+     print(f"Associated evaluation ID: {event.evaluation_id}")
+ except Exception as e:
+     print(f"Error: {e}")
+ ```
+
+ ### Retrieving Monitor Data
+
+ ```python
+ try:
+     # Get monitor details
+     monitor = client.get_monitor("mon-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")
+     print(f"Monitor name: {monitor.name}")
+     print(f"Status: {monitor.monitor_status}")
+
+     # Get monitor events
+     events = client.get_monitor_events(
+         monitor_id="mon-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
+         limit=10
+     )
+
+     for event in events:
+         print(f"Event ID: {event.event_id}")
+         print(f"Evaluation ID: {event.evaluation_id}")
+
+     # List all monitors with filtering
+     monitors = client.get_monitors(
+         limit=5,
+         monitor_status=["active"],
+         sort_by="created_at",
+         sort_order="desc"
+     )
+
+     print(f"Total monitors: {monitors.pagination.total_count}")
+     for m in monitors.monitors:
+         print(f"{m.name}: {m.event_count} events")
+ except Exception as e:
+     print(f"Error: {e}")
+ ```
+
  ## Available Metrics

- - `correctness`: Evaluates factual accuracy of the output
- - `completeness`: Checks if the output addresses all aspects of the prompt
- - `harmfulness`: Detects potentially harmful content
- - `bias`: Identifies biased language or reasoning
- - And more...
+ - `correctness`: Measures factual accuracy by evaluating whether each claim in the output is true and verifiable.
+ - `completeness`: Assesses whether the response addresses all necessary parts of the prompt with sufficient detail and relevance.
+ - `instruction_adherence`: Checks whether the AI followed the explicit instructions in the prompt and system directives.
+ - `context_adherence`: Determines whether each factual claim is directly supported by the provided context.
+ - `ground_truth_adherence`: Measures how closely the output matches a known correct answer (gold standard).
+ - `comprehensive_safety`: Detects and categorizes safety violations across areas like PII, CBRN, hate speech, self-harm, and more.

  ## Error Handling

@@ -0,0 +1,198 @@
+ # DeepRails Python SDK
+
+ A lightweight, intuitive Python SDK for interacting with the DeepRails API. DeepRails helps you evaluate and improve AI-generated outputs through a comprehensive set of guardrail metrics.
+
+ ## Installation
+
+ ```bash
+ pip install deeprails
+ ```
+
+ ## Quick Start
+
+ ```python
+ from deeprails import DeepRails
+
+ # Initialize with your API token
+ client = DeepRails(token="YOUR_API_KEY")
+
+ # Create an evaluation
+ evaluation = client.create_evaluation(
+     model_input={"user_prompt": "Prompt used to generate completion"},
+     model_output="Generated output",
+     model_used="gpt-4o-mini",
+     guardrail_metrics=["correctness", "completeness"]
+ )
+ print(f"Evaluation created with ID: {evaluation.eval_id}")
+
+ # Create a monitor
+ monitor = client.create_monitor(
+     name="Production Assistant Monitor",
+     description="Tracking our production assistant quality"
+ )
+ print(f"Monitor created with ID: {monitor.monitor_id}")
+ ```
+
+ ## Features
+
+ - **Simple API**: Just a few lines of code to integrate evaluation into your workflow
+ - **Comprehensive Metrics**: Evaluate outputs on correctness, completeness, and more
+ - **Real-time Progress**: Track evaluation progress in real-time
+ - **Detailed Results**: Get detailed scores and rationales for each metric
+ - **Continuous Monitoring**: Create monitors to track AI system performance over time
+
+ ## Authentication
+
+ All API requests require authentication using your DeepRails API key. Your API key is a sensitive credential that should be kept secure.
+
+ ```python
+ # Best practice: Load token from environment variable
+ import os
+ token = os.environ.get("DEEPRAILS_API_KEY")
+ client = DeepRails(token=token)
+ ```
+
+ ## Evaluation Service
+
+ ### Creating Evaluations
+
+ ```python
+ try:
+     evaluation = client.create_evaluation(
+         model_input={"user_prompt": "Prompt used to generate completion"},
+         model_output="Generated output",
+         model_used="gpt-4o-mini",
+         guardrail_metrics=["correctness", "completeness"]
+     )
+     print(f"ID: {evaluation.eval_id}")
+     print(f"Status: {evaluation.evaluation_status}")
+     print(f"Progress: {evaluation.progress}%")
+ except Exception as e:
+     print(f"Error: {e}")
+ ```
+
+ #### Parameters
+
+ - `model_input`: Dictionary containing the prompt and any context (must include `user_prompt`)
+ - `model_output`: The generated output to evaluate
+ - `model_used`: (Optional) The model that generated the output
+ - `run_mode`: (Optional) Evaluation run mode - defaults to "smart"
+ - `guardrail_metrics`: (Optional) List of metrics to evaluate
+ - `nametag`: (Optional) Custom identifier for this evaluation
+ - `webhook`: (Optional) URL to receive completion notifications
+
+ ### Retrieving Evaluations
+
+ ```python
+ try:
+     eval_id = "eval-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
+     evaluation = client.get_evaluation(eval_id)
+
+     print(f"Status: {evaluation.evaluation_status}")
+
+     if evaluation.evaluation_result:
+         print("\nResults:")
+         for metric, result in evaluation.evaluation_result.items():
+             score = result.get('score', 'N/A')
+             print(f" {metric}: {score}")
+ except Exception as e:
+     print(f"Error: {e}")
+ ```
+
+ ## Monitor Service
+
+ ### Creating Monitors
+
+ ```python
+ try:
+     # Create a monitor
+     monitor = client.create_monitor(
+         name="Production Chat Assistant Monitor",
+         description="Monitoring our production chatbot responses"
+     )
+
+     print(f"Monitor created with ID: {monitor.monitor_id}")
+ except Exception as e:
+     print(f"Error: {e}")
+ ```
+
+ ### Logging Monitor Events
+
+ ```python
+ try:
+     # Add an event to the monitor
+     event = client.create_monitor_event(
+         monitor_id="mon-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
+         model_input={"user_prompt": "Tell me about renewable energy"},
+         model_output="Renewable energy comes from natural sources...",
+         model_used="gpt-4o-mini",
+         guardrail_metrics=["correctness", "completeness", "comprehensive_safety"]
+     )
+
+     print(f"Monitor event created with ID: {event.event_id}")
+     print(f"Associated evaluation ID: {event.evaluation_id}")
+ except Exception as e:
+     print(f"Error: {e}")
+ ```
+
+ ### Retrieving Monitor Data
+
+ ```python
+ try:
+     # Get monitor details
+     monitor = client.get_monitor("mon-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")
+     print(f"Monitor name: {monitor.name}")
+     print(f"Status: {monitor.monitor_status}")
+
+     # Get monitor events
+     events = client.get_monitor_events(
+         monitor_id="mon-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
+         limit=10
+     )
+
+     for event in events:
+         print(f"Event ID: {event.event_id}")
+         print(f"Evaluation ID: {event.evaluation_id}")
+
+     # List all monitors with filtering
+     monitors = client.get_monitors(
+         limit=5,
+         monitor_status=["active"],
+         sort_by="created_at",
+         sort_order="desc"
+     )
+
+     print(f"Total monitors: {monitors.pagination.total_count}")
+     for m in monitors.monitors:
+         print(f"{m.name}: {m.event_count} events")
+ except Exception as e:
+     print(f"Error: {e}")
+ ```
+
+ ## Available Metrics
+
+ - `correctness`: Measures factual accuracy by evaluating whether each claim in the output is true and verifiable.
+ - `completeness`: Assesses whether the response addresses all necessary parts of the prompt with sufficient detail and relevance.
+ - `instruction_adherence`: Checks whether the AI followed the explicit instructions in the prompt and system directives.
+ - `context_adherence`: Determines whether each factual claim is directly supported by the provided context.
+ - `ground_truth_adherence`: Measures how closely the output matches a known correct answer (gold standard).
+ - `comprehensive_safety`: Detects and categorizes safety violations across areas like PII, CBRN, hate speech, self-harm, and more.
+
+ ## Error Handling
+
+ The SDK throws `DeepRailsAPIError` for API-related errors, with status code and detailed message.
+
+ ```python
+ from deeprails import DeepRailsAPIError
+
+ try:
+     # SDK operations
+ except DeepRailsAPIError as e:
+     print(f"API Error: {e.status_code} - {e.error_detail}")
+ except Exception as e:
+     print(f"Unexpected error: {e}")
+ ```
+
+ ## Support
+
+ For questions or support, please contact support@deeprails.ai.
@@ -0,0 +1,285 @@
+ import httpx
+ from typing import List, Optional, Dict, Any
+
+ from .schemas import EvaluationResponse
+ from .exceptions import DeepRailsAPIError
+
+ class DeepRails:
+     """
+     Python SDK client for the DeepRails API.
+     """
+
+     def __init__(self, token: str, base_url: str = "https://api.deeprails.com"):
+         """
+         Initializes the DeepRails client.
+
+         Args:
+             token: Your DeepRails API key (starts with 'sk_').
+             base_url: The base URL of the DeepRails API.
+         """
+         if not token:
+             raise ValueError("A valid DeepRails API token is required.")
+
+         self._base_url = base_url
+         self._headers = {
+             "Authorization": f"Bearer {token}",
+             "Content-Type": "application/json",
+             "User-Agent": "deeprails-python-sdk/0.3.0"
+         }
+         self._client = httpx.Client(base_url=self._base_url, headers=self._headers, timeout=30.0)
+
+
+     def _request(self, method: str, endpoint: str, **kwargs) -> httpx.Response:
+         """Helper method to make requests and handle API errors."""
+         try:
+             response = self._client.request(method, endpoint, **kwargs)
+             response.raise_for_status()
+             return response
+         except httpx.HTTPStatusError as e:
+             error_detail = "No detail provided."
+             try:
+                 error_detail = e.response.json().get("detail", error_detail)
+             except Exception:
+                 error_detail = e.response.text
+             raise DeepRailsAPIError(status_code=e.response.status_code, error_detail=error_detail) from e
+         except httpx.RequestError as e:
+             raise DeepRailsAPIError(status_code=500, error_detail=f"Request failed: {e}") from e
+
+
+     def create_evaluation(
+         self,
+         *,
+         model_input: Dict[str, Any],
+         model_output: str,
+         model_used: Optional[str] = None,
+         run_mode: Optional[str] = "smart", # Set default to "smart"
+         guardrail_metrics: Optional[List[str]] = None,
+         nametag: Optional[str] = None,
+         webhook: Optional[str] = None
+     ) -> EvaluationResponse:
+         """
+         Creates a new evaluation and immediately processes it.
+
+         Args:
+             model_input: A dictionary containing the inputs for the model.
+                 Must contain a "user_prompt" key.
+             model_output: The response generated by the model you are evaluating.
+             model_used: The name or identifier of the model being evaluated.
+             run_mode: The evaluation mode (e.g., "smart", "dev").
+             guardrail_metrics: A list of metrics to evaluate.
+             nametag: A user-defined name or tag for the evaluation.
+             webhook: A URL to send a POST request to upon evaluation completion.
+
+         Returns:
+             An EvaluationResponse object with the details of the created evaluation.
+         """
+         if "user_prompt" not in model_input:
+             raise ValueError("`model_input` must contain a 'user_prompt' key.")
+
+         payload = {
+             "model_input": model_input,
+             "model_output": model_output,
+             "model_used": model_used,
+             "run_mode": run_mode,
+             "guardrail_metrics": guardrail_metrics,
+             "nametag": nametag,
+             "webhook": webhook,
+         }
+         json_payload = {k: v for k, v in payload.items() if v is not None}
+
+         response = self._request("POST", "/evaluate", json=json_payload)
+         return EvaluationResponse.parse_obj(response.json())
+
+
+     def get_evaluation(self, eval_id: str) -> EvaluationResponse:
+         """
+         Retrieves the status and results of a specific evaluation.
+
+         Args:
+             eval_id: The unique identifier of the evaluation.
+
+         Returns:
+             An EvaluationResponse object with the full, up-to-date details of the evaluation.
+         """
+         response = self._request("GET", f"/evaluate/{eval_id}")
+         return EvaluationResponse.parse_obj(response.json())
+
+
+     def create_monitor(
+         self,
+         *,
+         name: str,
+         description: Optional[str] = None
+     ) -> MonitorResponse:
+         """
+         Creates a new monitor for tracking AI responses.
+
+         Args:
+             name: A name for the monitor.
+             description: Optional description of the monitor's purpose.
+
+         Returns:
+             A MonitorResponse object with the details of the created monitor.
+         """
+         payload = {
+             "name": name,
+             "description": description
+         }
+
+         # Remove None values
+         json_payload = {k: v for k, v in payload.items() if v is not None}
+
+         response = self._request("POST", "/monitor", json=json_payload)
+         response_json = response.json()
+
+         # Handle DeepRails API response structure
+         if "data" in response_json:
+             return MonitorResponse.parse_obj(response_json["data"])
+         else:
+             return MonitorResponse.parse_obj(response_json)
+
+     def get_monitor(self, monitor_id: str) -> MonitorResponse:
+         """
+         Get details of a specific monitor.
+
+         Args:
+             monitor_id: The ID of the monitor to retrieve.
+
+         Returns:
+             A MonitorResponse object with the monitor details.
+         """
+         response = self._request("GET", f"/monitor/{monitor_id}")
+         response_json = response.json()
+
+         # Handle DeepRails API response structure
+         if "data" in response_json:
+             return MonitorResponse.parse_obj(response_json["data"])
+         else:
+             return MonitorResponse.parse_obj(response_json)
+
+     def create_monitor_event(
+         self,
+         *,
+         monitor_id: str,
+         model_input: Dict[str, Any],
+         model_output: str,
+         guardrail_metrics: List[str],
+         model_used: Optional[str] = None,
+         run_mode: Optional[str] = None,
+         nametag: Optional[str] = None,
+         webhook: Optional[str] = None
+     ) -> MonitorEventResponse:
+         """
+         Creates a new event for a monitor.
+
+         Args:
+             monitor_id: The ID of the monitor to create an event for.
+             model_input: A dictionary containing the inputs for the model.
+             model_output: The response generated by the model you are evaluating.
+             guardrail_metrics: A list of metrics to evaluate.
+             model_used: The name or identifier of the model being evaluated.
+             run_mode: The evaluation mode (e.g., "smart", "dev").
+             nametag: A user-defined name or tag for the event.
+             webhook: A URL to send a POST request to upon evaluation completion.
+
+         Returns:
+             A MonitorEventResponse object with the details of the created event.
+         """
+         payload = {
+             "model_input": model_input,
+             "model_output": model_output,
+             "model_used": model_used,
+             "run_mode": run_mode,
+             "guardrail_metrics": guardrail_metrics,
+             "nametag": nametag,
+             "webhook": webhook,
+         }
+
+         # Remove None values
+         json_payload = {k: v for k, v in payload.items() if v is not None}
+
+         response = self._request("POST", f"/monitor/{monitor_id}/events", json=json_payload)
+         response_json = response.json()
+
+         # Handle DeepRails API response structure
+         if "data" in response_json:
+             return MonitorEventResponse.parse_obj(response_json["data"])
+         else:
+             return MonitorEventResponse.parse_obj(response_json)
+
+     def get_monitor_events(
+         self,
+         monitor_id: str,
+         limit: int = 10,
+         offset: int = 0
+     ) -> List[MonitorEventResponse]:
+         """
+         Retrieves events for a specific monitor.
+
+         Args:
+             monitor_id: The ID of the monitor to get events for.
+             limit: Maximum number of events to return (default: 10).
+             offset: Offset for pagination (default: 0).
+
+         Returns:
+             A list of MonitorEventResponse objects with details of the monitor events.
+         """
+         params = {
+             "limit": limit,
+             "offset": offset
+         }
+
+         response = self._request("GET", f"/monitor/{monitor_id}/events", params=params)
+         response_json = response.json()
+
+         # Handle DeepRails API response structure
+         if "data" in response_json and isinstance(response_json["data"], list):
+             return [MonitorEventResponse.parse_obj(event) for event in response_json["data"]]
+         else:
+             # Fallback if the response structure is unexpected
+             return []
+
+     def get_monitors(
+         self,
+         *,
+         page: int = 1,
+         limit: int = 20,
+         search: Optional[List[str]] = None,
+         monitor_status: Optional[List[str]] = None,
+         date_from: Optional[str] = None,
+         date_to: Optional[str] = None,
+         sort_by: str = "created_at",
+         sort_order: str = "desc"
+     ) -> MonitorListResponse:
+         """
+         Get a paginated list of monitors with optional filtering.
+
+         Args:
+             page: Page number for pagination (default: 1)
+             limit: Number of items per page (default: 20, max: 100)
+             search: Optional list of free-text search terms
+             monitor_status: Optional list of monitor statuses ("active", "inactive", "all")
+             date_from: Optional filter for monitors from this date (ISO format)
+             date_to: Optional filter for monitors to this date (ISO format)
+             sort_by: Field to sort by (default: "created_at")
+             sort_order: Sort order (default: "desc")
+
+         Returns:
+             A MonitorListResponse object containing monitors, pagination info, and applied filters.
+         """
+         params = {
+             "page": page,
+             "limit": limit,
+             "sort_by": sort_by,
+             "sort_order": sort_order,
+             "search": search,
+             "monitor_status": monitor_status,
+             "date_from": date_from,
+             "date_to": date_to
+         }
+
+         # Remove None values
+         params = {k: v for k, v in params.items() if v is not None}
+
+         response = self._request("GET", "/monitor", params=params)
+         return MonitorListResponse.parse_obj(response.json())
@@ -0,0 +1,92 @@
+ from typing import List, Optional, Dict, Any
+ from pydantic import BaseModel, Field
+ from datetime import datetime
+
+
+ class EvaluationResponse(BaseModel):
+     """Represents the response for an evaluation from the DeepRails API."""
+     eval_id: str
+     evaluation_status: str
+     guardrail_metrics: Optional[List[str]] = None
+     model_used: Optional[str] = None
+     run_mode: Optional[str] = None
+     model_input: Optional[Dict[str, Any]] = None
+     model_output: Optional[str] = None
+     estimated_cost: Optional[float] = None
+     input_tokens: Optional[int] = None
+     output_tokens: Optional[int] = None
+     nametag: Optional[str] = None
+     progress: Optional[int] = Field(None, ge=0, le=100)
+     start_timestamp: Optional[datetime] = None
+     completion_timestamp: Optional[datetime] = None
+     error_message: Optional[str] = None
+     error_timestamp: Optional[datetime] = None
+     evaluation_result: Optional[Dict[str, Any]] = None
+     evaluation_total_cost: Optional[float] = None
+     created_at: Optional[datetime] = None
+     modified_at: Optional[datetime] = None
+
+     class Config:
+         extra = 'ignore'
+
+ class MonitorResponse(BaseModel):
+     """Represents a monitor from the DeepRails API."""
+     monitor_id: str
+     user_id: str
+     name: str
+     description: Optional[str] = None
+     monitor_status: str
+     created_at: str
+     updated_at: str
+
+     class Config:
+         extra = 'ignore'
+
+ class MonitorEventCreate(BaseModel):
+     """Model for creating a new monitor event."""
+     model_input: Dict[str, Any]
+     model_output: str
+     model_used: Optional[str] = None
+     run_mode: Optional[str] = None
+     guardrail_metrics: List[str]
+     nametag: Optional[str] = None
+     webhook: Optional[str] = None
+
+ class MonitorEventResponse(BaseModel):
+     """Response model for a monitor event."""
+     event_id: str
+     monitor_id: str
+     evaluation_id: str
+     created_at: str
+
+     class Config:
+         extra = 'ignore'
+
+ class PaginationInfo(BaseModel):
+     """Pagination information for list responses."""
+     page: int
+     limit: int
+     total_pages: int
+     total_count: int
+     has_next: bool
+     has_previous: bool
+
+ class MonitorFiltersApplied(BaseModel):
+     """Information about which filters were applied to the monitor query."""
+     search: Optional[List[str]] = None
+     status: Optional[List[str]] = None
+     date_from: Optional[str] = None
+     date_to: Optional[str] = None
+     sort_by: Optional[str] = None
+     sort_order: Optional[str] = None
+
+ class MonitorWithEventCountResponse(MonitorResponse):
+     """Monitor response with event count information."""
+     event_count: int
+     latest_event_modified_at: Optional[str] = None
+
+ class MonitorListResponse(BaseModel):
+     """Response model for a paginated list of monitors."""
+     monitors: List[MonitorWithEventCountResponse]
+     pagination: PaginationInfo
+     filters_applied: MonitorFiltersApplied
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

  [project]
  name = "deeprails"
- version = "0.2.0"
+ version = "0.3.0"
  description = "Python SDK for interacting with the DeepRails API"
  readme = {file = "README.md", content-type = "text/markdown"}
  authors = [{name = "Neil Mate", email = "support@deeprails.ai"}]
deeprails-0.2.0/README.md DELETED
@@ -1,119 +0,0 @@
- # DeepRails Python SDK
-
- A lightweight, intuitive Python SDK for interacting with the DeepRails API. DeepRails helps you evaluate and improve AI-generated outputs through a comprehensive set of guardrail metrics.
-
- ## Installation
-
- ```bash
- pip install deeprails
- ```
-
- ## Quick Start
-
- ```python
- from deeprails import DeepRails
-
- # Initialize with your API token
- client = DeepRails(token="YOUR_API_KEY")
-
- # Create an evaluation
- evaluation = client.create_evaluation(
-     model_input={"user_prompt": "Prompt used to generate completion"},
-     model_output="Generated output",
-     model_used="gpt-4o-mini (LLM used to generate completion)",
-     guardrail_metrics=["correctness", "completeness"]
- )
-
- # Print evaluation ID
- print(f"Evaluation created with ID: {evaluation.eval_id}")
- ```
-
- ## Features
-
- - **Simple API**: Just a few lines of code to integrate evaluation into your workflow
- - **Comprehensive Metrics**: Evaluate outputs on correctness, completeness, and more
- - **Real-time Progress**: Track evaluation progress in real-time
- - **Detailed Results**: Get detailed scores and rationales for each metric
-
- ## Authentication
-
- All API requests require authentication using your DeepRails API key. Your API key is a sensitive credential that should be kept secure.
-
- ```python
- # Best practice: Load token from environment variable
- import os
- token = os.environ.get("DEEPRAILS_API_KEY")
- client = DeepRails(token=token)
- ```
-
- ## Creating Evaluations
-
- ```python
- try:
-     evaluation = client.create_evaluation(
-         model_input={"user_prompt": "Prompt used to generate completion"},
-         model_output="Generated output",
-         model_used="gpt-4o-mini (LLM used to generate completion)",
-         guardrail_metrics=["correctness", "completeness"]
-     )
-     print(f"ID: {evaluation.eval_id}")
-     print(f"Status: {evaluation.evaluation_status}")
-     print(f"Progress: {evaluation.progress}%")
- except Exception as e:
-     print(f"Error: {e}")
- ```
-
- ### Parameters
-
- - `model_input`: Dictionary containing the prompt and any context (must include `user_prompt`)
- - `model_output`: The generated output to evaluate
- - `model_used`: (Optional) The model that generated the output
- - `run_mode`: (Optional) Evaluation run mode - defaults to "smart"
- - `guardrail_metrics`: (Optional) List of metrics to evaluate
- - `nametag`: (Optional) Custom identifier for this evaluation
- - `webhook`: (Optional) URL to receive completion notifications
-
- ## Retrieving Evaluations
-
- ```python
- try:
-     eval_id = "eval-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
-     evaluation = client.get_evaluation(eval_id)
-
-     print(f"Status: {evaluation.evaluation_status}")
-
-     if evaluation.evaluation_result:
-         print("\nResults:")
-         for metric, result in evaluation.evaluation_result.items():
-             score = result.get('score', 'N/A')
-             print(f" {metric}: {score}")
- except Exception as e:
-     print(f"Error: {e}")
- ```
-
- ## Available Metrics
-
- - `correctness`: Evaluates factual accuracy of the output
- - `completeness`: Checks if the output addresses all aspects of the prompt
- - `harmfulness`: Detects potentially harmful content
- - `bias`: Identifies biased language or reasoning
- - And more...
-
- ## Error Handling
-
- The SDK throws `DeepRailsAPIError` for API-related errors, with status code and detailed message.
-
- ```python
- from deeprails import DeepRailsAPIError
-
- try:
-     # SDK operations
- except DeepRailsAPIError as e:
-     print(f"API Error: {e.status_code} - {e.error_detail}")
- except Exception as e:
-     print(f"Unexpected error: {e}")
- ```
-
- ## Support
-
- For questions or support, please contact support@deeprails.ai.
@@ -1,101 +0,0 @@
- import httpx
- from typing import List, Optional, Dict, Any
-
- from .schemas import EvaluationResponse
- from .exceptions import DeepRailsAPIError
-
- class DeepRails:
-     """
-     Python SDK client for the DeepRails API.
-     """
-     def __init__(self, token: str, base_url: str = "https://api.deeprails.com"):
-         """
-         Initializes the DeepRails client.
-
-         Args:
-             token: Your DeepRails API key (starts with 'sk_').
-             base_url: The base URL of the DeepRails API.
-         """
-         if not token:
-             raise ValueError("A valid DeepRails API token is required.")
-
-         self._base_url = base_url
-         self._headers = {
-             "Authorization": f"Bearer {token}",
-             "Content-Type": "application/json",
-             "User-Agent": "deeprails-python-sdk/0.2.0"
-         }
-         self._client = httpx.Client(base_url=self._base_url, headers=self._headers, timeout=30.0)
-
-     def _request(self, method: str, endpoint: str, **kwargs) -> httpx.Response:
-         """Helper method to make requests and handle API errors."""
-         try:
-             response = self._client.request(method, endpoint, **kwargs)
-             response.raise_for_status()
-             return response
-         except httpx.HTTPStatusError as e:
-             error_detail = "No detail provided."
-             try:
-                 error_detail = e.response.json().get("detail", error_detail)
-             except Exception:
-                 error_detail = e.response.text
-             raise DeepRailsAPIError(status_code=e.response.status_code, error_detail=error_detail) from e
-         except httpx.RequestError as e:
-             raise DeepRailsAPIError(status_code=500, error_detail=f"Request failed: {e}") from e
-
-     def create_evaluation(
-         self,
-         *,
-         model_input: Dict[str, Any],
-         model_output: str,
-         model_used: Optional[str] = None,
-         run_mode: Optional[str] = "smart", # Set default to "smart"
-         guardrail_metrics: Optional[List[str]] = None,
-         nametag: Optional[str] = None,
-         webhook: Optional[str] = None
-     ) -> EvaluationResponse:
-         """
-         Creates a new evaluation and immediately processes it.
-
-         Args:
-             model_input: A dictionary containing the inputs for the model.
-                 Must contain a "user_prompt" key.
-             model_output: The response generated by the model you are evaluating.
-             model_used: The name or identifier of the model being evaluated.
-             run_mode: The evaluation mode (e.g., "smart", "dev").
-             guardrail_metrics: A list of metrics to evaluate.
-             nametag: A user-defined name or tag for the evaluation.
-             webhook: A URL to send a POST request to upon evaluation completion.
-
-         Returns:
-             An EvaluationResponse object with the details of the created evaluation.
-         """
-         if "user_prompt" not in model_input:
-             raise ValueError("`model_input` must contain a 'user_prompt' key.")
-
-         payload = {
-             "model_input": model_input,
-             "model_output": model_output,
-             "model_used": model_used,
-             "run_mode": run_mode,
-             "guardrail_metrics": guardrail_metrics,
-             "nametag": nametag,
-             "webhook": webhook,
-         }
-         json_payload = {k: v for k, v in payload.items() if v is not None}
-
-         response = self._request("POST", "/evaluate", json=json_payload)
-         return EvaluationResponse.parse_obj(response.json())
-
-     def get_evaluation(self, eval_id: str) -> EvaluationResponse:
-         """
-         Retrieves the status and results of a specific evaluation.
-
-         Args:
-             eval_id: The unique identifier of the evaluation.
-
-         Returns:
-             An EvaluationResponse object with the full, up-to-date details of the evaluation.
-         """
-         response = self._request("GET", f"/evaluate/{eval_id}")
-         return EvaluationResponse.parse_obj(response.json())
@@ -1,30 +0,0 @@
- from typing import List, Optional, Dict, Any
- from pydantic import BaseModel, Field
- from datetime import datetime
-
-
- class EvaluationResponse(BaseModel):
-     """Represents the response for an evaluation from the DeepRails API."""
-     eval_id: str
-     evaluation_status: str
-     guardrail_metrics: Optional[List[str]] = None
-     model_used: Optional[str] = None
-     run_mode: Optional[str] = None
-     model_input: Optional[Dict[str, Any]] = None
-     model_output: Optional[str] = None
-     estimated_cost: Optional[float] = None
-     input_tokens: Optional[int] = None
-     output_tokens: Optional[int] = None
-     nametag: Optional[str] = None
-     progress: Optional[int] = Field(None, ge=0, le=100)
-     start_timestamp: Optional[datetime] = None
-     completion_timestamp: Optional[datetime] = None
-     error_message: Optional[str] = None
-     error_timestamp: Optional[datetime] = None
-     evaluation_result: Optional[Dict[str, Any]] = None
-     evaluation_total_cost: Optional[float] = None
-     created_at: Optional[datetime] = None
-     modified_at: Optional[datetime] = None
-
-     class Config:
-         extra = 'ignore'