judgeval 0.0.41__py3-none-any.whl → 0.0.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
judgeval/common/utils.py CHANGED
@@ -103,7 +103,7 @@ def validate_api_key(judgment_api_key: str):
     Validates that the user api key is valid
     """
     response = requests.post(
-        f"{ROOT_API}/validate_api_key/",
+        f"{ROOT_API}/auth/validate_api_key/",
         headers={
             "Content-Type": "application/json",
             "Authorization": f"Bearer {judgment_api_key}",
judgeval/constants.py CHANGED
@@ -58,8 +58,13 @@ JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete/"
 JUDGMENT_PROJECT_CREATE_API_URL = f"{ROOT_API}/projects/add/"
 JUDGMENT_TRACES_FETCH_API_URL = f"{ROOT_API}/traces/fetch/"
 JUDGMENT_TRACES_SAVE_API_URL = f"{ROOT_API}/traces/save/"
+JUDGMENT_TRACES_UPSERT_API_URL = f"{ROOT_API}/traces/upsert/"
+JUDGMENT_TRACES_USAGE_CHECK_API_URL = f"{ROOT_API}/traces/usage/check/"
+JUDGMENT_TRACES_USAGE_UPDATE_API_URL = f"{ROOT_API}/traces/usage/update/"
 JUDGMENT_TRACES_DELETE_API_URL = f"{ROOT_API}/traces/delete/"
 JUDGMENT_TRACES_ADD_ANNOTATION_API_URL = f"{ROOT_API}/traces/add_annotation/"
+JUDGMENT_TRACES_SPANS_BATCH_API_URL = f"{ROOT_API}/traces/spans/batch/"
+JUDGMENT_TRACES_EVALUATION_RUNS_BATCH_API_URL = f"{ROOT_API}/traces/evaluation_runs/batch/"
 JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL = f"{ROOT_API}/add_to_run_eval_queue/"
 JUDGMENT_GET_EVAL_STATUS_API_URL = f"{ROOT_API}/get_evaluation_status/"
 # RabbitMQ
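
The new constants follow the same pattern as the existing trace endpoints. A minimal sketch of how one of them might be called; the endpoint constant and auth header mirror the code above, while the payload shape and the placeholder variables are assumptions for illustration only:

    import requests

    from judgeval.constants import JUDGMENT_TRACES_SPANS_BATCH_API_URL

    judgment_api_key = "..."  # placeholder; supply a real key
    spans = []                # placeholder; TraceSpan objects collected elsewhere

    # Hypothetical batch upload of serialized spans.
    response = requests.post(
        JUDGMENT_TRACES_SPANS_BATCH_API_URL,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {judgment_api_key}",  # same scheme as validate_api_key above
        },
        json={"spans": [span.model_dump() for span in spans]},  # assumed body shape
    )
    response.raise_for_status()
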
judgeval/data/trace.py CHANGED
@@ -54,7 +54,8 @@ class TraceSpan(BaseModel):
             "has_evaluation": self.has_evaluation,
             "agent_name": self.agent_name,
             "state_before": self.state_before,
-            "state_after": self.state_after
+            "state_after": self.state_after,
+            "additional_metadata": self._serialize_value(self.additional_metadata)
         }

     def print_span(self):
@@ -3,9 +3,11 @@ from uuid import UUID
 import time
 import uuid
 import contextvars # <--- Import contextvars
+from datetime import datetime

-from judgeval.common.tracer import TraceClient, TraceSpan, Tracer, SpanType, EvaluationConfig
+from judgeval.common.tracer import TraceClient, TraceSpan, Tracer, SpanType, EvaluationConfig, cost_per_token
 from judgeval.data import Example # Import Example
+from judgeval.data.trace import TraceUsage

 from langchain_core.callbacks import BaseCallbackHandler
 from langchain_core.agents import AgentAction, AgentFinish
@@ -36,18 +38,48 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
     def __init__(self, tracer: Tracer):

         self.tracer = tracer
+        # Initialize tracking/logging variables (preserved across resets)
+        self.executed_nodes: List[str] = []
+        self.executed_tools: List[str] = []
+        self.executed_node_tools: List[str] = []
+        self.traces: List[Dict[str, Any]] = []
+        # Initialize execution state (reset between runs)
+        self._reset_state()
+        # --- END NEW __init__ ---
+
+    def _reset_state(self):
+        """Reset only the critical execution state for reuse across multiple executions"""
+        # Reset core execution state that must be cleared between runs
         self._trace_client: Optional[TraceClient] = None
         self._run_id_to_span_id: Dict[UUID, str] = {}
         self._span_id_to_start_time: Dict[str, float] = {}
         self._span_id_to_depth: Dict[str, int] = {}
         self._root_run_id: Optional[UUID] = None
-        self._trace_saved: bool = False # Flag to prevent actions after trace is saved
-
-        self.executed_nodes: List[str] = [] # These last four members are only appended to and never accessed; can probably be removed but still might be useful for future reference?
+        self._trace_saved: bool = False
+        self.span_id_to_token: Dict[str, Any] = {}
+        self.trace_id_to_token: Dict[str, Any] = {}
+
+        # Add timestamp to track when we last reset
+        self._last_reset_time: float = time.time()
+
+        # Preserve tracking/logging variables across executions:
+        # - self.executed_nodes: List[str] = []       # Keep as running log
+        # - self.executed_tools: List[str] = []       # Keep as running log
+        # - self.executed_node_tools: List[str] = []  # Keep as running log
+        # - self.traces: List[Dict[str, Any]] = []    # Keep for collecting multiple traces
+
+    def reset(self):
+        """Public method to manually reset handler execution state for reuse"""
+        self._reset_state()
+
+    def reset_all(self):
+        """Public method to reset ALL handler state including tracking/logging data"""
+        self._reset_state()
+        # Also reset tracking/logging variables
+        self.executed_nodes: List[str] = []
         self.executed_tools: List[str] = []
         self.executed_node_tools: List[str] = []
         self.traces: List[Dict[str, Any]] = []
-        # --- END NEW __init__ ---

     # --- MODIFIED _ensure_trace_client ---
     def _ensure_trace_client(self, run_id: UUID, parent_run_id: Optional[UUID], event_name: str) -> Optional[TraceClient]:
@@ -57,6 +89,11 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
         Returns the client or None.
         """

+        # If this is a potential new root execution (no parent_run_id) and we had a previous trace saved,
+        # reset state to allow reuse of the handler
+        if parent_run_id is None and self._trace_saved:
+            self._reset_state()
+
         # If a client already exists, return it.
         if self._trace_client:
             return self._trace_client
@@ -73,11 +110,25 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
                 enable_evaluations=self.tracer.enable_evaluations
             )
             self._trace_client = client_instance
+            token = self.tracer.set_current_trace(self._trace_client)
+            if token:
+                self.trace_id_to_token[trace_id] = token
         if self._trace_client:
             self._root_run_id = run_id # Assign the first run_id encountered as the tentative root
             self._trace_saved = False # Ensure flag is reset
             # Set active client on Tracer (important for potential fallbacks)
             self.tracer._active_trace_client = self._trace_client
+
+            # NEW: Initial save for live tracking (follows the new practice)
+            try:
+                trace_id_saved, server_response = self._trace_client.save_with_rate_limiting(
+                    overwrite=self._trace_client.overwrite,
+                    final_save=False # Initial save for live tracking
+                )
+            except Exception as e:
+                import warnings
+                warnings.warn(f"Failed to save initial trace for live tracking: {e}")
+
             return self._trace_client
         else:
             return None
@@ -112,12 +163,7 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
         self._span_id_to_start_time[span_id] = start_time
         self._span_id_to_depth[span_id] = current_depth

-
-        # --- Set SPAN context variable ONLY for chain (node) spans (Sync version) ---
-        if span_type == "chain":
-            self.tracer.set_current_span(span_id)
-
-        new_trace = TraceSpan(
+        new_span = TraceSpan(
             span_id=span_id,
             trace_id=trace_client.trace_id,
             parent_span_id=parent_span_id,
@@ -127,9 +173,36 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
             span_type=span_type
         )

-        new_trace.inputs = inputs
-
-        trace_client.add_span(new_trace)
+        # Separate metadata from inputs
+        if inputs:
+            metadata = {}
+            clean_inputs = {}
+
+            # Extract metadata fields
+            metadata_fields = ['tags', 'metadata', 'kwargs', 'serialized']
+            for field in metadata_fields:
+                if field in inputs:
+                    metadata[field] = inputs.pop(field)
+
+            # Store the remaining inputs
+            clean_inputs = inputs
+
+            # Set both fields on the span
+            new_span.inputs = clean_inputs
+            new_span.additional_metadata = metadata
+        else:
+            new_span.inputs = {}
+            new_span.additional_metadata = {}
+
+        trace_client.add_span(new_span)
+
+        # Queue span with initial state (input phase) through background service
+        if trace_client.background_span_service:
+            trace_client.background_span_service.queue_span(new_span, span_state="input")
+
+        token = self.tracer.set_current_span(span_id)
+        if token:
+            self.span_id_to_token[span_id] = token

     def _end_span_tracking(
         self,
@@ -142,6 +215,8 @@ class JudgevalCallbackHandler(BaseCallbackHandler):

         # Get span ID and check if it exists
         span_id = self._run_id_to_span_id.get(run_id)
+        token = self.span_id_to_token.pop(span_id, None)
+        self.tracer.reset_current_span(token, span_id)

         start_time = self._span_id_to_start_time.get(span_id) if span_id else None
         duration = time.time() - start_time if start_time is not None else None
@@ -151,7 +226,38 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
             trace_span = trace_client.span_id_to_span.get(span_id)
             if trace_span:
                 trace_span.duration = duration
-                trace_span.output = error if error else outputs
+
+                # Handle outputs and error
+                if error:
+                    trace_span.output = error
+                elif outputs:
+                    # Separate metadata from outputs
+                    metadata = {}
+                    clean_outputs = {}
+
+                    # Extract metadata fields
+                    metadata_fields = ['tags', 'kwargs']
+                    if isinstance(outputs, dict):
+                        for field in metadata_fields:
+                            if field in outputs:
+                                metadata[field] = outputs.pop(field)
+
+                        # Store the remaining outputs
+                        clean_outputs = outputs
+                    else:
+                        clean_outputs = outputs
+
+                    # Set both fields on the span
+                    trace_span.output = clean_outputs
+                    if metadata:
+                        # Merge with existing metadata
+                        existing_metadata = trace_span.additional_metadata or {}
+                        trace_span.additional_metadata = {**existing_metadata, **metadata}
+
+                # Queue span with completed state through background service
+                if trace_client.background_span_service:
+                    span_state = "error" if error else "completed"
+                    trace_client.background_span_service.queue_span(trace_span, span_state=span_state)

         # Clean up dictionaries for this specific span
         if span_id in self._span_id_to_start_time: del self._span_id_to_start_time[span_id]
@@ -165,9 +271,30 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
         # Reset input storage for this handler instance

             if self._trace_client and not self._trace_saved: # Check if not already saved
-                # TODO: Check if trace_client.save needs await if TraceClient becomes async
-                trace_id, trace_data = self._trace_client.save(overwrite=self._trace_client.overwrite) # Use client's overwrite setting
-                self.traces.append(trace_data) # Leaving this in for now but can probably be removed
+                # Flush background spans before saving the final trace
+
+                complete_trace_data = {
+                    "trace_id": self._trace_client.trace_id,
+                    "name": self._trace_client.name,
+                    "created_at": datetime.utcfromtimestamp(self._trace_client.start_time).isoformat(),
+                    "duration": self._trace_client.get_duration(),
+                    "trace_spans": [span.model_dump() for span in self._trace_client.trace_spans],
+                    "overwrite": self._trace_client.overwrite,
+                    "offline_mode": self.tracer.offline_mode,
+                    "parent_trace_id": self._trace_client.parent_trace_id,
+                    "parent_name": self._trace_client.parent_name
+                }
+
+                # NEW: Use save_with_rate_limiting with final_save=True for final save
+                trace_id, trace_data = self._trace_client.save_with_rate_limiting(
+                    overwrite=self._trace_client.overwrite,
+                    final_save=True # Final save with usage counter updates
+                )
+                token = self.trace_id_to_token.pop(trace_id, None)
+                self.tracer.reset_current_trace(token, trace_id)
+
+                # Store complete trace data instead of server response
+                self.tracer.traces.append(complete_trace_data)
                 self._trace_saved = True # Set flag only after successful save
         finally:
             # --- NEW: Consolidated Cleanup Logic ---
@@ -254,10 +381,26 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
         # --- Root node cleanup (Existing logic - slightly modified save call) ---
         if run_id == self._root_run_id:
             if trace_client and not self._trace_saved:
-                # Save might need to be async if TraceClient methods become async
-                # Pass overwrite=True based on client's setting
-                trace_id_saved, trace_data = trace_client.save(overwrite=trace_client.overwrite)
-                self.traces.append(trace_data) # Leaving this in for now but can probably be removed
+                # Store complete trace data instead of server response
+                complete_trace_data = {
+                    "trace_id": trace_client.trace_id,
+                    "name": trace_client.name,
+                    "created_at": datetime.utcfromtimestamp(trace_client.start_time).isoformat(),
+                    "duration": trace_client.get_duration(),
+                    "trace_spans": [span.model_dump() for span in trace_client.trace_spans],
+                    "overwrite": trace_client.overwrite,
+                    "offline_mode": self.tracer.offline_mode,
+                    "parent_trace_id": trace_client.parent_trace_id,
+                    "parent_name": trace_client.parent_name
+                }
+                # NEW: Use save_with_rate_limiting with final_save=True for final save
+                trace_id_saved, trace_data = trace_client.save_with_rate_limiting(
+                    overwrite=trace_client.overwrite,
+                    final_save=True # Final save with usage counter updates
+                )
+
+                self.tracer.traces.append(complete_trace_data)
                 self._trace_saved = True
                 # Reset tracer's active client *after* successful save
                 if self.tracer._active_trace_client == trace_client:
@@ -333,11 +476,23 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
         if not trace_client:
             return
         outputs = {"response": response, "kwargs": kwargs}
-        # --- Token Usage Extraction and Accumulation ---
-        token_usage = None
-        prompt_tokens = None # Use standard name
-        completion_tokens = None # Use standard name
+
+        # --- Token Usage Extraction and Cost Calculation ---
+        prompt_tokens = None
+        completion_tokens = None
         total_tokens = None
+        model_name = None
+
+        # Extract model name from response if available
+        if hasattr(response, 'llm_output') and response.llm_output and isinstance(response.llm_output, dict):
+            model_name = response.llm_output.get('model_name') or response.llm_output.get('model')
+
+        # Try to get model from the first generation if available
+        if not model_name and response.generations and len(response.generations) > 0:
+            if hasattr(response.generations[0][0], 'generation_info') and response.generations[0][0].generation_info:
+                gen_info = response.generations[0][0].generation_info
+                model_name = gen_info.get('model') or gen_info.get('model_name')
+
         if response.llm_output and isinstance(response.llm_output, dict):
             # Check for OpenAI/standard 'token_usage' first
             if 'token_usage' in response.llm_output:
@@ -356,14 +511,43 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
         if prompt_tokens is not None and completion_tokens is not None:
             total_tokens = prompt_tokens + completion_tokens

-        # --- Store individual usage in span output and Accumulate ---
+        # --- Create TraceUsage object and set on span ---
         if prompt_tokens is not None or completion_tokens is not None:
-            # Store individual usage for this span
-            outputs['usage'] = {
-                'prompt_tokens': prompt_tokens,
-                'completion_tokens': completion_tokens,
-                'total_tokens': total_tokens
-            }
+            # Calculate costs if model name is available
+            prompt_cost = None
+            completion_cost = None
+            total_cost_usd = None
+
+            if model_name and prompt_tokens is not None and completion_tokens is not None:
+                try:
+                    prompt_cost, completion_cost = cost_per_token(
+                        model=model_name,
+                        prompt_tokens=prompt_tokens,
+                        completion_tokens=completion_tokens
+                    )
+                    total_cost_usd = (prompt_cost + completion_cost) if prompt_cost and completion_cost else None
+                except Exception as e:
+                    # If cost calculation fails, continue without costs
+                    import warnings
+                    warnings.warn(f"Failed to calculate token costs for model {model_name}: {e}")
+
+            # Create TraceUsage object
+            usage = TraceUsage(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens or (prompt_tokens + completion_tokens if prompt_tokens and completion_tokens else None),
+                prompt_tokens_cost_usd=prompt_cost,
+                completion_tokens_cost_usd=completion_cost,
+                total_cost_usd=total_cost_usd,
+                model_name=model_name
+            )
+
+            # Set usage on the actual span (not in outputs)
+            span_id = self._run_id_to_span_id.get(run_id)
+            if span_id and span_id in trace_client.span_id_to_span:
+                trace_span = trace_client.span_id_to_span[span_id]
+                trace_span.usage = usage
+

         self._end_span_tracking(trace_client, run_id, outputs=outputs)
         # --- End Token Usage ---
@@ -416,4 +600,4 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
         if not trace_client: return

         outputs = {'return_values': finish.return_values, 'log': finish.log, 'messages': finish.messages, 'kwargs': kwargs}
-        self._end_span_tracking(trace_client, run_id, outputs=outputs)
+        self._end_span_tracking(trace_client, run_id, outputs=outputs)
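
Net effect of the handler changes: spans are queued to a background service as they start and finish, per-LLM-span usage and cost are attached via TraceUsage, and the handler can be reused across executions. A rough usage sketch; the import path for JudgevalCallbackHandler and the Tracer constructor arguments are assumptions (they are not shown in this diff), and `graph` stands in for any existing LangChain/LangGraph runnable:

    from judgeval.common.tracer import Tracer
    from judgeval.integrations.langgraph import JudgevalCallbackHandler  # assumed import path

    tracer = Tracer(project_name="my-project")  # illustrative constructor args
    handler = JudgevalCallbackHandler(tracer)

    # First execution: the trace is saved live (final_save=False) and finalized with
    # save_with_rate_limiting(final_save=True) when the root run ends.
    graph.invoke({"input": "first question"}, config={"callbacks": [handler]})

    # The handler may now be reused: _ensure_trace_client() resets execution state when
    # a new root run starts after a saved trace, or you can reset it explicitly.
    handler.reset()      # clears execution state, keeps executed_nodes/executed_tools logs
    handler.reset_all()  # also clears the running logs and collected traces
    graph.invoke({"input": "second question"}, config={"callbacks": [handler]})
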
judgeval/rules.py CHANGED
@@ -9,13 +9,13 @@ import asyncio
 from concurrent.futures import ThreadPoolExecutor
 import time
 import uuid
+import os
+import re
+import json
+from datetime import datetime

 from judgeval.scorers import APIJudgmentScorer, JudgevalScorer
-
-class AlertStatus(str, Enum):
-    """Status of an alert evaluation."""
-    TRIGGERED = "triggered"
-    NOT_TRIGGERED = "not_triggered"
+from judgeval.utils.alerts import AlertStatus, AlertResult

 class Condition(BaseModel):
     """
@@ -68,6 +68,36 @@ class Condition(BaseModel):
         # Fallback to default comparison (greater than or equal)
         return value >= self.threshold if self.threshold is not None else False

+class PagerDutyConfig(BaseModel):
+    """
+    Configuration for PagerDuty notifications.
+
+    Attributes:
+        routing_key: PagerDuty integration routing key
+        severity: Severity level (critical, error, warning, info)
+        source: Source of the alert (defaults to "judgeval")
+        component: Optional component that triggered the alert
+        group: Optional logical grouping for the alert
+        class_type: Optional class/type of alert event
+    """
+    routing_key: str
+    severity: str = "error" # critical, error, warning, info
+    source: str = "judgeval"
+    component: Optional[str] = None
+    group: Optional[str] = None
+    class_type: Optional[str] = None
+
+    def model_dump(self, **kwargs):
+        """Convert the PagerDutyConfig to a dictionary for JSON serialization."""
+        return {
+            "routing_key": self.routing_key,
+            "severity": self.severity,
+            "source": self.source,
+            "component": self.component,
+            "group": self.group,
+            "class_type": self.class_type
+        }
+
 class NotificationConfig(BaseModel):
     """
     Configuration for notifications when a rule is triggered.
@@ -75,8 +105,12 @@ class NotificationConfig(BaseModel):
     Example:
         {
             "enabled": true,
-            "communication_methods": ["email", "broadcast_slack", "broadcast_email"],
+            "communication_methods": ["email", "broadcast_slack", "broadcast_email", "pagerduty"],
             "email_addresses": ["user1@example.com", "user2@example.com"],
+            "pagerduty_config": {
+                "routing_key": "R0ABCD1234567890123456789",
+                "severity": "error"
+            },
             "send_at": 1632150000 # Unix timestamp (specific date/time)
         }

@@ -84,10 +118,12 @@ class NotificationConfig(BaseModel):
     - "email": Send emails to specified email addresses
     - "broadcast_slack": Send broadcast notifications to all configured Slack channels
     - "broadcast_email": Send broadcast emails to all organization emails
+    - "pagerduty": Send alerts to PagerDuty using the configured routing key
     """
     enabled: bool = True
     communication_methods: List[str] = []
     email_addresses: Optional[List[str]] = None
+    pagerduty_config: Optional[PagerDutyConfig] = None
     send_at: Optional[int] = None # Unix timestamp for scheduled notifications

     def model_dump(self, **kwargs):
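
A small sketch of wiring up the new PagerDuty support; the field names come from the models above, the routing key value is a placeholder, and the serialized form is shown in the following hunk:

    from judgeval.rules import NotificationConfig, PagerDutyConfig

    notification = NotificationConfig(
        enabled=True,
        communication_methods=["email", "pagerduty"],
        email_addresses=["oncall@example.com"],
        pagerduty_config=PagerDutyConfig(
            routing_key="<your-pagerduty-routing-key>",  # placeholder
            severity="error",
            component="rag-pipeline",  # optional, illustrative
        ),
    )

    # model_dump() nests the PagerDuty settings alongside the other notification fields.
    payload = notification.model_dump()
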
@@ -96,6 +132,7 @@ class NotificationConfig(BaseModel):
             "enabled": self.enabled,
             "communication_methods": self.communication_methods,
             "email_addresses": self.email_addresses,
+            "pagerduty_config": self.pagerduty_config.model_dump() if self.pagerduty_config else None,
             "send_at": self.send_at
         }

@@ -144,7 +181,8 @@ class Rule(BaseModel):
             # Create standardized metric representation needed by server API
             metric_data = {
                 "score_type": "",
-                "threshold": 0.0
+                "threshold": 0.0,
+                "name": ""
             }

             # First try to use object's own serialization methods
@@ -182,6 +220,16 @@ class Rule(BaseModel):
                 # Use condition threshold if metric doesn't have one
                 metric_data['threshold'] = self.conditions[i].threshold

+            # Make sure name is set
+            if not metric_data.get('name'):
+                if hasattr(metric_obj, '__name__'):
+                    metric_data['name'] = metric_obj.__name__
+                elif hasattr(metric_obj, 'name'):
+                    metric_data['name'] = metric_obj.name
+                else:
+                    # Fallback to score_type if available
+                    metric_data['name'] = metric_data.get('score_type', str(metric_obj))
+
             # Update the condition with our properly serialized metric
             condition["metric"] = metric_data

@@ -199,47 +247,6 @@ class Rule(BaseModel):
             raise ValueError(f"combine_type must be 'all' or 'any', got: {v}")
         return v

-class AlertResult(BaseModel):
-    """
-    Result of evaluating a rule.
-
-    Example:
-        {
-            "status": "triggered",
-            "rule_name": "Quality Check",
-            "conditions_result": [
-                {"metric": "faithfulness", "value": 0.6, "threshold": 0.7, "passed": False},
-                {"metric": "relevancy", "value": 0.9, "threshold": 0.8, "passed": True}
-            ],
-            "rule_id": "123e4567-e89b-12d3-a456-426614174000",
-            "metadata": {
-                "example_id": "example_123",
-                "timestamp": "20240321_123456"
-            },
-            "notification": {
-                "enabled": true,
-                "communication_methods": ["slack", "email"],
-                "email_addresses": ["user1@example.com", "user2@example.com"]
-            }
-        }
-    """
-    status: AlertStatus
-    rule_id: Optional[str] = None # The unique identifier of the rule
-    rule_name: str
-    conditions_result: List[Dict[str, Any]]
-    metadata: Dict[str, Any] = {}
-    notification: Optional[NotificationConfig] = None # Configuration for notifications
-
-    @property
-    def example_id(self) -> Optional[str]:
-        """Get example_id from metadata for backward compatibility"""
-        return self.metadata.get("example_id")
-
-    @property
-    def timestamp(self) -> Optional[str]:
-        """Get timestamp from metadata for backward compatibility"""
-        return self.metadata.get("timestamp")
-
 class RulesEngine:
     """
     Engine for creating and evaluating rules against metrics.
@@ -406,7 +413,7 @@ class RulesEngine:
             # If rule has a notification config and the alert is triggered, include it in the result
             notification_config = rule.notification

-            # Set the alert status based on whether the rule was triggered
+            # Set the alert status based on whether the rule was triggered using proper enum values
             status = AlertStatus.TRIGGERED if triggered else AlertStatus.NOT_TRIGGERED

             # Create the alert result
@@ -416,7 +423,10 @@ class RulesEngine:
                 rule_name=rule.name,
                 conditions_result=condition_results,
                 notification=notification_config,
-                metadata=example_metadata or {}
+                metadata=example_metadata or {},
+                combine_type=rule.combine_type,
+                project_id=example_metadata.get("project_id") if example_metadata else None,
+                trace_span_id=example_metadata.get("trace_span_id") if example_metadata else None
             )

             results[rule_id] = alert_result
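
For reference, AlertStatus and AlertResult now live in judgeval.utils.alerts, and AlertResult is constructed with the additional combine_type, project_id, and trace_span_id fields shown above. A hedged sketch of building one by hand (normally RulesEngine does this for you; the values are illustrative):

    from judgeval.utils.alerts import AlertResult, AlertStatus

    alert = AlertResult(
        status=AlertStatus.TRIGGERED,
        rule_id="123e4567-e89b-12d3-a456-426614174000",
        rule_name="Quality Check",
        conditions_result=[
            {"metric": "faithfulness", "value": 0.6, "threshold": 0.7, "passed": False},
        ],
        metadata={"example_id": "example_123"},
        combine_type="all",   # mirrors rule.combine_type
        project_id=None,      # taken from example metadata when present
        trace_span_id=None,
    )
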