judgeval 0.0.25__py3-none-any.whl → 0.0.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/common/tracer.py +528 -166
- judgeval/constants.py +7 -4
- judgeval/data/__init__.py +0 -3
- judgeval/data/datasets/dataset.py +42 -19
- judgeval/data/datasets/eval_dataset_client.py +59 -20
- judgeval/data/result.py +34 -56
- judgeval/integrations/langgraph.py +16 -12
- judgeval/judgment_client.py +85 -23
- judgeval/rules.py +177 -60
- judgeval/run_evaluation.py +143 -122
- judgeval/scorers/score.py +21 -18
- judgeval/utils/alerts.py +32 -1
- {judgeval-0.0.25.dist-info → judgeval-0.0.27.dist-info}/METADATA +1 -1
- {judgeval-0.0.25.dist-info → judgeval-0.0.27.dist-info}/RECORD +16 -17
- judgeval/data/api_example.py +0 -98
- {judgeval-0.0.25.dist-info → judgeval-0.0.27.dist-info}/WHEEL +0 -0
- {judgeval-0.0.25.dist-info → judgeval-0.0.27.dist-info}/licenses/LICENSE.md +0 -0
judgeval/judgment_client.py
CHANGED
@@ -27,7 +27,8 @@ from judgeval.constants import (
     JUDGMENT_EVAL_FETCH_API_URL,
     JUDGMENT_EVAL_DELETE_API_URL,
     JUDGMENT_EVAL_DELETE_PROJECT_API_URL,
-    JUDGMENT_PROJECT_DELETE_API_URL
+    JUDGMENT_PROJECT_DELETE_API_URL,
+    JUDGMENT_PROJECT_CREATE_API_URL
 )
 from judgeval.common.exceptions import JudgmentAPIError
 from pydantic import BaseModel
@@ -38,8 +39,21 @@ class EvalRunRequestBody(BaseModel):
     project_name: str
     judgment_api_key: str
 
+class DeleteEvalRunRequestBody(BaseModel):
+    eval_names: List[str]
+    project_name: str
+    judgment_api_key: str
+
+class SingletonMeta(type):
+    _instances = {}
 
-class JudgmentClient:
+    def __call__(cls, *args, **kwargs):
+        if cls not in cls._instances:
+            instance = super().__call__(*args, **kwargs)
+            cls._instances[cls] = instance
+        return cls._instances[cls]
+
+class JudgmentClient(metaclass=SingletonMeta):
     def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"), organization_id: str = os.getenv("JUDGMENT_ORG_ID")):
         self.judgment_api_key = judgment_api_key
         self.organization_id = organization_id
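The new `SingletonMeta` metaclass makes `JudgmentClient` a process-wide singleton. A minimal sketch of the caller-visible consequence (import path taken from this diff; the key values are placeholders):

```python
from judgeval.judgment_client import JudgmentClient

a = JudgmentClient(judgment_api_key="key-one")
b = JudgmentClient(judgment_api_key="key-two")  # args ignored: the cached instance is returned

assert a is b
assert a.judgment_api_key == "key-one"  # the second key never reaches __init__
```

Note that constructor arguments passed after the first instantiation are silently dropped, so the first call wins.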
@@ -51,8 +65,25 @@ class JudgmentClient:
             # May be bad to output their invalid API key...
             raise JudgmentAPIError(f"Issue with passed in Judgment API key: {response}")
         else:
-            print(f"Successfully initialized JudgmentClient
-
+            print(f"Successfully initialized JudgmentClient!")
+
+    def a_run_evaluation(
+        self,
+        examples: List[Example],
+        scorers: List[Union[ScorerWrapper, JudgevalScorer]],
+        model: Union[str, List[str], JudgevalJudge],
+        aggregator: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        log_results: bool = True,
+        project_name: str = "default_project",
+        eval_run_name: str = "default_eval_run",
+        override: bool = False,
+        use_judgment: bool = True,
+        ignore_errors: bool = True,
+        rules: Optional[List[Rule]] = None
+    ) -> List[ScoringResult]:
+        return self.run_evaluation(examples, scorers, model, aggregator, metadata, log_results, project_name, eval_run_name, override, use_judgment, ignore_errors, True, rules)
+
     def run_evaluation(
         self,
         examples: List[Example],
@@ -65,6 +96,8 @@ class JudgmentClient:
         eval_run_name: str = "default_eval_run",
         override: bool = False,
         use_judgment: bool = True,
+        ignore_errors: bool = True,
+        async_execution: bool = False,
         rules: Optional[List[Rule]] = None
     ) -> List[ScoringResult]:
         """
@@ -81,6 +114,7 @@ class JudgmentClient:
             eval_run_name (str): A name for this evaluation run
             override (bool): Whether to override an existing evaluation run with the same name
             use_judgment (bool): Whether to use Judgment API for evaluation
+            ignore_errors (bool): Whether to ignore errors during evaluation (safely handled)
             rules (Optional[List[Rule]]): Rules to evaluate against scoring results
 
         Returns:
@@ -141,7 +175,7 @@ class JudgmentClient:
                 rules=loaded_rules,
                 organization_id=self.organization_id
             )
-            return run_eval(eval, override)
+            return run_eval(eval, override, ignore_errors=ignore_errors, async_execution=async_execution)
         except ValueError as e:
             raise ValueError(f"Please check your EvaluationRun object, one or more fields are invalid: \n{str(e)}")
         except Exception as e:
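`run_evaluation` gains `ignore_errors` and `async_execution` flags, and `a_run_evaluation` is a thin wrapper that forwards `async_execution=True`. A sketch of both call styles, assuming `Example` and `FaithfulnessScorer` live at the `judgeval.data` / `judgeval.scorers` paths; the model name is a placeholder:

```python
from judgeval.judgment_client import JudgmentClient
from judgeval.data import Example                 # import path assumed
from judgeval.scorers import FaithfulnessScorer   # import path assumed

client = JudgmentClient()
examples = [Example(input="What is the capital of France?", actual_output="Paris")]

# Synchronous run; scorer failures are handled safely instead of aborting the run.
results = client.run_evaluation(
    examples=examples,
    scorers=[FaithfulnessScorer(threshold=0.7)],
    model="gpt-4o",
    ignore_errors=True,
)

# Equivalent asynchronous submission: a_run_evaluation forwards async_execution=True.
results = client.a_run_evaluation(
    examples=examples,
    scorers=[FaithfulnessScorer(threshold=0.7)],
    model="gpt-4o",
)
```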
@@ -242,7 +276,7 @@ class JudgmentClient:
     def create_dataset(self) -> EvalDataset:
         return self.eval_dataset_client.create_dataset()
 
-    def push_dataset(self, alias: str, dataset: EvalDataset, overwrite: Optional[bool] = False) -> bool:
+    def push_dataset(self, alias: str, dataset: EvalDataset, project_name: str, overwrite: Optional[bool] = False) -> bool:
         """
         Uploads an `EvalDataset` to the Judgment platform for storage.
 
@@ -256,9 +290,9 @@ class JudgmentClient:
         """
         # Set judgment_api_key just in case it was not set
         dataset.judgment_api_key = self.judgment_api_key
-        return self.eval_dataset_client.push(dataset, alias, overwrite)
+        return self.eval_dataset_client.push(dataset, alias, project_name, overwrite)
 
-    def pull_dataset(self, alias: str) -> EvalDataset:
+    def pull_dataset(self, alias: str, project_name: str) -> EvalDataset:
         """
         Retrieves a saved `EvalDataset` from the Judgment platform.
 
@@ -268,25 +302,31 @@ class JudgmentClient:
         Returns:
             EvalDataset: The retrieved dataset
         """
-        return self.eval_dataset_client.pull(alias)
+        return self.eval_dataset_client.pull(alias, project_name)
+
+    def delete_dataset(self, alias: str, project_name: str) -> bool:
+        """
+        Deletes a saved `EvalDataset` from the Judgment platform.
+        """
+        return self.eval_dataset_client.delete(alias, project_name)
 
-    def
+    def pull_project_dataset_stats(self, project_name: str) -> dict:
         """
-        Retrieves all dataset stats from the Judgment platform for the
+        Retrieves all dataset stats from the Judgment platform for the project.
 
         Args:
-
+            project_name (str): The name of the project to retrieve
 
         Returns:
-
+            dict: The retrieved dataset stats
         """
-        return self.eval_dataset_client.
+        return self.eval_dataset_client.pull_project_dataset_stats(project_name)
 
-    def
+    def insert_dataset(self, alias: str, examples: List[Example], project_name: str) -> bool:
         """
         Edits the dataset on Judgment platform by adding new examples
         """
-        return self.eval_dataset_client.
+        return self.eval_dataset_client.insert_dataset(alias, examples, project_name)
 
     # Maybe add option where you can pass in the EvaluationRun object and it will pull the eval results from the backend
     def pull_eval(self, project_name: str, eval_run_name: str) -> List[Dict[str, Union[str, List[ScoringResult]]]]:
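All dataset operations are now scoped to a project. A sketch of the updated call sites (dataset contents elided; the `Example` import path is assumed):

```python
from judgeval.judgment_client import JudgmentClient
from judgeval.data import Example  # import path assumed

client = JudgmentClient()

dataset = client.create_dataset()
client.push_dataset("qa_dataset", dataset, project_name="my_project", overwrite=False)

pulled = client.pull_dataset("qa_dataset", project_name="my_project")
stats = client.pull_project_dataset_stats("my_project")  # stats for every dataset in the project

# Append examples to the stored dataset, then remove it entirely.
client.insert_dataset("qa_dataset", [Example(input="hi", actual_output="hello")], "my_project")
client.delete_dataset("qa_dataset", project_name="my_project")
```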
@@ -324,19 +364,22 @@ class JudgmentClient:
             eval_run_result[0]["results"] = [ScoringResult(**filtered_result)]
         return eval_run_result
 
-    def delete_eval(self, project_name: str,
+    def delete_eval(self, project_name: str, eval_run_names: List[str]) -> bool:
         """
-        Deletes an evaluation from the server by project and run
+        Deletes an evaluation from the server by project and run names.
 
         Args:
             project_name (str): Name of the project
-
+            eval_run_names (List[str]): List of names of the evaluation runs
 
         Returns:
             bool: Whether the evaluation was successfully deleted
         """
-
-
+        if not eval_run_names:
+            raise ValueError("No evaluation run names provided")
+
+        eval_run_request_body = DeleteEvalRunRequestBody(project_name=project_name,
+                                                         eval_names=eval_run_names,
                                                          judgment_api_key=self.judgment_api_key)
         response = requests.delete(JUDGMENT_EVAL_DELETE_API_URL,
                                    json=eval_run_request_body.model_dump(),
@@ -345,9 +388,11 @@ class JudgmentClient:
                                        "Authorization": f"Bearer {self.judgment_api_key}",
                                        "X-Organization-Id": self.organization_id
                                    })
-        if response.status_code
+        if response.status_code == 404:
+            raise ValueError(f"Eval results not found: {response.json()}")
+        elif response.status_code == 500:
             raise ValueError(f"Error deleting eval results: {response.json()}")
-        return response.json()
+        return bool(response.json())
 
     def delete_project_evals(self, project_name: str) -> bool:
         """
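`delete_eval` now deletes multiple runs at once, validates its input before any network call, and distinguishes a 404 from a 500 in the errors it raises. Sketch:

```python
from judgeval.judgment_client import JudgmentClient

client = JudgmentClient()

# Delete two runs in one request; returns a bool derived from the response body.
client.delete_eval("my_project", ["run_a", "run_b"])

# An empty list fails fast, before the HTTP request is made.
client.delete_eval("my_project", [])  # raises ValueError("No evaluation run names provided")
```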
@@ -372,6 +417,23 @@ class JudgmentClient:
             raise ValueError(f"Error deleting eval results: {response.json()}")
         return response.json()
 
+    def create_project(self, project_name: str) -> bool:
+        """
+        Creates a project on the server.
+        """
+        response = requests.post(JUDGMENT_PROJECT_CREATE_API_URL,
+                                 json={
+                                     "project_name": project_name,
+                                 },
+                                 headers={
+                                     "Content-Type": "application/json",
+                                     "Authorization": f"Bearer {self.judgment_api_key}",
+                                     "X-Organization-Id": self.organization_id
+                                 })
+        if response.status_code != requests.codes.ok:
+            raise ValueError(f"Error creating project: {response.json()}")
+        return response.json()
+
     def delete_project(self, project_name: str) -> bool:
         """
         Deletes a project from the server. Which also deletes all evaluations and traces associated with the project.
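Together with the existing `delete_project`, the new `create_project` completes the project lifecycle. Sketch:

```python
from judgeval.judgment_client import JudgmentClient

client = JudgmentClient()
client.create_project("my_project")  # raises ValueError on any non-200 response

# ... push datasets and run evaluations against "my_project" ...

client.delete_project("my_project")  # also removes the project's evaluations and traces
```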
judgeval/rules.py
CHANGED
@@ -17,15 +17,6 @@ class AlertStatus(str, Enum):
     TRIGGERED = "triggered"
     NOT_TRIGGERED = "not_triggered"
 
-class Operator(str, Enum):
-    """Comparison operators for conditions."""
-    GT = ">"
-    GTE = ">="
-    LT = "<"
-    LTE = "<="
-    EQ = "=="
-    NEQ = "!="
-
 class Condition(BaseModel):
     """
     A single metric condition.
@@ -33,15 +24,13 @@ class Condition(BaseModel):
     Example:
     {
         "metric": FaithfulnessScorer(threshold=0.7)  # Must be a scorer object: APIJudgmentScorer, JudgevalScorer, or ScorerWrapper
-        "operator": ">=",
-        "threshold": 0.7
     }
+
+    The Condition class uses the scorer's threshold and success function internally.
     """
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
-    metric: Union[APIJudgmentScorer, JudgevalScorer, ScorerWrapper]
-    operator: Operator
-    threshold: float
+    metric: Union[APIJudgmentScorer, JudgevalScorer, ScorerWrapper]
 
     @property
     def metric_name(self) -> str:
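`Condition` no longer carries its own `operator` and `threshold`; the `Operator` enum is gone and the scorer is now the single source of truth. A before/after sketch (scorer import path assumed):

```python
from judgeval.rules import Condition
from judgeval.scorers import FaithfulnessScorer  # import path assumed

# 0.0.25 (no longer valid):
#   Condition(metric=FaithfulnessScorer(threshold=0.7), operator=Operator.GTE, threshold=0.7)

# 0.0.27: the scorer alone defines the threshold and pass/fail logic.
condition = Condition(metric=FaithfulnessScorer(threshold=0.7))
```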
@@ -58,22 +47,60 @@ class Condition(BaseModel):
         # Fallback to string representation
         return str(self.metric)
 
+    @property
+    def threshold(self) -> float:
+        """Get the threshold from the metric."""
+        return self.metric.threshold if hasattr(self.metric, 'threshold') else 0.5
+
     def evaluate(self, value: float) -> bool:
-        """
-
-
-
-
-
-
-
-
-
-
-
+        """
+        Evaluate the condition against a value.
+        Returns True if the condition passes, False otherwise.
+        Uses the scorer's success check function if available.
+        """
+        # Store the value in the scorer
+        if hasattr(self.metric, 'score'):
+            self.metric.score = value
+
+        # Use the scorer's success check function if available
+        if hasattr(self.metric, 'success_check'):
+            return self.metric.success_check()
+        elif hasattr(self.metric, '_success_check'):
+            return self.metric._success_check()
         else:
-
+            # Fallback to default comparison (greater than or equal)
+            return value >= self.threshold if self.threshold is not None else False
+
+class NotificationConfig(BaseModel):
+    """
+    Configuration for notifications when a rule is triggered.
+
+    Example:
+    {
+        "enabled": true,
+        "communication_methods": ["email", "broadcast_slack", "broadcast_email"],
+        "email_addresses": ["user1@example.com", "user2@example.com"],
+        "send_at": 1632150000  # Unix timestamp (specific date/time)
+    }
+
+    Communication Methods:
+    - "email": Send emails to specified email addresses
+    - "broadcast_slack": Send broadcast notifications to all configured Slack channels
+    - "broadcast_email": Send broadcast emails to all organization emails
+    """
+    enabled: bool = True
+    communication_methods: List[str] = []
+    email_addresses: Optional[List[str]] = None
+    send_at: Optional[int] = None  # Unix timestamp for scheduled notifications
+
+    def model_dump(self, **kwargs):
+        """Convert the NotificationConfig to a dictionary for JSON serialization."""
+        return {
+            "enabled": self.enabled,
+            "communication_methods": self.communication_methods,
+            "email_addresses": self.email_addresses,
+            "send_at": self.send_at
+        }
 
 class Rule(BaseModel):
     """
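`Condition.evaluate` now defers to the scorer: it stores the score, calls `success_check()` (or `_success_check()`) when present, and otherwise falls back to a `value >= threshold` comparison. The new `threshold` property reads through to the scorer, so the two stay in sync. A sketch (scorer import path assumed; the printed results assume a >=-style scorer):

```python
from judgeval.rules import Condition
from judgeval.scorers import FaithfulnessScorer  # import path assumed

cond = Condition(metric=FaithfulnessScorer(threshold=0.7))
print(cond.threshold)      # 0.7, read from the scorer rather than stored on the Condition

# For a >=-style scorer with threshold 0.7:
print(cond.evaluate(0.8))  # True
print(cond.evaluate(0.6))  # False
```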
@@ -85,10 +112,15 @@ class Rule(BaseModel):
         "name": "Quality Check",
         "description": "Check if quality metrics meet thresholds",
         "conditions": [
-            {"metric": FaithfulnessScorer(threshold=0.7)
-            {"metric": AnswerRelevancyScorer(threshold=0.8)
+            {"metric": FaithfulnessScorer(threshold=0.7)},
+            {"metric": AnswerRelevancyScorer(threshold=0.8)}
         ],
-        "combine_type": "all"  # "all" or "any"
+        "combine_type": "all",  # "all" or "any"
+        "notification": {
+            "enabled": true,
+            "communication_methods": ["slack", "email"],
+            "email_addresses": ["user1@example.com", "user2@example.com"]
+        }
     }
     """
     rule_id: str = Field(default_factory=lambda: str(uuid.uuid4()))  # Random UUID string as default value
@@ -96,6 +128,8 @@ class Rule(BaseModel):
     description: Optional[str] = None
     conditions: List[Condition]
     combine_type: str = Field(..., pattern="^(all|any)$")  # all = AND, any = OR
+    notification: Optional[NotificationConfig] = None  # Configuration for notifications
+
 
     def model_dump(self, **kwargs):
         """
@@ -168,7 +202,6 @@ class Rule(BaseModel):
             raise ValueError(f"combine_type must be 'all' or 'any', got: {v}")
         return v
 
-
 class AlertResult(BaseModel):
     """
     Result of evaluating a rule.
@@ -185,6 +218,11 @@ class AlertResult(BaseModel):
         "metadata": {
            "example_id": "example_123",
            "timestamp": "20240321_123456"
+        },
+        "notification": {
+            "enabled": true,
+            "communication_methods": ["slack", "email"],
+            "email_addresses": ["user1@example.com", "user2@example.com"]
         }
     }
     """
@@ -193,6 +231,7 @@ class AlertResult(BaseModel):
     rule_name: str
     conditions_result: List[Dict[str, Any]]
     metadata: Dict[str, Any] = {}
+    notification: Optional[NotificationConfig] = None  # Configuration for notifications
 
     @property
     def example_id(self) -> Optional[str]:
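With the `notification` field on `Rule` (mirrored on `AlertResult`), a rule can carry its alerting policy inline. A construction sketch (scorer import path assumed; the address is a placeholder):

```python
from judgeval.rules import Rule, Condition, NotificationConfig
from judgeval.scorers import FaithfulnessScorer  # import path assumed

rule = Rule(
    name="Quality Check",
    conditions=[Condition(metric=FaithfulnessScorer(threshold=0.7))],
    combine_type="all",
    notification=NotificationConfig(
        communication_methods=["email"],
        email_addresses=["oncall@example.com"],  # placeholder address
    ),
)
```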
@@ -206,36 +245,105 @@ class AlertResult(BaseModel):
 
 class RulesEngine:
     """
-    Engine for evaluating rules
+    Engine for creating and evaluating rules against metrics.
 
-    Example
+    Example:
+    ```python
+    # Define rules
     rules = {
-        "
+        "1": Rule(
             name="Quality Check",
+            description="Check if quality metrics meet thresholds",
             conditions=[
-                Condition(metric=FaithfulnessScorer(threshold=0.7)
-                Condition(metric=AnswerRelevancyScorer(threshold=0.8)
+                Condition(metric=FaithfulnessScorer(threshold=0.7)),
+                Condition(metric=AnswerRelevancyScorer(threshold=0.8))
             ],
             combine_type="all"
         )
     }
 
+    # Create rules engine
     engine = RulesEngine(rules)
-
-
-
-    "
-
+
+    # Configure notifications
+    engine.configure_notification(
+        rule_id="1",
+        enabled=True,
+        communication_methods=["slack", "email"],
+        email_addresses=["user@example.com"]
+    )
+
+    # Evaluate rules
+    scores = {"faithfulness": 0.65, "relevancy": 0.85}
+    results = engine.evaluate_rules(scores, {"example_id": "example_123"})
+    ```
     """
 
     def __init__(self, rules: Dict[str, Rule]):
         """
-        Initialize the
+        Initialize the rules engine.
 
         Args:
-            rules: Dictionary mapping rule IDs to
+            rules: Dictionary mapping rule IDs to Rule objects
         """
         self.rules = rules
+
+    def configure_notification(self, rule_id: str, enabled: bool = True,
+                               communication_methods: List[str] = None,
+                               email_addresses: List[str] = None,
+                               send_at: Optional[int] = None) -> None:
+        """
+        Configure notification settings for a specific rule.
+
+        Args:
+            rule_id: ID of the rule to configure notifications for
+            enabled: Whether notifications are enabled for this rule
+            communication_methods: List of notification methods (e.g., ["slack", "email"])
+            email_addresses: List of email addresses to send notifications to
+            send_at: Optional Unix timestamp for when to send the notification
+        """
+        if rule_id not in self.rules:
+            raise ValueError(f"Rule ID '{rule_id}' not found")
+
+        rule = self.rules[rule_id]
+
+        # Create notification configuration if it doesn't exist
+        if rule.notification is None:
+            rule.notification = NotificationConfig()
+
+        # Set notification parameters
+        rule.notification.enabled = enabled
+
+        if communication_methods is not None:
+            rule.notification.communication_methods = communication_methods
+
+        if email_addresses is not None:
+            rule.notification.email_addresses = email_addresses
+
+        if send_at is not None:
+            rule.notification.send_at = send_at
+
+    def configure_all_notifications(self, enabled: bool = True,
+                                    communication_methods: List[str] = None,
+                                    email_addresses: List[str] = None,
+                                    send_at: Optional[int] = None) -> None:
+        """
+        Configure notification settings for all rules.
+
+        Args:
+            enabled: Whether notifications are enabled
+            communication_methods: List of notification methods (e.g., ["slack", "email"])
+            email_addresses: List of email addresses to send notifications to
+            send_at: Optional Unix timestamp for when to send the notification
+        """
+        for rule_id, rule in self.rules.items():
+            self.configure_notification(
+                rule_id=rule_id,
+                enabled=enabled,
+                communication_methods=communication_methods,
+                email_addresses=email_addresses,
+                send_at=send_at
+            )
 
     def evaluate_rules(self, scores: Dict[str, float], example_metadata: Optional[Dict[str, Any]] = None) -> Dict[str, AlertResult]:
         """
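`configure_all_notifications` applies one policy across every registered rule by looping over `configure_notification`:

```python
# Reusing the `rules` dict from the docstring example above:
engine = RulesEngine(rules)
engine.configure_all_notifications(
    enabled=True,
    communication_methods=["broadcast_slack"],
)
```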
@@ -257,13 +365,13 @@ class RulesEngine:
             # Get the metric name for lookup
             metric_name = condition.metric_name
             value = scores.get(metric_name)
+
             if value is None:
                 # Skip this condition instead of evaluating it as false
                 condition_results.append({
                     "metric": metric_name,
                     "value": None,
                     "threshold": condition.threshold,
-                    "operator": condition.operator,
                     "passed": None,  # Using None to indicate the condition was skipped
                     "skipped": True  # Add a flag to indicate this condition was skipped
                 })
@@ -274,7 +382,6 @@ class RulesEngine:
                     "metric": metric_name,
                     "value": value,
                     "threshold": condition.threshold,
-                    "operator": condition.operator,
                     "passed": passed,
                     "skipped": False  # Indicate this condition was evaluated
                 })
@@ -285,23 +392,36 @@ class RulesEngine:
                 # If all conditions were skipped, the rule doesn't trigger
                 triggered = False
             else:
-
+                if rule.combine_type == "all":
+                    # For "all" combine_type:
+                    # - All evaluated conditions must pass
+                    # - All conditions must have been evaluated (none skipped)
+                    all_conditions_passed = all(passed_conditions)
+                    all_conditions_evaluated = len(passed_conditions) == len(rule.conditions)
+                    triggered = all_conditions_passed and all_conditions_evaluated
+                else:
+                    # For "any" combine_type, at least one condition must pass
+                    triggered = any(passed_conditions)
 
             # Create alert result with example metadata
+            notification_config = None
+            if triggered and rule.notification:
+                # If rule has a notification config and the alert is triggered, include it in the result
+                notification_config = rule.notification
+
+            # Set the alert status based on whether the rule was triggered
+            status = AlertStatus.TRIGGERED if triggered else AlertStatus.NOT_TRIGGERED
+
+            # Create the alert result
             alert_result = AlertResult(
-                status=
-                rule_id=rule.rule_id,
+                status=status,
+                rule_id=rule.rule_id,
                 rule_name=rule.name,
-                conditions_result=condition_results
+                conditions_result=condition_results,
+                notification=notification_config,
+                metadata=example_metadata or {}
             )
 
-            # Add example metadata if provided
-            if example_metadata:
-                if "example_id" in example_metadata:
-                    alert_result.metadata["example_id"] = example_metadata["example_id"]
-                if "timestamp" in example_metadata:
-                    alert_result.metadata["timestamp"] = example_metadata["timestamp"]
-
             results[rule_id] = alert_result
 
         return results
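The trigger logic is now explicit about skipped conditions: under `combine_type="all"`, a condition whose metric is missing from `scores` blocks triggering, while under `"any"` one passing condition suffices. A sketch (score keys follow the docstring example above; scorer imports assumed):

```python
from judgeval.rules import Rule, Condition, RulesEngine
from judgeval.scorers import FaithfulnessScorer, AnswerRelevancyScorer  # imports assumed

engine = RulesEngine({
    "strict": Rule(
        name="strict",
        conditions=[
            Condition(metric=FaithfulnessScorer(threshold=0.7)),
            Condition(metric=AnswerRelevancyScorer(threshold=0.8)),
        ],
        combine_type="all",
    ),
})

# "relevancy" is absent, so that condition is skipped; with combine_type="all"
# an unevaluated condition prevents the rule from triggering.
results = engine.evaluate_rules({"faithfulness": 0.9}, {"example_id": "example_123"})
print(results["strict"].status)  # AlertStatus.NOT_TRIGGERED
```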
@@ -376,7 +496,4 @@ class RulesEngine:
             )
             end_time = time.perf_counter()
 
-            # Could log performance metrics here if needed
-            # debug(f"Rule evaluation for example {example_id} took {end_time - start_time:.4f} seconds")
-
             return (example_id, rule_results)