judgeval 0.0.27__tar.gz → 0.0.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168)
  1. {judgeval-0.0.27 → judgeval-0.0.28}/PKG-INFO +1 -1
  2. judgeval-0.0.28/docs/alerts/notifications.mdx +191 -0
  3. judgeval-0.0.28/docs/alerts/platform_notifications.mdx +74 -0
  4. judgeval-0.0.28/docs/alerts/rules.mdx +111 -0
  5. judgeval-0.0.28/docs/images/notifications_page.png +0 -0
  6. judgeval-0.0.28/docs/images/reports_modal.png +0 -0
  7. {judgeval-0.0.27 → judgeval-0.0.28}/docs/mint.json +8 -0
  8. {judgeval-0.0.27 → judgeval-0.0.28}/pyproject.toml +1 -1
  9. judgeval-0.0.28/src/demo/cookbooks/JNPR_Mist/test.py +21 -0
  10. judgeval-0.0.28/src/demo/cookbooks/linkd/text2sql.py +14 -0
  11. judgeval-0.0.28/src/demo/custom_example_demo/osiris_test.py +22 -0
  12. judgeval-0.0.28/src/demo/custom_example_demo/qodo_scorer.py +78 -0
  13. {judgeval-0.0.27 → judgeval-0.0.28}/src/demo/travel_agent.py +1 -1
  14. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/common/tracer.py +53 -46
  15. judgeval-0.0.28/src/judgeval/data/custom_api_example.py +91 -0
  16. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/evaluation_run.py +1 -0
  17. judgeval-0.0.28/src/test.py +21 -0
  18. {judgeval-0.0.27 → judgeval-0.0.28}/.github/workflows/ci.yaml +0 -0
  19. {judgeval-0.0.27 → judgeval-0.0.28}/.gitignore +0 -0
  20. {judgeval-0.0.27 → judgeval-0.0.28}/LICENSE.md +0 -0
  21. {judgeval-0.0.27 → judgeval-0.0.28}/Pipfile +0 -0
  22. {judgeval-0.0.27 → judgeval-0.0.28}/Pipfile.lock +0 -0
  23. {judgeval-0.0.27 → judgeval-0.0.28}/README.md +0 -0
  24. {judgeval-0.0.27 → judgeval-0.0.28}/docs/README.md +0 -0
  25. {judgeval-0.0.27 → judgeval-0.0.28}/docs/api_reference/judgment_client.mdx +0 -0
  26. {judgeval-0.0.27 → judgeval-0.0.28}/docs/api_reference/trace.mdx +0 -0
  27. {judgeval-0.0.27 → judgeval-0.0.28}/docs/development.mdx +0 -0
  28. {judgeval-0.0.27 → judgeval-0.0.28}/docs/essentials/code.mdx +0 -0
  29. {judgeval-0.0.27 → judgeval-0.0.28}/docs/essentials/images.mdx +0 -0
  30. {judgeval-0.0.27 → judgeval-0.0.28}/docs/essentials/markdown.mdx +0 -0
  31. {judgeval-0.0.27 → judgeval-0.0.28}/docs/essentials/navigation.mdx +0 -0
  32. {judgeval-0.0.27 → judgeval-0.0.28}/docs/essentials/reusable-snippets.mdx +0 -0
  33. {judgeval-0.0.27 → judgeval-0.0.28}/docs/essentials/settings.mdx +0 -0
  34. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/data_datasets.mdx +0 -0
  35. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/data_examples.mdx +0 -0
  36. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/introduction.mdx +0 -0
  37. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/judges.mdx +0 -0
  38. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/answer_correctness.mdx +0 -0
  39. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/answer_relevancy.mdx +0 -0
  40. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/classifier_scorer.mdx +0 -0
  41. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/comparison.mdx +0 -0
  42. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/contextual_precision.mdx +0 -0
  43. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/contextual_recall.mdx +0 -0
  44. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/contextual_relevancy.mdx +0 -0
  45. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/custom_scorers.mdx +0 -0
  46. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/execution_order.mdx +0 -0
  47. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/faithfulness.mdx +0 -0
  48. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/groundedness.mdx +0 -0
  49. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/hallucination.mdx +0 -0
  50. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/introduction.mdx +0 -0
  51. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/json_correctness.mdx +0 -0
  52. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/scorers/summarization.mdx +0 -0
  53. {judgeval-0.0.27 → judgeval-0.0.28}/docs/evaluation/unit_testing.mdx +0 -0
  54. {judgeval-0.0.27 → judgeval-0.0.28}/docs/favicon.svg +0 -0
  55. {judgeval-0.0.27 → judgeval-0.0.28}/docs/getting_started.mdx +0 -0
  56. {judgeval-0.0.27 → judgeval-0.0.28}/docs/images/basic_trace_example.png +0 -0
  57. {judgeval-0.0.27 → judgeval-0.0.28}/docs/images/checks-passed.png +0 -0
  58. {judgeval-0.0.27 → judgeval-0.0.28}/docs/images/create_aggressive_scorer.png +0 -0
  59. {judgeval-0.0.27 → judgeval-0.0.28}/docs/images/create_scorer.png +0 -0
  60. {judgeval-0.0.27 → judgeval-0.0.28}/docs/images/evaluation_diagram.png +0 -0
  61. {judgeval-0.0.27 → judgeval-0.0.28}/docs/images/hero-dark.svg +0 -0
  62. {judgeval-0.0.27 → judgeval-0.0.28}/docs/images/hero-light.svg +0 -0
  63. {judgeval-0.0.27 → judgeval-0.0.28}/docs/images/online_eval_fault.png +0 -0
  64. {judgeval-0.0.27 → judgeval-0.0.28}/docs/images/trace_ss.png +0 -0
  65. {judgeval-0.0.27 → judgeval-0.0.28}/docs/integration/langgraph.mdx +0 -0
  66. {judgeval-0.0.27 → judgeval-0.0.28}/docs/introduction.mdx +0 -0
  67. {judgeval-0.0.27 → judgeval-0.0.28}/docs/judgment/introduction.mdx +0 -0
  68. {judgeval-0.0.27 → judgeval-0.0.28}/docs/logo/dark.svg +0 -0
  69. {judgeval-0.0.27 → judgeval-0.0.28}/docs/logo/light.svg +0 -0
  70. {judgeval-0.0.27 → judgeval-0.0.28}/docs/monitoring/introduction.mdx +0 -0
  71. {judgeval-0.0.27 → judgeval-0.0.28}/docs/monitoring/production_insights.mdx +0 -0
  72. {judgeval-0.0.27 → judgeval-0.0.28}/docs/monitoring/tracing.mdx +0 -0
  73. {judgeval-0.0.27 → judgeval-0.0.28}/docs/notebooks/create_dataset.ipynb +0 -0
  74. {judgeval-0.0.27 → judgeval-0.0.28}/docs/notebooks/create_scorer.ipynb +0 -0
  75. {judgeval-0.0.27 → judgeval-0.0.28}/docs/notebooks/demo.ipynb +0 -0
  76. {judgeval-0.0.27 → judgeval-0.0.28}/docs/notebooks/prompt_scorer.ipynb +0 -0
  77. {judgeval-0.0.27 → judgeval-0.0.28}/docs/notebooks/quickstart.ipynb +0 -0
  78. {judgeval-0.0.27 → judgeval-0.0.28}/docs/quickstart.mdx +0 -0
  79. {judgeval-0.0.27 → judgeval-0.0.28}/docs/snippets/snippet-intro.mdx +0 -0
  80. {judgeval-0.0.27 → judgeval-0.0.28}/pytest.ini +0 -0
  81. {judgeval-0.0.27 → judgeval-0.0.28}/src/demo/demo.py +0 -0
  82. {judgeval-0.0.27 → judgeval-0.0.28}/src/demo/new_trace/example_complex_async.py +0 -0
  83. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/__init__.py +0 -0
  84. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/clients.py +0 -0
  85. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/common/__init__.py +0 -0
  86. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/common/exceptions.py +0 -0
  87. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/common/logger.py +0 -0
  88. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/common/utils.py +0 -0
  89. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/constants.py +0 -0
  90. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/data/__init__.py +0 -0
  91. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/data/datasets/__init__.py +0 -0
  92. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/data/datasets/dataset.py +0 -0
  93. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/data/datasets/eval_dataset_client.py +0 -0
  94. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/data/example.py +0 -0
  95. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/data/result.py +1 -1
  96. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/data/scorer_data.py +0 -0
  97. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/integrations/langgraph.py +0 -0
  98. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/judges/__init__.py +0 -0
  99. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/judges/base_judge.py +0 -0
  100. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/judges/litellm_judge.py +0 -0
  101. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/judges/mixture_of_judges.py +0 -0
  102. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/judges/together_judge.py +0 -0
  103. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/judges/utils.py +0 -0
  104. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/judgment_client.py +0 -0
  105. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/rules.py +0 -0
  106. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/run_evaluation.py +0 -0
  107. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/__init__.py +0 -0
  108. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/api_scorer.py +0 -0
  109. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/base_scorer.py +0 -0
  110. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/exceptions.py +0 -0
  111. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorer.py +0 -0
  112. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  113. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  114. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  115. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  116. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -0
  117. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -0
  118. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -0
  119. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -0
  120. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -0
  121. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  122. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -0
  123. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
  124. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  125. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -0
  126. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -0
  127. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -0
  128. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -0
  129. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -0
  130. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -0
  131. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -0
  132. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -0
  133. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -0
  134. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -0
  135. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -0
  136. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -0
  137. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/__init__.py +0 -0
  138. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py +0 -0
  139. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py +0 -0
  140. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -0
  141. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -0
  142. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -0
  143. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -0
  144. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -0
  145. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -0
  146. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -0
  147. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -0
  148. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -0
  149. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/execution_order/__init__.py +0 -0
  150. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/execution_order/execution_order.py +0 -0
  151. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -0
  152. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -0
  153. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -0
  154. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -0
  155. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -0
  156. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -0
  157. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py +0 -0
  158. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py +0 -0
  159. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -0
  160. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -0
  161. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -0
  162. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -0
  163. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -0
  164. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/prompt_scorer.py +0 -0
  165. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/score.py +0 -0
  166. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/scorers/utils.py +0 -0
  167. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/tracer/__init__.py +0 -0
  168. {judgeval-0.0.27 → judgeval-0.0.28}/src/judgeval/utils/alerts.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: judgeval
- Version: 0.0.27
+ Version: 0.0.28
  Summary: Judgeval Package
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -0,0 +1,191 @@
+ ---
+ title: 'Notifications'
+ description: 'Get alerted when your rules trigger through multiple communication channels'
+ ---
+
+ # Notifications
+
+ Notifications allow you to receive alerts through various communication channels when your [rules](/alerts/rules) are triggered. This feature helps you stay informed about potential issues with your AI system's performance in real time.
+
+ ## Overview
+
+ The notification system works with [rules](/alerts/rules) to:
+
+ 1. Monitor your evaluation metrics
+ 2. Check if they meet your defined [conditions](/alerts/rules#conditions)
+ 3. Send alerts through your preferred channels when conditions are met
+
+ Notifications can be configured globally or per rule, allowing you to customize how you're alerted based on the specific rule that was triggered.
+
+ <Warning>
+ Rules and notifications only work with built-in APIScorers. Local scorers and custom scorers are not supported for triggering notifications.
+ </Warning>
+
+ ## Notification Configuration
+
+ Notifications are configured using the `NotificationConfig` class from the `judgeval.rules` module.
+
+ ### Configuration Options
+
+ | Parameter | Type | Description |
+ |-----------|------|-------------|
+ | `enabled` | boolean | Whether notifications are enabled (default: `True`) |
+ | `communication_methods` | list of strings | The methods to use for sending notifications (e.g., `["email", "slack"]`) |
+ | `email_addresses` | list of strings | Email addresses to send notifications to |
+ | `send_at` | integer (Unix timestamp) | Schedule notifications for a specific time ([learn more](#scheduled-notifications)) |
+
+ <Note>
+ For aggregated reports and periodic summaries of multiple alerts, use the [Scheduled Reports feature](/alerts/platform_notifications#scheduled-reports-recaps) in the Judgment Platform.
+ </Note>
+
+ ### Basic Configuration
+
+ ```python
+ from judgeval.rules import NotificationConfig
+
+ # Create a notification configuration
+ notification_config = NotificationConfig(
+     enabled=True,
+     communication_methods=["slack", "email"],
+     email_addresses=["user@example.com"],
+     send_at=None  # Send immediately
+ )
+ ```
+
+ ## Communication Methods
+
+ Judgeval supports multiple communication methods for notifications:
+
+ - `"email"`: Send emails to specified email addresses
+ - `"slack"`: Send messages to configured Slack channels
+
+ You can configure multiple methods to be used simultaneously.
+
+ ## Slack Integration
+
+ For detailed information on integrating Slack with Judgment notifications, see the [Platform Notification Center documentation](/alerts/platform_notifications#slack-integration).
+
+ ## Attaching Notifications to Rules
+
+ Notifications can be attached to [rules](/alerts/rules) during rule creation or added to an existing rule afterwards (see the sketch after the example below).
+
+ ### During Rule Creation
+
+ ```python
+ from judgeval.rules import Rule, Condition, NotificationConfig
+ from judgeval.scorers import FaithfulnessScorer
+
+ # Create notification config
+ notification_config = NotificationConfig(
+     enabled=True,
+     communication_methods=["slack", "email"],
+     email_addresses=["user@example.com"]
+ )
+
+ # Create rule with notification config
+ rule = Rule(
+     name="Faithfulness Check",
+     description="Check if faithfulness meets threshold",
+     conditions=[
+         # Note: Only built-in APIScorers are supported
+         Condition(metric=FaithfulnessScorer(threshold=0.7))
+     ],
+     combine_type="all",  # Trigger when all conditions fail (see Combine Types in the Rules documentation)
+     notification=notification_config
+ )
+ ```
+
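+ ### Adding to an Existing Rule
+
+ As a minimal sketch (assuming `Rule` exposes its `notification` field as a mutable attribute, which may differ in your SDK version), a configuration can also be attached after the rule is created:
+
+ ```python
+ from judgeval.rules import Rule, Condition, NotificationConfig
+ from judgeval.scorers import FaithfulnessScorer
+
+ rule = Rule(
+     name="Faithfulness Check",
+     conditions=[Condition(metric=FaithfulnessScorer(threshold=0.7))],
+     combine_type="all"
+ )
+
+ # Attach or update the notification config later
+ rule.notification = NotificationConfig(
+     enabled=True,
+     communication_methods=["email"],
+     email_addresses=["user@example.com"]
+ )
+ ```
+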
+ ## Scheduled Notifications
+
+ You can schedule one-time notifications to be sent at a specific time using the `send_at` parameter:
+
+ ```python
+ from judgeval.rules import NotificationConfig
+ import time
+
+ # Schedule notification for 1 hour from now
+ one_hour_from_now = int(time.time()) + 3600
+
+ notification_config = NotificationConfig(
+     enabled=True,
+     communication_methods=["email"],
+     email_addresses=["user@example.com"],
+     send_at=one_hour_from_now
+ )
+ ```
+
+ The `send_at` parameter accepts a Unix timestamp (integer) that specifies when the notification should be sent. This is useful for delaying notifications or grouping them to be sent at a specific time of day.
+
+ <Warning>
+ The `send_at` parameter only delays when a single notification is sent. It doesn't create recurring notifications or group multiple alerts together. Each time a rule is triggered, a separate notification is generated.
+ </Warning>
+
+ ## Notification Types in the Platform
+
+ The Judgment Platform offers two main types of notifications:
+
+ 1. **Evaluation Alerts** - Real-time notifications sent when specific rules are triggered. When using the API, these can be scheduled for a specific time using the `send_at` parameter.
+
+ 2. **Custom Alert Recaps** - Periodic summaries (daily, weekly, monthly) of evaluation metrics and alerts. These are configured in the [Platform Notification Center](/alerts/platform_notifications).
+
+ ### Setting Up Custom Alert Recaps
+
+ To set up periodic notification summaries:
+
+ 1. Navigate to the Notifications page in your Judgment account settings
+ 2. Under "Custom Alert Recaps," click the "+" button to create a new report
+ 3. Configure your preferred frequency (Daily, Weekly, Monthly) and delivery time
+ 4. Add recipient email addresses
+
+ For more details, see the [Scheduled Reports](/alerts/platform_notifications#scheduled-reports-recaps) documentation.
+
+ ## Judgment Platform Features
+
+ For information about configuring notifications in the Judgment web platform, including email alerts, scheduled reports, and Slack integration, see the [Platform Notification Center](/alerts/platform_notifications) documentation.
+
+ ## Practical Example
+
+ Here's a complete example showing how to set up rules with notifications and integrate them with the Tracer:
+
+ ```python
+ import os
+ from judgeval.common.tracer import Tracer, wrap
+ from judgeval.scorers import FaithfulnessScorer, AnswerRelevancyScorer
+ from judgeval.rules import Rule, Condition, NotificationConfig
+ from openai import OpenAI
+
+ # Create notification config
+ notification_config = NotificationConfig(
+     enabled=True,
+     communication_methods=["slack", "email"],
+     email_addresses=["alerts@example.com"],
+     send_at=None  # Send immediately
+ )
+
+ # Create rules with notification config
+ rules = [
+     Rule(
+         name="Quality Check",
+         description="Check if all quality metrics meet thresholds",
+         conditions=[
+             # Only built-in APIScorers can be used as metrics
+             Condition(metric=FaithfulnessScorer(threshold=0.7)),
+             Condition(metric=AnswerRelevancyScorer(threshold=0.8))
+         ],
+         combine_type="all",  # Trigger when all conditions fail
+         notification=notification_config
+     )
+ ]
+
+ # Initialize tracer with rules for notifications
+ judgment = Tracer(
+     api_key=os.getenv("JUDGMENT_API_KEY"),
+     project_name="my_project",
+     rules=rules
+ )
+
+ # Wrap OpenAI client for tracing
+ client = wrap(OpenAI())
+
+ # Now any evaluations that trigger the rules will send notifications
+ ```
@@ -0,0 +1,74 @@
+ ---
+ title: 'Platform Notification Center'
+ description: 'Configure and manage notifications through the Judgment web interface'
+ ---
+
+ # Platform Notification Center
+
+ The Judgment Platform provides a comprehensive notification system through its web interface, allowing you to configure email notifications, scheduled reports, and app integrations like Slack.
+
+ <Frame>
+   <img src="/images/notifications_page.png" alt="Notifications Settings Page" />
+ </Frame>
+
+ ## Slack Integration
+
+ Judgment allows you to receive notifications directly in your Slack workspace.
+
+ ### Connecting Slack
+
+ 1. Navigate to the Notifications page in your Judgment account settings
+ 2. In the "App Integrations" section, find the Slack card
+ 3. Click the "Connect" button
+ 4. You'll be redirected to Slack's authorization page
+ 5. Select the workspace you want to connect and authorize the Judgment application
+ 6. Once connected, you'll be redirected back to Judgment
+
+ ### Slack Notification Features
+
+ After connecting Slack:
+
+ - Receive real-time alerts when evaluation rules are triggered
+ - Get notifications about model performance issues
+ - Track Judgment activity in your Slack channels
+
+ ### Managing Slack Notifications
+
+ Once connected, you can:
+
+ - Disconnect your Slack workspace at any time
+ - Add specific channels for different types of notifications
+ - Configure which notifications are sent to Slack
+
+ ## Email Notifications
+
+ In the Notifications settings page, you can configure:
+
+ 1. **Evaluation Alerts** - Receive real-time email notifications whenever an evaluation alert is triggered
+ 2. **Custom Alert Recaps** - Receive periodic email summaries of evaluations, traces, and metric scores
+
+ ## Scheduled Reports (Recaps)
+
+ You can create custom scheduled reports to receive regular updates on your agent's performance.
+
+ ### Creating a Report
+
+ 1. Navigate to the Notifications page in your Judgment account settings
+ 2. Under "Custom Alert Recaps," click the "+" button to create a new report
+ 3. Configure your report with the following options:
+
+ <Frame>
+   <img src="/images/reports_modal.png" alt="Scheduled Reports Modal" />
+ </Frame>
+
+ | Setting | Description |
+ |---------|-------------|
+ | Report Name | A descriptive name for your report (e.g., "Daily Alert Summary") |
+ | Recipient Emails | Email addresses that will receive the report |
+ | Frequency | How often the report should be sent (Daily, Weekly, Monthly) |
+ | Select Days | For weekly reports, specify which days of the week |
+ | Time | When the report should be sent |
+ | Timezone | Your local timezone for accurate scheduling |
+ | Compare to Previous Period | Enable to see performance changes over time |
+
+ Your reports will be sent automatically based on your schedule settings, providing insights into your model's performance over time.
@@ -0,0 +1,111 @@
+ ---
+ title: 'Rules'
+ description: 'Define custom triggers and conditions for your evaluation metrics'
+ ---
+
+ # Rules
+
+ Rules allow you to define specific conditions for your evaluation metrics that can trigger alerts and [notifications](/alerts/notifications) when met. They serve as the foundation for the alerting system and help you monitor your AI system's performance against predetermined thresholds.
+
+ ## Overview
+
+ A rule consists of one or more [conditions](#conditions), each tied to a specific metric supported by one of our scorers (such as Faithfulness or AnswerRelevancy). When evaluations are performed, the rules engine checks whether the measured scores satisfy the conditions set in your rules. Based on the rule's configuration, alerts can be triggered and notifications sent through various channels.
+
+ <Note>
+ Rules and notifications only work with built-in APIScorers. Local scorers and custom scorers are not supported for triggering rules.
+ </Note>
+
+ ## Creating Rules
+
+ Rules can be created using the `Rule` class from the `judgeval.rules` module. Each rule requires:
+
+ - A name
+ - A list of [conditions](#conditions)
+ - A [combine type](#combine-types) (how conditions should be evaluated together)
+
+ Optional parameters include:
+
+ - A description
+ - [Notification configuration](/alerts/notifications#notification-configuration)
+
+ ### Basic Rule Structure
+
+ ```python
+ from judgeval.rules import Rule, Condition
+ from judgeval.scorers import FaithfulnessScorer, AnswerRelevancyScorer
+
+ # Create a rule
+ rule = Rule(
+     name="Quality Check",
+     description="Check if quality metrics meet thresholds",
+     conditions=[
+         Condition(metric=FaithfulnessScorer(threshold=0.7)),
+         Condition(metric=AnswerRelevancyScorer(threshold=0.8))
+     ],
+     combine_type="all"  # "all" = AND, "any" = OR
+ )
+ ```
+
+ ## Conditions
+
+ Conditions are the building blocks of rules. Each condition specifies a metric, which must be a built-in APIScorer such as FaithfulnessScorer or AnswerRelevancyScorer. The condition is met when the score for that metric is greater than or equal to the threshold specified in the scorer.
+
+ ### Creating Conditions
+
+ ```python
+ from judgeval.rules import Condition
+ from judgeval.scorers import FaithfulnessScorer
+
+ # Create a condition that passes when the faithfulness score is greater than or equal to 0.7
+ condition = Condition(
+     metric=FaithfulnessScorer(threshold=0.7)
+ )
+ ```
+
+ ### How Conditions are Evaluated
+
+ When a condition is evaluated, it uses the scorer's threshold and internal evaluation logic:
+
+ 1. By default, a condition passes when the actual score is greater than or equal to the threshold
+ 2. If the scorer has a custom `success_check()` method, that method is used instead (see the sketch below)
+ 3. The threshold is retrieved from the scorer's `threshold` attribute
+
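+ As a rough illustration of the logic described in the list above (a hypothetical sketch, not the actual rules-engine source; `success_check` and `threshold` are the attribute names this page documents):
+
+ ```python
+ def condition_passes(scorer, score: float) -> bool:
+     # Prefer the scorer's own success check when it defines one
+     if hasattr(scorer, "success_check"):
+         return scorer.success_check()
+     # Otherwise fall back to comparing against the scorer's threshold
+     return score >= scorer.threshold
+ ```
+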
+ ## Combine Types
+
+ Rules support two combine types that determine how multiple conditions are evaluated:
+
+ - `"all"`: The rule triggers when all conditions fail (logical AND)
+ - `"any"`: The rule triggers when any condition fails (logical OR)
+
+ This design is meant for setting up alerts that trigger when your metrics indicate a problem with your AI system's performance (an `"any"` rule is sketched below).
+
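+ For contrast with the `"all"` examples on this page, here is a minimal sketch of a more sensitive rule that alerts as soon as either metric fails its threshold (the rule name is illustrative):
+
+ ```python
+ from judgeval.rules import Rule, Condition
+ from judgeval.scorers import FaithfulnessScorer, AnswerRelevancyScorer
+
+ sensitive_rule = Rule(
+     name="Any Metric Degraded",
+     description="Alert if faithfulness or relevancy falls below its threshold",
+     conditions=[
+         Condition(metric=FaithfulnessScorer(threshold=0.7)),
+         Condition(metric=AnswerRelevancyScorer(threshold=0.8))
+     ],
+     combine_type="any"  # Trigger when any single condition fails
+ )
+ ```
+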
+ ## Using Rules with the Tracer
+
+ Rules are most commonly used with the `Tracer` to monitor your AI system's performance:
+
+ ```python
+ from judgeval.common.tracer import Tracer
+ from judgeval.rules import Rule, Condition
+ from judgeval.scorers import FaithfulnessScorer, AnswerRelevancyScorer
+
+ # Create rules
+ rules = [
+     Rule(
+         name="Quality Check",
+         description="Check if quality metrics meet thresholds",
+         conditions=[
+             Condition(metric=FaithfulnessScorer(threshold=0.7)),
+             Condition(metric=AnswerRelevancyScorer(threshold=0.8))
+         ],
+         combine_type="all"  # Trigger when all conditions fail
+     )
+ ]
+
+ # Initialize tracer with rules
+ judgment = Tracer(
+     api_key="your_api_key",
+     project_name="your_project",
+     rules=rules
+ )
+ ```
+
+ For more information on configuring notifications with rules, see the [Notifications documentation](/alerts/notifications#attaching-notifications-to-rules).
@@ -89,6 +89,14 @@
        "integration/langgraph"
      ]
    },
+   {
+     "group": "Alerts",
+     "pages": [
+       "alerts/rules",
+       "alerts/notifications",
+       "alerts/platform_notifications"
+     ]
+   },
    {
      "group": "Judgment Platform",
      "pages": [
@@ -1,6 +1,6 @@
  [project]
  name = "judgeval"
- version = "0.0.27"
+ version = "0.0.28"
  authors = [
    { name="Andrew Li", email="andrew@judgmentlabs.ai" },
    { name="Alex Shan", email="alex@judgmentlabs.ai" },
@@ -0,0 +1,21 @@
+ from judgeval import JudgmentClient
+ from judgeval.data import Example
+ from judgeval.scorers import FaithfulnessScorer
+
+ client = JudgmentClient()
+
+ example = Example(
+     input="What if these shoes don't fit?",
+     actual_output="We offer a 30-day full refund at no extra cost.",
+     retrieval_context=["All customers are eligible for a 30 day full refund at no extra cost."],
+ )
+
+ scorer = FaithfulnessScorer(threshold=0.5)
+ results = client.run_evaluation(
+     examples=[example],
+     scorers=[scorer],
+     model="gpt-4o",
+     eval_run_name="TestRun",
+     project_name="TestProject",
+ )
+ print(results)
@@ -0,0 +1,14 @@
+ """
+ ClassifierScorer implementation for basic Text-to-SQL evaluation.
+
+ Takes a natural language query, a corresponding LLM-generated SQL query, and a table schema + (optional) metadata.
+ Determines if the LLM-generated SQL query is valid and works for the natural language query.
+ """
+ from judgeval.scorers import ClassifierScorer
+ from judgeval import JudgmentClient
+ from judgeval.scorers.judgeval_scorers.classifiers.text2sql.text2sql_scorer import Text2SQLScorer
+
+ judgment_client = JudgmentClient()
+
+ print(judgment_client.push_classifier_scorer(Text2SQLScorer, slug="text2sql-eric-linkd"))
+ print(judgment_client.fetch_classifier_scorer("text2sql-eric-linkd"))
@@ -0,0 +1,22 @@
+
+ from judgeval.data import CustomExample
+ from judgeval import JudgmentClient
+ from qodo_scorer import QodoScorer
+
+ judgment = JudgmentClient()
+
+ custom_example = CustomExample(
+     code="print('Hello, world!')",
+     original_code="print('Hello, world!')",
+ )
+
+ qodo_scorer = QodoScorer()
+ results = judgment.run_evaluation(
+     examples=[custom_example],
+     scorers=[qodo_scorer],
+     model="gpt-4o",
+     project_name="QoDoDemo",
+     eval_run_name="QoDoDemoRun1",
+ )
+
+ print(f"{results=}")
@@ -0,0 +1,78 @@
+
+ from judgeval.data import Example
+ from judgeval.scorers import JudgevalScorer
+ from openai import OpenAI, AsyncOpenAI
+
+ client = OpenAI()
+ async_client = AsyncOpenAI()
+
+
+ class QodoScorer(JudgevalScorer):
+
+     def __init__(self,
+                  threshold=0.5,
+                  score_type="CodeReviewScorer",
+                  include_reason=True,
+                  async_mode=True,
+                  strict_mode=False,
+                  verbose_mode=True):
+         super().__init__(
+             threshold=threshold,
+             score_type=score_type,
+             include_reason=include_reason,
+             async_mode=async_mode,
+             strict_mode=strict_mode,
+             verbose_mode=verbose_mode)
+
+     def score_example(self, example: Example) -> float:
+         """
+         Score the example based on the code review criteria.
+         """
+         # First pass: generate a free-form review of Qodo's output
+         response = client.chat.completions.create(
+             model="gpt-4o",
+             messages=[
+                 {"role": "system", "content": "You are a Qodo reviewer. You will be given CODE, a PR_REQUEST, and Qodo's improved summary of the PR_REQUEST as well as its review of the PR_REQUEST given as PR_QUALITY. Your job is to review the CODE and PR_REQUEST and determine how factually accurate and thorough Qodo is. Give reasoning for why you think Qodo's review is or is not accurate and thorough."},
+                 {"role": "user", "content": f"INPUT: {example.input}, CONTEXT: {example.context}, Qodo's REVIEW: {example.actual_output}"},
+             ],
+         )
+         self.reason = response.choices[0].message.content
+
+         # Second pass: condense the free-form review into a 0-1 score
+         score_response = client.chat.completions.create(
+             model="gpt-4o",
+             messages=[
+                 {"role": "system",
+                  "content": "You are a judge. You will be given a review of the performance of Qodo (a code review tool) on the accuracy and thoroughness of its review of a PR_REQUEST given as PR_QUALITY. Your job is to give a score from 0 to 1 on how well Qodo performed based on the REVIEW given to you. Do not output anything except the score."},
+                 {"role": "user", "content": f"REVIEW: {self.reason}"},
+             ],
+         )
+         self.score = float(score_response.choices[0].message.content)
+         return self.score
+
+     async def a_score_example(self, example: Example) -> float:
+         """
+         Async variant of score_example, using the async OpenAI client.
+         """
+         response = await async_client.chat.completions.create(
+             model="gpt-4o",
+             messages=[
+                 {"role": "system", "content": "You are a Qodo reviewer. You will be given CODE, a PR_REQUEST, and Qodo's improved summary of the PR_REQUEST as well as its review of the PR_REQUEST given as PR_QUALITY. Your job is to review the CODE and PR_REQUEST and determine how factually accurate and thorough Qodo is. Give reasoning for why you think Qodo's review is or is not accurate and thorough."},
+                 {"role": "user", "content": f"INPUT: {example.input}, CONTEXT: {example.context}, Qodo's REVIEW: {example.actual_output}"},
+             ],
+         )
+         self.reason = response.choices[0].message.content
+
+         score_response = await async_client.chat.completions.create(
+             model="gpt-4o",
+             messages=[
+                 {"role": "system",
+                  "content": "You are a judge. You will be given a review of the performance of Qodo (a code review tool) on the accuracy and thoroughness of its review of a PR_REQUEST given as PR_QUALITY. Your job is to give a score from 0 to 1 on how well Qodo performed based on the REVIEW given to you. Do not output anything except the score."},
+                 {"role": "user", "content": f"REVIEW: {self.reason}"},
+             ],
+         )
+         self.score = float(score_response.choices[0].message.content)
+         return self.score
+
+     def _success_check(self):
+         if self.error is not None:
+             return False
+         return self.score >= self.threshold
+
+     @property
+     def __name__(self):
+         return "Qodo Scorer"
@@ -84,7 +84,7 @@ Key Information:
  ]
 
  client = wrap(openai.Client(api_key=os.getenv("OPENAI_API_KEY")))
- judgment = Tracer(api_key=os.getenv("JUDGMENT_API_KEY"), project_name="travel_agent_demo", enable_evaluations=False, enable_monitoring=False)
+ judgment = Tracer(api_key=os.getenv("JUDGMENT_API_KEY"), project_name="travel_agent_demo")
 
  def populate_vector_db(collection, destinations_data):
      """