judgeval 0.0.51__py3-none-any.whl → 0.0.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. judgeval/common/logger.py +46 -199
  2. judgeval/common/s3_storage.py +2 -6
  3. judgeval/common/tracer.py +182 -262
  4. judgeval/common/utils.py +16 -36
  5. judgeval/constants.py +14 -20
  6. judgeval/data/__init__.py +0 -2
  7. judgeval/data/datasets/dataset.py +6 -10
  8. judgeval/data/datasets/eval_dataset_client.py +25 -27
  9. judgeval/data/example.py +5 -138
  10. judgeval/data/judgment_types.py +214 -0
  11. judgeval/data/result.py +7 -25
  12. judgeval/data/scorer_data.py +28 -40
  13. judgeval/data/scripts/fix_default_factory.py +23 -0
  14. judgeval/data/scripts/openapi_transform.py +123 -0
  15. judgeval/data/tool.py +3 -54
  16. judgeval/data/trace.py +31 -50
  17. judgeval/data/trace_run.py +3 -3
  18. judgeval/evaluation_run.py +16 -23
  19. judgeval/integrations/langgraph.py +11 -12
  20. judgeval/judges/litellm_judge.py +3 -6
  21. judgeval/judges/mixture_of_judges.py +8 -25
  22. judgeval/judges/together_judge.py +3 -6
  23. judgeval/judgment_client.py +22 -24
  24. judgeval/rules.py +7 -19
  25. judgeval/run_evaluation.py +79 -242
  26. judgeval/scorers/__init__.py +4 -20
  27. judgeval/scorers/agent_scorer.py +21 -0
  28. judgeval/scorers/api_scorer.py +28 -38
  29. judgeval/scorers/base_scorer.py +98 -0
  30. judgeval/scorers/example_scorer.py +19 -0
  31. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -20
  32. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +10 -17
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +9 -24
  34. judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +16 -68
  35. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +4 -12
  36. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +4 -4
  37. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +10 -17
  38. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +4 -4
  39. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +4 -4
  40. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +4 -4
  41. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +18 -14
  42. judgeval/scorers/score.py +45 -330
  43. judgeval/scorers/utils.py +6 -88
  44. judgeval/utils/file_utils.py +4 -6
  45. judgeval/version_check.py +3 -2
  46. {judgeval-0.0.51.dist-info → judgeval-0.0.53.dist-info}/METADATA +3 -2
  47. judgeval-0.0.53.dist-info/RECORD +65 -0
  48. judgeval/data/custom_example.py +0 -19
  49. judgeval/scorers/judgeval_scorer.py +0 -177
  50. judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -45
  51. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -29
  52. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -29
  53. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -32
  54. judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -28
  55. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -38
  56. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -27
  57. judgeval/scorers/prompt_scorer.py +0 -296
  58. judgeval-0.0.51.dist-info/RECORD +0 -69
  59. {judgeval-0.0.51.dist-info → judgeval-0.0.53.dist-info}/WHEEL +0 -0
  60. {judgeval-0.0.51.dist-info → judgeval-0.0.53.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.0.51
3
+ Version: 0.0.53
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3
12
12
  Requires-Python: >=3.11
13
13
  Requires-Dist: anthropic
14
14
  Requires-Dist: boto3
15
+ Requires-Dist: datamodel-code-generator>=0.31.1
15
16
  Requires-Dist: google-genai
16
17
  Requires-Dist: langchain-anthropic
17
18
  Requires-Dist: langchain-core
@@ -51,7 +52,7 @@ We're hiring! Join us in our mission to enable self-learning agents by providing
51
52
 
52
53
  </div>
53
54
 
54
- Judgeval offers **open-source tooling** for tracing, evaluating, and monitoring LLM agents. **Provides comprehensive data from agent-environment interactions** for continuous learning and self-improvement—**enabling the future of autonomous agents**.
55
+ Judgeval offers **open-source tooling** for tracing and evaluating autonomous, stateful agents. It **provides runtime data from agent-environment interactions** for continuous learning and self-improvement.
55
56
 
56
57
  ## 🎬 See Judgeval in Action
57
58
 
@@ -0,0 +1,65 @@
1
+ judgeval/__init__.py,sha256=HM1M8hmqRum6G554QKkXhB4DF4f5eh_xtYo0Kf-t3kw,332
2
+ judgeval/clients.py,sha256=JnB8n90GyXiYaGmSEYaA67mdJSnr3SIrzArao7NGebw,980
3
+ judgeval/constants.py,sha256=lqPVUR7XAr1zbmByJil3i0eY24ymWGzcgg88Npk-U20,5772
4
+ judgeval/evaluation_run.py,sha256=B5w6UiB2cu8km93p4XT3jtganOtIKAZJI3UKc5Qgrew,2936
5
+ judgeval/judgment_client.py,sha256=QT6jV1moshs_-1xjX8jAhQpr9vjznqqcXuobQ7eDBks,21343
6
+ judgeval/rules.py,sha256=CoQjqmP8daEXewMkplmA-7urubDtweOr5O6z8klVwLI,20031
7
+ judgeval/run_evaluation.py,sha256=WXQi2AIKu_iPSLZWnhgLarVbHE6nzyjHJcbKSHu3zYc,42568
8
+ judgeval/version_check.py,sha256=FoLEtpCjDw2HuDQdpw5yT29UtwumSc6ZZN6AV_c9Mnw,1057
9
+ judgeval/common/__init__.py,sha256=KH-QJyWtQ60R6yFIBDYS3WGRiNpEu1guynpxivZvpBQ,309
10
+ judgeval/common/exceptions.py,sha256=OkgDznu2wpBQZMXiZarLJYNk1HIcC8qYW7VypDC3Ook,556
11
+ judgeval/common/logger.py,sha256=514eFLYWS_UL8VY-zAR2ePUlpQe4rbYlleLASFllLE4,1511
12
+ judgeval/common/s3_storage.py,sha256=UvAKGSa0S1BnNprzDKHMAfyT-8zlMAOM5kCrXcVN0HE,3743
13
+ judgeval/common/tracer.py,sha256=qrvriShLG6INpE58sAhlQ6YZfZa3TtfJfsP-cVDyBe4,126135
14
+ judgeval/common/utils.py,sha256=wkdBg86OHROQBXpIPtMyNku5cGckwPpaiATeuilLNbE,34304
15
+ judgeval/data/__init__.py,sha256=1QagDcSQtfnJ632t9Dnq8d7XjAqhmY4mInOWt8qH9tM,455
16
+ judgeval/data/example.py,sha256=6xtPTwWUsZ0HdErU-g954nCv64fsbnS1I5xuEvs14EA,2027
17
+ judgeval/data/judgment_types.py,sha256=VM941NM7_uqwx6bKABV1cH2cocuYgclfORxCK3sPQZo,9853
18
+ judgeval/data/result.py,sha256=7FFD9kOla6ijvu2-Wx3tFE98Ry7ECeV-f8aiDeHNaHs,2449
19
+ judgeval/data/scorer_data.py,sha256=ty4clGts-Zp6NiU1SZXKbrVsyKvHhD5Tm1kbXx6we1k,2977
20
+ judgeval/data/tool.py,sha256=iWQSdy5uNbIeACu3gQy1DC2oGYxRVYNfkkczWdQMAiA,99
21
+ judgeval/data/trace.py,sha256=szugEHAb2R0YljmBQllQEVE5pOlBUC6eOSzbm_WXf-Y,4830
22
+ judgeval/data/trace_run.py,sha256=kovRZduC0l-9nM5YWM6lKaQNEVy_WtHwt4lvIwPbHvY,1825
23
+ judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
24
+ judgeval/data/datasets/dataset.py,sha256=dDmTYSBRj4YEUhgYOebAcDm4N14nj3tcCqHj9y2Z1z0,12725
25
+ judgeval/data/datasets/eval_dataset_client.py,sha256=0XS8irOA-gI1aEX3hk0LikzLjb6DOLuj18j2w64BoQM,12614
26
+ judgeval/data/scripts/fix_default_factory.py,sha256=lvp2JwYZqz-XpD9LZNa3mANZVP-jJSZoNzolI6JWERM,591
27
+ judgeval/data/scripts/openapi_transform.py,sha256=mT8qrzhvtMrMMC6Q_amSOGt-X-hUbDlT3xvpgEfcuEs,3828
28
+ judgeval/integrations/langgraph.py,sha256=WuaHqer8i2QV_yZWoB18RNDLAYeH_Z_quVERvTOySQU,36151
29
+ judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
30
+ judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY,986
31
+ judgeval/judges/litellm_judge.py,sha256=LX4_KXb1Jp8IXif3vvOiKfRYH7ZkbQLs9AtWPGmj544,2483
32
+ judgeval/judges/mixture_of_judges.py,sha256=wcHwLi9zU0uwKMqRVhcPdjiYKgWflX4dpUbU2kS9yg0,14825
33
+ judgeval/judges/together_judge.py,sha256=r5k8ZcC6lnsFttGHhrocFtmglx2Cb3G-4ORKAeK-Nmw,2253
34
+ judgeval/judges/utils.py,sha256=0CF9qtIUQUL3-W-qTGpmTjZbkUUBAM6TslDsrCHnTBU,2725
35
+ judgeval/scorers/__init__.py,sha256=7f_zsJV29gO_u4o0n2011SovJ1ZGAI5Zk11WPUBPWcs,858
36
+ judgeval/scorers/agent_scorer.py,sha256=TjwD_YglSywr3EowEojiCyg5qDgCRa5LRGc5nFdmIBc,703
37
+ judgeval/scorers/api_scorer.py,sha256=xlhqkeMUBFxl8daSXOTWOYwZjBAz7o6b4sVD5f8cIHw,2523
38
+ judgeval/scorers/base_scorer.py,sha256=rZfRPolxbsghWS0-FMqXrbJKuLobysMGjAeZkqn0cr0,3581
39
+ judgeval/scorers/example_scorer.py,sha256=2n45y3LMV1Q-ARyXLHqvVWETlnY1DqS7OLzPu9IBGz8,716
40
+ judgeval/scorers/exceptions.py,sha256=ACDHK5-TWiF3NTk-wycaedpbrdobm-CvvC1JA_iP-Mk,179
41
+ judgeval/scorers/score.py,sha256=oQC6LMsalL4XAtXlA3S84MB9YiHjqYIgMhRRi-zaXJ4,6577
42
+ judgeval/scorers/utils.py,sha256=I13XwyBKMUpZK2oacgkwaieUOGlQbKxKKn6SdiA4lmE,4532
43
+ judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
+ judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=saQmMol_CMzp1yovjgiF3YYhLTu-4O9xtmhygj1LRh8,1496
45
+ judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=zJsU0VrUmRhY9qav48c6jTyDqUwI3JzhV9ajtlJCe0M,544
46
+ judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=UDfzTO9Fx0FA5o0wfD8kprrGA4eW-43Rn9Gc0BQtKgY,393
47
+ judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py,sha256=rbG80J88cer7yfVRvLpu-x2cdwiTl-ztnF2wgOoIlcE,2624
48
+ judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py,sha256=mbBvirNcivu9dP6deM7FogDXrdwI9o8yqsO8IeKPSb4,309
49
+ judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=NABO_iBdkOo3fdPVcoWfUkeN-FTX3t3-bErMjdqBXdk,1361
50
+ judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ps51bTgQsD9xGYsk1v9bx0WxQMqywSllCE9_xlJkLd8,531
51
+ judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=SnFLvU4FGsMeUVUp0SGHSy_6wgfwr_vHPGnZx5YJl_Q,691
52
+ judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=aQzu-TiGqG74JDQ927evv5yGmnZw2AOolyHvlIhiUbI,683
53
+ judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py,sha256=Mcp1CjMNyOax9UkvoRdSyUYdO2Os1-Nko43y89m2Luo,594
54
+ judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py,sha256=Z2FLGBC7m_CLx-CMgXVuTvYvN0vY5yOcWA0ImBkeBfY,787
55
+ judgeval/scorers/judgeval_scorers/classifiers/__init__.py,sha256=Qt81W5ZCwMvBAne0LfQDb8xvg5iOG1vEYP7WizgwAZo,67
56
+ judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py,sha256=8iTzMvou1Dr8pybul6lZHKjc9Ye2-0_racRGYkhEdTY,74
57
+ judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256=gloLzThkFsr8sHQargDAH8XaDrlF6OCuc_69hyNslFU,2589
58
+ judgeval/tracer/__init__.py,sha256=wkuXtOGDCrwgPPXlh_sSJmvGuWaAMHyNzk1TzB5f9aI,148
59
+ judgeval/utils/alerts.py,sha256=3w_AjQrgfmOZvfqCridW8WAnHVxHHXokX9jNzVFyGjA,3297
60
+ judgeval/utils/file_utils.py,sha256=wIEn8kjM0WrP216RGU_yhZhFOMWIS5ckigyHbzFSOMk,1774
61
+ judgeval/utils/requests.py,sha256=rbmZTaiyWI8t2YUkhk11SIe3dF7j2j25L1BuFp_1PII,770
62
+ judgeval-0.0.53.dist-info/METADATA,sha256=dwUw2htkiBkwbwWouoezCpKM-g5IsOkMr0KRgKytMQg,54767
63
+ judgeval-0.0.53.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
64
+ judgeval-0.0.53.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
65
+ judgeval-0.0.53.dist-info/RECORD,,
@@ -1,19 +0,0 @@
1
- from pydantic import BaseModel, Field
2
- from typing import Optional, List, Dict, Any
3
- from uuid import uuid4
4
-
5
-
6
- class CustomExample(BaseModel):
7
- input: Optional[Dict[str, Any]] = None
8
- actual_output: Optional[Dict[str, Any]] = None
9
- expected_output: Optional[Dict[str, Any]] = None
10
- context: Optional[List[str]] = None
11
- retrieval_context: Optional[List[str]] = None
12
- additional_metadata: Optional[Dict[str, Any]] = None
13
- tools_called: Optional[List[str]] = None
14
- expected_tools: Optional[List[str]] = None
15
- name: Optional[str] = None
16
- example_id: str = Field(default_factory=lambda: str(uuid4()))
17
- example_index: Optional[int] = None
18
- timestamp: Optional[str] = None
19
- trace_id: Optional[str] = None
@@ -1,177 +0,0 @@
1
- """
2
- Judgeval Scorer class
3
-
4
- Enables client to create custom scorers that do not fall under any of the ready-made Judgment scorers.
5
- To create a custom scorer, extend this class and implement the `score_example`, `a_score_example`, and `success_check` methods.
6
- """
7
-
8
- from typing import Optional, Dict, Union, List
9
- from abc import abstractmethod
10
-
11
- from judgeval.common.logger import debug, info, warning, error
12
- from judgeval.judges import JudgevalJudge
13
- from judgeval.judges.utils import create_judge
14
- from judgeval.constants import UNBOUNDED_SCORERS
15
- from judgeval.data.example import ExampleParams
16
-
17
-
18
- class JudgevalScorer:
19
- """
20
- Base class for scorers in `judgeval`.
21
-
22
- In practice, you should not implement this class unless you are creating a custom scorer.
23
- Judgeval offers 10+ default scorers that you can use out of the box.
24
-
25
- If you want to create a scorer that does not fall under any of the ready-made Judgment scorers,
26
- you can create a custom scorer by extending this class.
27
- """
28
-
29
- score_type: str # name of your new scorer
30
- threshold: float # The threshold to pass a test while using this scorer as a scorer
31
- score: Optional[float] = None # The float score of the scorer run on the test case
32
- score_breakdown: Optional[Dict] = None
33
- reason: Optional[str] = (
34
- None # The reason for the score when evaluating the test case
35
- )
36
- success: Optional[bool] = None # Whether the test case passed or failed
37
- evaluation_model: Optional[str] = None # The model used to evaluate the test case
38
- strict_mode: bool = False # Whether to run the scorer in strict mode
39
- async_mode: bool = True # Whether to run the scorer in async mode
40
- verbose_mode: bool = True # Whether to run the scorer in verbose mode
41
- include_reason: bool = False # Whether to include the reason in the output
42
- custom_example: bool = False # Whether the scorer corresponds to CustomExamples
43
- error: Optional[str] = None # The error message if the scorer failed
44
- evaluation_cost: Optional[float] = None # The cost of running the scorer
45
- verbose_logs: Optional[str] = None # The verbose logs of the scorer
46
- additional_metadata: Optional[Dict] = None # Additional metadata for the scorer
47
- required_params: Optional[List[ExampleParams]] = (
48
- None # The required parameters for the scorer
49
- )
50
-
51
- def __init__(
52
- self,
53
- score_type: str,
54
- threshold: float,
55
- score: Optional[float] = None,
56
- score_breakdown: Optional[Dict] = None,
57
- reason: Optional[str] = None,
58
- success: Optional[bool] = None,
59
- evaluation_model: Optional[str] = None,
60
- required_params: Optional[List[ExampleParams]] = None,
61
- strict_mode: bool = False,
62
- async_mode: bool = True,
63
- verbose_mode: bool = True,
64
- include_reason: bool = False,
65
- custom_example: bool = False,
66
- error: Optional[str] = None,
67
- evaluation_cost: Optional[float] = None,
68
- verbose_logs: Optional[str] = None,
69
- additional_metadata: Optional[Dict] = None,
70
- ):
71
- debug(
72
- f"Initializing JudgevalScorer with score_type={score_type}, threshold={threshold}"
73
- )
74
- if score_type in UNBOUNDED_SCORERS:
75
- if threshold < 0:
76
- raise ValueError(
77
- f"Threshold for {score_type} must be greater than 0, got: {threshold}"
78
- )
79
- else:
80
- if not 0 <= threshold <= 1:
81
- raise ValueError(
82
- f"Threshold for {score_type} must be between 0 and 1, got: {threshold}"
83
- )
84
- if strict_mode:
85
- warning("Strict mode enabled - scoring will be more rigorous")
86
- info(f"JudgevalScorer initialized with evaluation_model: {evaluation_model}")
87
- self.score_type = score_type
88
- self.threshold = threshold
89
- self.score = score
90
- self.score_breakdown = score_breakdown
91
- self.reason = reason
92
- self.success = success
93
- self.evaluation_model = evaluation_model
94
- self.strict_mode = strict_mode
95
- self.async_mode = async_mode
96
- self.verbose_mode = verbose_mode
97
- self.include_reason = include_reason
98
- self.custom_example = custom_example
99
- self.error = error
100
- self.evaluation_cost = evaluation_cost
101
- self.verbose_logs = verbose_logs
102
- self.additional_metadata = additional_metadata
103
- self.required_params = required_params
104
-
105
- def _add_model(self, model: Optional[Union[str, List[str], JudgevalJudge]] = None):
106
- """
107
- Adds the evaluation model to the JudgevalScorer instance
108
-
109
- This method is used at eval time
110
- """
111
- self.model, self.using_native_model = create_judge(model)
112
- self.evaluation_model = self.model.get_model_name()
113
-
114
- @abstractmethod
115
- def score_example(self, example, *args, **kwargs) -> float:
116
- """
117
- Measures the score on a single example
118
- """
119
- warning("Attempting to call unimplemented score_example method")
120
- error("score_example method not implemented")
121
- raise NotImplementedError(
122
- "You must implement the `score` method in your custom scorer"
123
- )
124
-
125
- @abstractmethod
126
- async def a_score_example(self, example, *args, **kwargs) -> float:
127
- """
128
- Asynchronously measures the score on a single example
129
- """
130
- warning("Attempting to call unimplemented a_score_example method")
131
- error("a_score_example method not implemented")
132
- raise NotImplementedError(
133
- "You must implement the `a_score` method in your custom scorer"
134
- )
135
-
136
- @abstractmethod
137
- def _success_check(self) -> bool:
138
- """
139
- For unit testing, determines whether the test case passes or fails
140
- """
141
- warning("Attempting to call unimplemented success_check method")
142
- error("_success_check method not implemented")
143
- raise NotImplementedError(
144
- "You must implement the `_success_check` method in your custom scorer"
145
- )
146
-
147
- def __str__(self):
148
- debug("Converting JudgevalScorer instance to string representation")
149
- if self.error:
150
- warning(f"JudgevalScorer contains error: {self.error}")
151
- info(f"JudgevalScorer status - success: {self.success}, score: {self.score}")
152
- attributes = {
153
- "score_type": self.score_type,
154
- "threshold": self.threshold,
155
- "score": self.score,
156
- "score_breakdown": self.score_breakdown,
157
- "reason": self.reason,
158
- "success": self.success,
159
- "evaluation_model": self.evaluation_model,
160
- "strict_mode": self.strict_mode,
161
- "async_mode": self.async_mode,
162
- "verbose_mode": self.verbose_mode,
163
- "include_reason": self.include_reason,
164
- "error": self.error,
165
- "evaluation_cost": self.evaluation_cost,
166
- "verbose_logs": self.verbose_logs,
167
- "additional_metadata": self.additional_metadata,
168
- }
169
- return f"JudgevalScorer({attributes})"
170
-
171
- def to_dict(self):
172
- return {
173
- "score_type": str(
174
- self.score_type
175
- ), # Convert enum to string for serialization
176
- "threshold": self.threshold,
177
- }
@@ -1,45 +0,0 @@
1
- """
2
- `judgeval` comparison scorer
3
-
4
- TODO add link to docs page for this scorer
5
-
6
- """
7
-
8
- # Internal imports
9
- from judgeval.scorers.api_scorer import APIJudgmentScorer
10
- from judgeval.constants import APIScorer
11
- from typing import Optional, Dict
12
- from judgeval.data import ExampleParams
13
-
14
-
15
- class ComparisonScorer(APIJudgmentScorer):
16
- kwargs: Optional[Dict] = None
17
-
18
- def __init__(self, threshold: float, criteria: str, description: str):
19
- super().__init__(
20
- threshold=threshold,
21
- score_type=APIScorer.COMPARISON,
22
- required_params=[
23
- ExampleParams.INPUT,
24
- ExampleParams.ACTUAL_OUTPUT,
25
- ExampleParams.EXPECTED_OUTPUT,
26
- ],
27
- )
28
- self.kwargs = {"criteria": criteria, "description": description}
29
-
30
- @property
31
- def __name__(self):
32
- return f"Comparison-{self.kwargs['criteria']}"
33
-
34
- def to_dict(self) -> dict:
35
- """
36
- Converts the scorer configuration to a dictionary format.
37
-
38
- Returns:
39
- dict: A dictionary containing the scorer's configuration
40
- """
41
- return {
42
- "score_type": self.score_type,
43
- "threshold": self.threshold,
44
- "kwargs": self.kwargs,
45
- }
@@ -1,29 +0,0 @@
1
- """
2
- `judgeval` contextual precision scorer
3
-
4
- TODO add link to docs page for this scorer
5
-
6
- """
7
-
8
- # Internal imports
9
- from judgeval.scorers.api_scorer import APIJudgmentScorer
10
- from judgeval.constants import APIScorer
11
- from judgeval.data import ExampleParams
12
-
13
-
14
- class ContextualPrecisionScorer(APIJudgmentScorer):
15
- def __init__(self, threshold: float):
16
- super().__init__(
17
- threshold=threshold,
18
- score_type=APIScorer.CONTEXTUAL_PRECISION,
19
- required_params=[
20
- ExampleParams.INPUT,
21
- ExampleParams.ACTUAL_OUTPUT,
22
- ExampleParams.RETRIEVAL_CONTEXT,
23
- ExampleParams.EXPECTED_OUTPUT,
24
- ],
25
- )
26
-
27
- @property
28
- def __name__(self):
29
- return "Contextual Precision"
@@ -1,29 +0,0 @@
1
- """
2
- `judgeval` contextual recall scorer
3
-
4
- TODO add link to docs page for this scorer
5
-
6
- """
7
-
8
- # Internal imports
9
- from judgeval.scorers.api_scorer import APIJudgmentScorer
10
- from judgeval.constants import APIScorer
11
- from judgeval.data import ExampleParams
12
-
13
-
14
- class ContextualRecallScorer(APIJudgmentScorer):
15
- def __init__(self, threshold: float):
16
- super().__init__(
17
- threshold=threshold,
18
- score_type=APIScorer.CONTEXTUAL_RECALL,
19
- required_params=[
20
- ExampleParams.INPUT,
21
- ExampleParams.ACTUAL_OUTPUT,
22
- ExampleParams.EXPECTED_OUTPUT,
23
- ExampleParams.RETRIEVAL_CONTEXT,
24
- ],
25
- )
26
-
27
- @property
28
- def __name__(self):
29
- return "Contextual Recall"
@@ -1,32 +0,0 @@
1
- """
2
- `judgeval` contextual relevancy scorer
3
-
4
- TODO add link to docs page for this scorer
5
-
6
- """
7
-
8
- # Internal imports
9
- from judgeval.scorers.api_scorer import APIJudgmentScorer
10
- from judgeval.constants import APIScorer
11
- from judgeval.data import ExampleParams
12
-
13
-
14
- class ContextualRelevancyScorer(APIJudgmentScorer):
15
- """
16
- Scorer that checks if the output of a model is relevant to the retrieval context
17
- """
18
-
19
- def __init__(self, threshold: float):
20
- super().__init__(
21
- threshold=threshold,
22
- score_type=APIScorer.CONTEXTUAL_RELEVANCY,
23
- required_params=[
24
- ExampleParams.INPUT,
25
- ExampleParams.ACTUAL_OUTPUT,
26
- ExampleParams.RETRIEVAL_CONTEXT,
27
- ],
28
- )
29
-
30
- @property
31
- def __name__(self):
32
- return "Contextual Relevancy"
@@ -1,28 +0,0 @@
1
- """
2
- `judgeval` Groundedness scorer
3
-
4
- TODO add link to docs page for this scorer
5
-
6
- """
7
-
8
- # Internal imports
9
- from judgeval.scorers.api_scorer import APIJudgmentScorer
10
- from judgeval.constants import APIScorer
11
- from judgeval.data import ExampleParams
12
-
13
-
14
- class GroundednessScorer(APIJudgmentScorer):
15
- def __init__(self, threshold: float):
16
- super().__init__(
17
- threshold=threshold,
18
- score_type=APIScorer.GROUNDEDNESS,
19
- required_params=[
20
- ExampleParams.INPUT,
21
- ExampleParams.ACTUAL_OUTPUT,
22
- ExampleParams.RETRIEVAL_CONTEXT,
23
- ],
24
- )
25
-
26
- @property
27
- def __name__(self):
28
- return "Groundedness"
@@ -1,38 +0,0 @@
1
- """
2
- `judgeval` JSON correctness scorer
3
-
4
- TODO add link to docs page for this scorer
5
-
6
- """
7
-
8
- # External imports
9
- from pydantic import BaseModel, Field
10
-
11
- # Internal imports
12
- from judgeval.scorers.api_scorer import APIJudgmentScorer
13
- from judgeval.constants import APIScorer
14
- from judgeval.data import ExampleParams
15
-
16
-
17
- class JSONCorrectnessScorer(APIJudgmentScorer):
18
- json_schema: BaseModel = Field(None, exclude=True)
19
-
20
- def __init__(self, threshold: float, json_schema: BaseModel):
21
- super().__init__(
22
- threshold=threshold,
23
- score_type=APIScorer.JSON_CORRECTNESS,
24
- required_params=[
25
- ExampleParams.INPUT,
26
- ExampleParams.ACTUAL_OUTPUT,
27
- ],
28
- )
29
- object.__setattr__(self, "json_schema", json_schema)
30
-
31
- def to_dict(self):
32
- base_dict = super().to_dict() # Get the parent class's dictionary
33
- base_dict["kwargs"] = {"json_schema": self.json_schema.model_json_schema()}
34
- return base_dict
35
-
36
- @property
37
- def __name__(self):
38
- return "JSON Correctness"
@@ -1,27 +0,0 @@
1
- """
2
- `judgeval` summarization scorer
3
-
4
- TODO add link to docs page for this scorer
5
-
6
- """
7
-
8
- # Internal imports
9
- from judgeval.scorers.api_scorer import APIJudgmentScorer
10
- from judgeval.constants import APIScorer
11
- from judgeval.data import ExampleParams
12
-
13
-
14
- class SummarizationScorer(APIJudgmentScorer):
15
- def __init__(self, threshold: float):
16
- super().__init__(
17
- threshold=threshold,
18
- score_type=APIScorer.SUMMARIZATION,
19
- required_params=[
20
- ExampleParams.INPUT,
21
- ExampleParams.ACTUAL_OUTPUT,
22
- ],
23
- )
24
-
25
- @property
26
- def __name__(self):
27
- return "Summarization"