judgeval 0.0.52__py3-none-any.whl → 0.0.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. judgeval/common/logger.py +46 -199
  2. judgeval/common/s3_storage.py +2 -6
  3. judgeval/common/tracer.py +182 -262
  4. judgeval/common/utils.py +16 -36
  5. judgeval/constants.py +14 -20
  6. judgeval/data/__init__.py +0 -2
  7. judgeval/data/datasets/dataset.py +6 -10
  8. judgeval/data/datasets/eval_dataset_client.py +25 -27
  9. judgeval/data/example.py +5 -138
  10. judgeval/data/judgment_types.py +214 -0
  11. judgeval/data/result.py +7 -25
  12. judgeval/data/scorer_data.py +28 -40
  13. judgeval/data/scripts/fix_default_factory.py +23 -0
  14. judgeval/data/scripts/openapi_transform.py +123 -0
  15. judgeval/data/tool.py +3 -54
  16. judgeval/data/trace.py +31 -50
  17. judgeval/data/trace_run.py +3 -3
  18. judgeval/evaluation_run.py +16 -23
  19. judgeval/integrations/langgraph.py +11 -12
  20. judgeval/judges/litellm_judge.py +3 -6
  21. judgeval/judges/mixture_of_judges.py +8 -25
  22. judgeval/judges/together_judge.py +3 -6
  23. judgeval/judgment_client.py +22 -24
  24. judgeval/rules.py +7 -19
  25. judgeval/run_evaluation.py +79 -242
  26. judgeval/scorers/__init__.py +4 -20
  27. judgeval/scorers/agent_scorer.py +21 -0
  28. judgeval/scorers/api_scorer.py +28 -38
  29. judgeval/scorers/base_scorer.py +98 -0
  30. judgeval/scorers/example_scorer.py +19 -0
  31. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -20
  32. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +10 -17
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +9 -24
  34. judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +16 -68
  35. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +4 -12
  36. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +4 -4
  37. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +10 -17
  38. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +4 -4
  39. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +4 -4
  40. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +4 -4
  41. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +18 -14
  42. judgeval/scorers/score.py +45 -330
  43. judgeval/scorers/utils.py +6 -88
  44. judgeval/utils/file_utils.py +4 -6
  45. judgeval/version_check.py +3 -2
  46. {judgeval-0.0.52.dist-info → judgeval-0.0.54.dist-info}/METADATA +6 -5
  47. judgeval-0.0.54.dist-info/RECORD +65 -0
  48. judgeval/data/custom_example.py +0 -19
  49. judgeval/scorers/judgeval_scorer.py +0 -177
  50. judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -45
  51. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -29
  52. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -29
  53. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -32
  54. judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -28
  55. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -38
  56. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -27
  57. judgeval/scorers/prompt_scorer.py +0 -296
  58. judgeval-0.0.52.dist-info/RECORD +0 -69
  59. {judgeval-0.0.52.dist-info → judgeval-0.0.54.dist-info}/WHEEL +0 -0
  60. {judgeval-0.0.52.dist-info → judgeval-0.0.54.dist-info}/licenses/LICENSE.md +0 -0
judgeval/scorers/prompt_scorer.py
@@ -1,296 +0,0 @@
- """
- Code that implements a prompt-based scorer for evaluating examples.
-
- The PromptScorer class is a base class that can be used to create custom scoring metrics using LLM prompts.
- To implement a subclass of PromptScorer, you need to implement the following methods:
- - build_measure_prompt(): builds the conversation prompt that is sent to the LLM judge
- - build_schema(): defines the expected response schema from the LLM
- - process_response(): parses the response from the LLM judge
- - success_check(): determines whether the evaluation was successful
-
- The core idea of PromptScorer is to provide a flexible way to create custom scoring metrics
- by leveraging LLM judges to evaluate examples. The scorer constructs a prompt, sends it to
- the judge, and parses the structured response to determine a score.
-
- For example, the SentimentScorer subclass uses PromptScorer to detect negative sentiment in responses
- by prompting an LLM to rate the negativity on a 1-5 scale and provide a reason for the rating.
-
- The PromptScorer supports both synchronous and asynchronous evaluation modes, includes optional
- reason fields in responses, and can operate in strict mode with higher thresholds.
-
- NOTE: When implementing build_measure_prompt and build_schema:
- - The prompt should guide the LLM to generate a response matching your schema
- - The schema should include "score" and optionally "reason" fields
- - The score field type and range should match your scoring criteria
- - The reason field provides explanatory context for the score
- """
-
- from abc import abstractmethod
- from typing import List, Optional, Tuple, Any
- from pydantic import BaseModel, Field
-
- from judgeval.data import Example
- from judgeval.data.example import ExampleParams
- from judgeval.scorers import JudgevalScorer
- from judgeval.scorers.utils import (
-     scorer_progress_meter,
-     parse_response_json,
-     get_or_create_event_loop,
-     create_verbose_logs,
- )
- from judgeval.judges import JudgevalJudge
-
-
- class ReasonScore(BaseModel):
-     reason: str
-     score: float
-
-
- class PromptScorer(JudgevalScorer, BaseModel):
-     name: str
-     score_type: str
-     threshold: float = Field(default=0.5)
-     using_native_model: bool = Field(default=True)
-     model: Optional[JudgevalJudge] = Field(default=None)
-     skipped: bool = Field(default=False)
-     # DO NOT SET THESE FIELDS MANUALLY, THEY ARE SET BY THE SCORE_EXAMPLE METHOD
-     _response: Optional[dict] = None
-     _result: Optional[float] = None
-
-     def __init__(
-         self,
-         name: str,
-         threshold: float = 0.5,
-         include_reason: bool = True,
-         async_mode: bool = True,
-         strict_mode: bool = False,
-         verbose_mode: bool = False,
-         required_params: Optional[List[ExampleParams]] = None,
-     ):
-         # Initialize BaseModel first
-         BaseModel.__init__(
-             self,
-             name=name,
-             score_type=name,
-             threshold=1 if strict_mode else threshold,
-             include_reason=include_reason,
-             async_mode=async_mode,
-             strict_mode=strict_mode,
-             verbose_mode=verbose_mode,
-         )
-         # Then initialize JudgevalScorer
-         JudgevalScorer.__init__(
-             self,
-             score_type=name,
-             threshold=1 if strict_mode else threshold,
-             include_reason=include_reason,
-             async_mode=async_mode,
-             strict_mode=strict_mode,
-             verbose_mode=verbose_mode,
-             required_params=required_params,
-         )
-
-     def score_example(
-         self, example: Example, _show_indicator: bool = True
-     ) -> float | None:
-         """
-         Synchronous method for scoring an example using the prompt criteria.
-         """
-         with scorer_progress_meter(self, display_meter=_show_indicator):
-             if self.async_mode:
-                 loop = get_or_create_event_loop()
-                 loop.run_until_complete(
-                     self.a_score_example(example, _show_indicator=False)
-                 )
-                 return self._result
-             else:
-                 result, reason = self.evaluate(example)
-                 self.reason = reason
-                 self._result = result
-                 self.verbose_logs = create_verbose_logs(
-                     self,
-                     steps=[
-                         f"Results: {self._result}\nReason: {self.reason}",
-                     ],
-                 )
-                 return result
-
-     async def a_score_example(
-         self,
-         example: Example,
-         _show_indicator: bool = True,
-     ) -> float:
-         """
-         Async method for scoring an example using the prompt criteria.
-         """
-         with scorer_progress_meter(self, display_meter=_show_indicator):
-             result, reason = await self.a_evaluate(example)
-             self.reason = reason
-             self._result = result
-             self.verbose_logs = create_verbose_logs(
-                 self,
-                 steps=[
-                     f"Results: {self._result}\nReason: {self.reason}",
-                 ],
-             )
-             return result
-
-     def evaluate(self, example: Example) -> Tuple[Any, str]:
-         """
-         Synchronous helper method for evaluating an example using the prompt criteria.
-
-         Builds a custom prompt using `build_measure_prompt` and sends it to the judge model
-         for evaluation. The result is then parsed as JSON and returned.
-
-         NOTE: It is assumed that the model response will be JSON and contain a "score" and "reason" field.
-         """
-         prompt = self._build_measure_prompt(example)
-         if self.using_native_model and self.model:
-             res = self.model.generate(prompt)
-             response = parse_response_json(res, self)
-             result, reason = self._process_response(response)
-             return result, reason
-         else:
-             raise NotImplementedError(
-                 "Non-native judge models are not supported in synchronous mode yet."
-             )
-
-     async def a_evaluate(self, example: Example) -> Tuple[Any, str]:
-         """
-         Asynchronous helper method for evaluating an example using the prompt criteria.
-
-         Builds a custom prompt using `build_measure_prompt` and sends it to the judge model
-         for evaluation. The result is then parsed as JSON and returned.
-
-         NOTE: It is assumed that the model response will be JSON and contain a "score" and "reason" field.
-         """
-         judge_prompt = self._build_measure_prompt(example)
-         schema = self._build_schema()
-         prompt = self._enforce_prompt_format(judge_prompt=judge_prompt, schema=schema)
-         if self.using_native_model and self.model:
-             res = await self.model.a_generate(prompt)
-             response = parse_response_json(res, self)
-             self._response = response
-
-             result, reason = self._process_response(response)
-             self.score = result
-             self.reason = reason
-             self._response = response
-             return result, reason
-         else:
-             raise NotImplementedError(
-                 "Non-native judge models are not supported in async mode yet."
-             )
-
-     # TODO: can we make this take *args and **kwargs? How does that work with a_evaluate() since we'd have to pass the same args
-     @abstractmethod
-     def _build_measure_prompt(self, example: Example) -> List[dict]:
-         # builds the prompt that is sent to the model inside of the `score_example()` method
-         # returns either a string prompt or a conversation prompt of the form [{"role": "system", "content": "..."}, ...]
-
-         """
-         This function creates the prompt that the judge model uses to evaluate examples.
-
-         The prompt is typically a set of instructions that the judge model uses to evaluate the example.
-
-         This function returns a conversation prompt of the form
-         [{"role": "system", "content": "..."}, {"role": "user", "content": "..."}]
-
-         A basic version of implementing this function could be as follows:
-         SYSTEM_ROLE = ...
-         return [
-             {"role": "system", "content": SYSTEM_ROLE},
-             {"role": "user", "content": f"Response: {example.actual_output}\n\nYour judgment: "}
-         ]
-         """
-         pass
-
-     # TODO: does this need to take *args and **kwargs? How does that work with a_evaluate() since we'd have to pass the same args
-     @abstractmethod
-     def _build_schema(self) -> dict:
-         """
-         This function returns a dictionary that represents the schema of the JSON response that the judge model should return.
-
-         The keys of the dictionary are the expected keys in the response, and the values are the types of the corresponding values.
-
-         Example: If you want to have the judge model return a score and a reason, you would write:
-         return {"score": int, "reason": str}
-         """
-         pass
-
-     def _enforce_prompt_format(self, judge_prompt: List[dict], schema: dict):
-         """
-         Formats the final prompt to the judge model.
-
-         This function takes a list of dictionaries (`judge_prompt`) and a schema dictionary (`schema`),
-         and appends a schema enforcement prompt to the content of the first dictionary in the list, which is assumed to be the system prompt.
-         The schema enforcement prompt instructs the judge model to provide its response in a specific JSON format.
-
-         Args:
-             judge_prompt (List[dict]): A list of dictionaries representing the judge prompt.
-                 Each dictionary should contain a "content" key.
-             schema (dict): A dictionary representing the schema. The keys are the expected keys in the response,
-                 and the values are the types of the corresponding values.
-
-         Returns:
-             List[dict]: The modified judge prompt with the schema enforcement prompt appended to the content
-             of the first dictionary.
-
-         Raises:
-             TypeError: If `judge_prompt` is not a list of dictionaries.
-
-         Example:
-             judge_prompt = [{"content": "Please evaluate the following:"}]
-             schema = {"score": int, "comments": str}
-             formatted_prompt = format_measure_prompt(judge_prompt, schema)
-             # formatted_prompt[0]["content"] will include the schema enforcement prompt
-         """
-         SCHEMA_ENFORCEMENT_PROMPT = (
-             "\n\nPlease provide your response in the following JSON format: {"
-         )
-         if isinstance(judge_prompt, list) and all(
-             isinstance(item, dict) for item in judge_prompt
-         ):
-             # create formatting string for schema enforcement
-             # schema is a map between key and type of the value
-             for key, key_type in schema.items():
-                 SCHEMA_ENFORCEMENT_PROMPT += f'"{key}": <{key}> ({key_type.__name__}), '
-             SCHEMA_ENFORCEMENT_PROMPT = (
-                 SCHEMA_ENFORCEMENT_PROMPT[:-2] + "}"
-             ) # remove trailing comma and space
-             judge_prompt[0]["content"] += SCHEMA_ENFORCEMENT_PROMPT
-             return judge_prompt
-         else:
-             raise TypeError(
-                 f"Prompt must be a list of dictionaries. Got {type(judge_prompt)} instead."
-             )
-
-     @abstractmethod
-     def _process_response(self, response: dict):
-         """
-         Customizable method for processing the response from the judge model.
-
-         You can add any additional logic to parse the JSON response here and return the result and reason for decision.
-
-         If you don't need a reason for the decision, you can simply return (score, None).
-
-         Example:
-             score = response["score"]
-             reason = response["reason"]
-             return score, reason
-         """
-         pass
-
-     @abstractmethod
-     def _success_check(self, **kwargs) -> bool:
-         """
-         Determines whether or not the PromptScorer should consider the evaluation of a single example successful.
-         """
-         pass
-
-     @property
-     def __name__(self):
-         return self.name
-
-     class Config:
-         arbitrary_types_allowed = True
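
For reference, the module docstring above defines the subclassing contract: build a conversation prompt for the LLM judge, declare the expected JSON response schema, parse the judge's response, and decide success. A minimal sketch of that pattern is shown below, assuming judgeval 0.0.52 (the last version that ships this module); the SentimentScorer class, its prompt wording, and its threshold semantics are illustrative assumptions, not code from the package.

# Illustrative sketch only (assumes judgeval==0.0.52; prompt_scorer.py is removed in 0.0.54).
from typing import List

from judgeval.data import Example
from judgeval.scorers.prompt_scorer import PromptScorer


class SentimentScorer(PromptScorer):
    """Hypothetical scorer that rates how negative a response is on a 1-5 scale."""

    def _build_measure_prompt(self, example: Example) -> List[dict]:
        # Conversation prompt sent to the LLM judge.
        return [
            {
                "role": "system",
                "content": "Rate the negativity of the response on a 1-5 scale "
                "and briefly explain your rating.",
            },
            {
                "role": "user",
                "content": f"Response: {example.actual_output}\n\nYour judgment: ",
            },
        ]

    def _build_schema(self) -> dict:
        # Keys the judge must return, mapped to the expected Python types.
        return {"score": int, "reason": str}

    def _process_response(self, response: dict):
        # Parse the judge's JSON response into (score, reason).
        return response["score"], response["reason"]

    def _success_check(self, **kwargs) -> bool:
        # Treat low negativity as success; this threshold semantics is an assumption.
        return self.score is not None and self.score <= self.threshold


# Usage (illustrative):
#   scorer = SentimentScorer(name="Negative Sentiment", threshold=2)
#   scorer.score_example(example)  # or: await scorer.a_score_example(example)

Note that 0.0.54 deletes this module (together with judgeval_scorer.py), so the sketch applies only to the pre-0.0.54 API.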
judgeval-0.0.52.dist-info/RECORD
@@ -1,69 +0,0 @@
- judgeval/__init__.py,sha256=HM1M8hmqRum6G554QKkXhB4DF4f5eh_xtYo0Kf-t3kw,332
- judgeval/clients.py,sha256=JnB8n90GyXiYaGmSEYaA67mdJSnr3SIrzArao7NGebw,980
- judgeval/constants.py,sha256=IwW428u2VxThczHiL6ZnRwrIzb6QwOE4kdKonktVFYA,6032
- judgeval/evaluation_run.py,sha256=9fYFWJ2ZXtnNcRqxLjzKkZHAba2xi_f1uzOXDJ37Pgw,3233
- judgeval/judgment_client.py,sha256=RGqjw6Q50DOaTPa5SfCzSSGjsm7zlkZ6N7LOvewCxVU,21510
- judgeval/rules.py,sha256=TKI1K_Wlo3GDoSCztGcDoTioVKpvfG6zVkONyou8v5c,20465
- judgeval/run_evaluation.py,sha256=JohxsU5EajwPgBhBGt_wTrNSGdVIbSJmMAR5ffCSg7c,51478
- judgeval/version_check.py,sha256=FlKE8AQGwu50d3kdWSiBZYVW9sicnFInCZjakKt37w4,1003
- judgeval/common/__init__.py,sha256=KH-QJyWtQ60R6yFIBDYS3WGRiNpEu1guynpxivZvpBQ,309
- judgeval/common/exceptions.py,sha256=OkgDznu2wpBQZMXiZarLJYNk1HIcC8qYW7VypDC3Ook,556
- judgeval/common/logger.py,sha256=_nNV4waaMB4NkjwAG0kYZ3cfBe19BY6b2vsCdKd2YR4,6112
- judgeval/common/s3_storage.py,sha256=ukylTrBZ2QuT8BGbOY7D738RvHFAzVaPwmuWQ4R5xkE,3986
- judgeval/common/tracer.py,sha256=7vvPY632z4ExsqIuNRjfpJfa6CpJKohz8kvBiSwbjFE,129624
- judgeval/common/utils.py,sha256=p8C_BM0nNcIiVHTBH4BqsR106RNUlZ9wM0SxWY4IozE,35543
- judgeval/data/__init__.py,sha256=Nuy_F6oll5c5qLOF2gGFWFYyXeOgXSh7R4Vm2kMiXDM,531
- judgeval/data/custom_example.py,sha256=o4baSEeyNhS-k9PiOJdN4NfBFBGJMvhnU5RBvVRFRd8,734
- judgeval/data/example.py,sha256=8wsqBJ98Nw7IaVNXZmUoz3UuQUWkBbnHI6t4_1pqmr8,7234
- judgeval/data/result.py,sha256=4TfBPukRpF2iaF14zEU1RP-wHxsPWrX8PaXYnhxN8MM,3132
- judgeval/data/scorer_data.py,sha256=FnePIXS-4oNqrM2Eo97-hL3g3ZKFIvEKLdkl0CnpHEI,3283
- judgeval/data/tool.py,sha256=QMYJO8kyhGum8iiXxZZ_9pGcxcqp7Fjp0R0sh6i_9rU,1915
- judgeval/data/trace.py,sha256=tn1ctv99UI_vG_1UmFlzvt2L20mviUSwbvVs8ow8X-o,5797
- judgeval/data/trace_run.py,sha256=NMUkf5bxMW_jWXxZ-JI8-gOKSASldS7oAMH4MH4oSYE,1841
- judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
- judgeval/data/datasets/dataset.py,sha256=VDHQpOUoWUfaPmCeolKP-hhSzQcCHq1muRg3EtLRpf0,12933
- judgeval/data/datasets/eval_dataset_client.py,sha256=93Pxb3aCgDwvi263N0CgugApIwKbHbPSfuz7j0IhHSY,12880
- judgeval/integrations/langgraph.py,sha256=3fKMOhAjuDdH_q3F9OlW2T_fx_vzBg2Sz4WP4WFvBuw,35909
- judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
- judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY,986
- judgeval/judges/litellm_judge.py,sha256=pHKdNkhdBMlrok3ZMTWaomGX6DKFXYV9zHqvCL7_2jo,2653
- judgeval/judges/mixture_of_judges.py,sha256=jcE3H47bVMdqzYRuxa0LD8wudF1kxkRujEpbVV-rkcM,15913
- judgeval/judges/together_judge.py,sha256=DZKlsij2ikmDiYbLZKWm8oqDRNNuvCBiGM0JcycwqWM,2424
- judgeval/judges/utils.py,sha256=0CF9qtIUQUL3-W-qTGpmTjZbkUUBAM6TslDsrCHnTBU,2725
- judgeval/scorers/__init__.py,sha256=VKPveyGCv5Rc0YtuT7iAxSv-M5EuikqAVeaGNnYMuWE,1340
- judgeval/scorers/api_scorer.py,sha256=2LNqcwIMerb37WooGD-hw5WIVLcTXnxWxzwZ0h9CXq0,2795
- judgeval/scorers/exceptions.py,sha256=ACDHK5-TWiF3NTk-wycaedpbrdobm-CvvC1JA_iP-Mk,179
- judgeval/scorers/judgeval_scorer.py,sha256=VoiAQdJzgoiVyFYS9gLEGtQwfQY6tUBoWBBDyGBfo-Q,7321
- judgeval/scorers/prompt_scorer.py,sha256=w0tW76J956smL4D8PsOHswjwYFb8W08E_0E9ad5_aQ8,12124
- judgeval/scorers/score.py,sha256=_mKQuoZHEqrF9PaydPtzWN3zjE6PeKYETw_1UryzJ3s,19399
- judgeval/scorers/utils.py,sha256=UKssYyqsJ_hckeqa1aGcXLLxiScRDzYilyuT1RqkVyo,6853
- judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=mmGIBCWN2WByjSUn9o5-xmHV2W-fDNyRofNsEpSuqyQ,2248
- judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=xY7vY4uIfncEiCksGu5SFT8dUjzkY9suNgyvipQ1avU,712
- judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=t2ClO5nL6rM_atKV9YFgOCrQEPI_KjNqs1tyF3WqQig,659
- judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py,sha256=USeIQ1nozvQhMIrRLpST3nqNOekOFW5XJG4NSP7w0RI,4430
- judgeval/scorers/judgeval_scorers/api_scorers/comparison.py,sha256=H4K_NIMabYd_OPlMz3CNNMIM3vYk7PunTXygMnyp6sc,1240
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py,sha256=QldMhW7k16jPPiHQAeLH-2VilPTuNHVi6OMsWvWnycE,771
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py,sha256=GDxEljGD4E-8j6t9DpV2cve0gcKZiUYHn2bfyXChbu0,759
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py,sha256=4E6Sa1aaI1k9PvA8afzNwIdrBCxv4UOqMtmfnLlWeWs,826
- judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py,sha256=jiKi8EfwP_yuOwHhYStbIUQIn2LPwJEbkh8PQeOoDTs,475
- judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=guG37tQm8m4Gs1bwYS1eaNau-RJYwteb1hwYQ0YMIbk,1357
- judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=6iK6Da0FWoyDe_OH7UMnc4gpnByNqfIx6BW8nUbvlC0,693
- judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py,sha256=RrGgBMgwVPpxb9cHm-yXQBgoh6CHUm_GkFYGSp-KcUc,693
- judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=VbvEEawOZ1XA3SWS986cbR2m3Clyliv21nzHe9GrQxo,687
- judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=nk4_lpq2eIe6v8GtBm2g6O1CLCg5sP7-wspye6qNuXE,679
- judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py,sha256=9gKX32g9dAvFdHXzQmR-CFabBPLIZHu8aCnICK3t7j8,1066
- judgeval/scorers/judgeval_scorers/api_scorers/summarization.py,sha256=Wz5wtpqeXMdK8oRXRKnWqow4s1mmqGFQqHK42wo6cNQ,648
- judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py,sha256=wzgprwQ3hcsc9itHG0DkcXyNnvVVd-s1UpNyZxw49Sw,590
- judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py,sha256=462fR2m-67FR2TdHu6cCNZLRkIT_yTAOrMeb-1AuQe8,576
- judgeval/scorers/judgeval_scorers/classifiers/__init__.py,sha256=Qt81W5ZCwMvBAne0LfQDb8xvg5iOG1vEYP7WizgwAZo,67
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py,sha256=8iTzMvou1Dr8pybul6lZHKjc9Ye2-0_racRGYkhEdTY,74
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256=gloLzThkFsr8sHQargDAH8XaDrlF6OCuc_69hyNslFU,2589
- judgeval/tracer/__init__.py,sha256=wkuXtOGDCrwgPPXlh_sSJmvGuWaAMHyNzk1TzB5f9aI,148
- judgeval/utils/alerts.py,sha256=3w_AjQrgfmOZvfqCridW8WAnHVxHHXokX9jNzVFyGjA,3297
- judgeval/utils/file_utils.py,sha256=M6a_BPRGMwEFBPdF_Tbcbbk4YldHcOhuoU9oRlmninE,1858
- judgeval/utils/requests.py,sha256=rbmZTaiyWI8t2YUkhk11SIe3dF7j2j25L1BuFp_1PII,770
- judgeval-0.0.52.dist-info/METADATA,sha256=bEoAZvedhy0uEclNseDaiC37z8ST_zKg82QTtQ5EV98,54719
- judgeval-0.0.52.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- judgeval-0.0.52.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
- judgeval-0.0.52.dist-info/RECORD,,