judgeval 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/__init__.py +0 -71
- judgeval/common/tracer.py +57 -31
- judgeval/constants.py +1 -0
- judgeval/data/__init__.py +2 -1
- judgeval/data/scorer_data.py +2 -2
- judgeval/evaluation_run.py +16 -15
- judgeval/judges/__init__.py +2 -2
- judgeval/judges/base_judge.py +1 -1
- judgeval/judges/litellm_judge.py +2 -2
- judgeval/judges/mixture_of_judges.py +2 -2
- judgeval/judges/together_judge.py +2 -2
- judgeval/judges/utils.py +4 -4
- judgeval/judgment_client.py +67 -15
- judgeval/run_evaluation.py +79 -14
- judgeval/scorers/__init__.py +8 -4
- judgeval/scorers/api_scorer.py +64 -0
- judgeval/scorers/base_scorer.py +3 -2
- judgeval/scorers/exceptions.py +11 -0
- judgeval/scorers/{custom_scorer.py → judgeval_scorer.py} +9 -5
- judgeval/scorers/judgeval_scorers/__init__.py +132 -9
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +23 -0
- judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +19 -0
- judgeval/scorers/judgeval_scorers/{answer_relevancy.py → api_scorers/answer_relevancy.py} +2 -2
- judgeval/scorers/judgeval_scorers/{contextual_precision.py → api_scorers/contextual_precision.py} +2 -2
- judgeval/scorers/judgeval_scorers/{contextual_recall.py → api_scorers/contextual_recall.py} +2 -2
- judgeval/scorers/judgeval_scorers/{contextual_relevancy.py → api_scorers/contextual_relevancy.py} +2 -2
- judgeval/scorers/judgeval_scorers/{faithfulness.py → api_scorers/faithfulness.py} +2 -2
- judgeval/scorers/judgeval_scorers/{hallucination.py → api_scorers/hallucination.py} +2 -2
- judgeval/scorers/judgeval_scorers/{json_correctness.py → api_scorers/json_correctness.py} +7 -7
- judgeval/scorers/judgeval_scorers/{summarization.py → api_scorers/summarization.py} +2 -2
- judgeval/scorers/judgeval_scorers/{tool_correctness.py → api_scorers/tool_correctness.py} +2 -2
- judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +24 -0
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +4 -0
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +272 -0
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +169 -0
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +4 -0
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +292 -0
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +174 -0
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +3 -0
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +259 -0
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +106 -0
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +3 -0
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +249 -0
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +142 -0
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +3 -0
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +240 -0
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +121 -0
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +3 -0
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +318 -0
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +265 -0
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +3 -0
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +258 -0
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +104 -0
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +127 -0
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +3 -0
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +247 -0
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +541 -0
- judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +3 -0
- judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +151 -0
- judgeval/scorers/prompt_scorer.py +4 -4
- judgeval/scorers/score.py +14 -14
- judgeval/scorers/utils.py +40 -6
- {judgeval-0.0.3.dist-info → judgeval-0.0.4.dist-info}/METADATA +1 -1
- judgeval-0.0.4.dist-info/RECORD +78 -0
- judgeval-0.0.3.dist-info/RECORD +0 -46
- {judgeval-0.0.3.dist-info → judgeval-0.0.4.dist-info}/WHEEL +0 -0
- {judgeval-0.0.3.dist-info → judgeval-0.0.4.dist-info}/licenses/LICENSE.md +0 -0
@@ -30,7 +30,7 @@ from typing import List, Optional, Union, Tuple, Any, Mapping
|
|
30
30
|
from pydantic import BaseModel, model_serializer, Field
|
31
31
|
|
32
32
|
from judgeval.data import Example
|
33
|
-
from judgeval.scorers import
|
33
|
+
from judgeval.scorers import JudgevalScorer
|
34
34
|
from judgeval.scorers.utils import (scorer_progress_meter,
|
35
35
|
parse_response_json,
|
36
36
|
get_or_create_event_loop,
|
@@ -42,7 +42,7 @@ class ReasonScore(BaseModel):
|
|
42
42
|
score: float
|
43
43
|
|
44
44
|
|
45
|
-
class PromptScorer(
|
45
|
+
class PromptScorer(JudgevalScorer, BaseModel):
|
46
46
|
name: str
|
47
47
|
score_type: str
|
48
48
|
threshold: float = Field(default=0.5)
|
@@ -73,7 +73,7 @@ class PromptScorer(CustomScorer, BaseModel):
|
|
73
73
|
verbose_mode=verbose_mode,
|
74
74
|
)
|
75
75
|
# Then initialize JudgevalScorer
|
76
|
-
|
76
|
+
JudgevalScorer.__init__(
|
77
77
|
self,
|
78
78
|
score_type=name,
|
79
79
|
threshold=1 if strict_mode else threshold,
|
@@ -310,7 +310,7 @@ class ClassifierScorer(PromptScorer):
|
|
310
310
|
verbose_mode=verbose_mode,
|
311
311
|
)
|
312
312
|
# Then initialize JudgevalScorer
|
313
|
-
|
313
|
+
JudgevalScorer.__init__(
|
314
314
|
self,
|
315
315
|
score_type=name,
|
316
316
|
threshold=threshold,
|
judgeval/scorers/score.py
CHANGED
@@ -16,14 +16,14 @@ from judgeval.data import (
|
|
16
16
|
create_process_example,
|
17
17
|
create_scorer_data,
|
18
18
|
)
|
19
|
-
from judgeval.scorers import
|
19
|
+
from judgeval.scorers import JudgevalScorer
|
20
20
|
from judgeval.scorers.utils import clone_scorers, scorer_console_msg
|
21
21
|
from judgeval.common.exceptions import MissingTestCaseParamsError
|
22
22
|
from judgeval.common.logger import example_logging_context, debug, error, warning, info
|
23
|
-
from judgeval.judges import
|
23
|
+
from judgeval.judges import JudgevalJudge
|
24
24
|
|
25
25
|
async def safe_a_score_example(
|
26
|
-
scorer:
|
26
|
+
scorer: JudgevalScorer,
|
27
27
|
example: Example,
|
28
28
|
ignore_errors: bool,
|
29
29
|
skip_on_missing_params: bool,
|
@@ -96,7 +96,7 @@ async def safe_a_score_example(
|
|
96
96
|
async def score_task(
|
97
97
|
task_id: int,
|
98
98
|
progress: Progress,
|
99
|
-
scorer:
|
99
|
+
scorer: JudgevalScorer,
|
100
100
|
example: Example,
|
101
101
|
ignore_errors: bool = True,
|
102
102
|
skip_on_missing_params: bool = True,
|
@@ -182,7 +182,7 @@ async def score_task(
|
|
182
182
|
|
183
183
|
|
184
184
|
async def score_with_indicator(
|
185
|
-
scorers: List[
|
185
|
+
scorers: List[JudgevalScorer],
|
186
186
|
example: Example,
|
187
187
|
ignore_errors: bool,
|
188
188
|
skip_on_missing_params: bool,
|
@@ -242,8 +242,8 @@ async def score_with_indicator(
|
|
242
242
|
|
243
243
|
async def a_execute_scoring(
|
244
244
|
examples: List[Example],
|
245
|
-
scorers: List[
|
246
|
-
model: Optional[Union[str, List[str],
|
245
|
+
scorers: List[JudgevalScorer],
|
246
|
+
model: Optional[Union[str, List[str], JudgevalJudge]] = None,
|
247
247
|
ignore_errors: bool = True,
|
248
248
|
skip_on_missing_params: bool = True,
|
249
249
|
show_indicator: bool = True,
|
@@ -256,9 +256,10 @@ async def a_execute_scoring(
|
|
256
256
|
Executes evaluations of `Example`s asynchronously using one or more `JudgevalScorer`s.
|
257
257
|
Each `Example` will be evaluated by all of the `JudgevalScorer`s in the `scorers` list.
|
258
258
|
|
259
|
+
Args:
|
259
260
|
examples (List[Example]): A list of `Example` objects to be evaluated.
|
260
|
-
scorers (List[
|
261
|
-
model (Union[str, List[str],
|
261
|
+
scorers (List[JudgevalScorer]): A list of `JudgevalScorer` objects to evaluate the examples.
|
262
|
+
model (Union[str, List[str], JudgevalJudge]): The model to use for evaluation.
|
262
263
|
ignore_errors (bool): Whether to ignore errors during evaluation.
|
263
264
|
skip_on_missing_params (bool): Whether to skip evaluation if parameters are missing.
|
264
265
|
show_indicator (bool): Whether to show a progress indicator.
|
@@ -316,8 +317,8 @@ async def a_execute_scoring(
|
|
316
317
|
if len(scorers) == 0:
|
317
318
|
pbar.update(1)
|
318
319
|
continue
|
319
|
-
|
320
|
-
cloned_scorers: List[
|
320
|
+
|
321
|
+
cloned_scorers: List[JudgevalScorer] = clone_scorers(
|
321
322
|
scorers
|
322
323
|
)
|
323
324
|
task = execute_with_semaphore(
|
@@ -342,7 +343,7 @@ async def a_execute_scoring(
|
|
342
343
|
if len(scorers) == 0:
|
343
344
|
continue
|
344
345
|
|
345
|
-
cloned_scorers: List[
|
346
|
+
cloned_scorers: List[JudgevalScorer] = clone_scorers(
|
346
347
|
scorers
|
347
348
|
)
|
348
349
|
task = execute_with_semaphore(
|
@@ -364,7 +365,7 @@ async def a_execute_scoring(
|
|
364
365
|
|
365
366
|
|
366
367
|
async def a_eval_examples_helper(
|
367
|
-
scorers: List[
|
368
|
+
scorers: List[JudgevalScorer],
|
368
369
|
example: Example,
|
369
370
|
scoring_results: List[ScoringResult],
|
370
371
|
score_index: int,
|
@@ -413,7 +414,6 @@ async def a_eval_examples_helper(
|
|
413
414
|
# At this point, the scorer has been executed and already contains data.
|
414
415
|
if getattr(scorer, 'skipped', False):
|
415
416
|
continue
|
416
|
-
|
417
417
|
scorer_data = create_scorer_data(scorer) # Fetch scorer data from completed scorer evaluation
|
418
418
|
process_example.update_scorer_data(scorer_data) # Update process example with the same scorer data
|
419
419
|
|
judgeval/scorers/utils.py
CHANGED
@@ -13,10 +13,12 @@ from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
13
13
|
from rich.console import Console
|
14
14
|
from typing import List, Optional, Any
|
15
15
|
|
16
|
-
from judgeval.scorers import
|
16
|
+
from judgeval.scorers import JudgevalScorer
|
17
|
+
from judgeval.data import Example, ExampleParams
|
18
|
+
from judgeval.scorers.exceptions import MissingExampleParamsError
|
17
19
|
|
18
20
|
|
19
|
-
def clone_scorers(scorers: List[
|
21
|
+
def clone_scorers(scorers: List[JudgevalScorer]) -> List[JudgevalScorer]:
|
20
22
|
"""
|
21
23
|
Creates duplicates of the scorers passed as argument.
|
22
24
|
"""
|
@@ -38,7 +40,7 @@ def clone_scorers(scorers: List[CustomScorer]) -> List[CustomScorer]:
|
|
38
40
|
|
39
41
|
|
40
42
|
def scorer_console_msg(
|
41
|
-
scorer:
|
43
|
+
scorer: JudgevalScorer,
|
42
44
|
async_mode: Optional[bool] = None,
|
43
45
|
):
|
44
46
|
"""
|
@@ -55,7 +57,7 @@ def scorer_console_msg(
|
|
55
57
|
|
56
58
|
@contextmanager
|
57
59
|
def scorer_progress_meter(
|
58
|
-
scorer:
|
60
|
+
scorer: JudgevalScorer,
|
59
61
|
async_mode: Optional[bool] = None,
|
60
62
|
display_meter: bool = True,
|
61
63
|
total: int = 100,
|
@@ -81,7 +83,7 @@ def scorer_progress_meter(
|
|
81
83
|
yield
|
82
84
|
|
83
85
|
|
84
|
-
def parse_response_json(llm_response: str, scorer: Optional[
|
86
|
+
def parse_response_json(llm_response: str, scorer: Optional[JudgevalScorer] = None) -> dict:
|
85
87
|
"""
|
86
88
|
Extracts JSON output from an LLM response and returns it as a dictionary.
|
87
89
|
|
@@ -122,7 +124,7 @@ def print_verbose_logs(metric: str, logs: str):
|
|
122
124
|
print("=" * 70)
|
123
125
|
|
124
126
|
|
125
|
-
def create_verbose_logs(metric:
|
127
|
+
def create_verbose_logs(metric: JudgevalScorer, steps: List[str]) -> str:
|
126
128
|
"""
|
127
129
|
Creates verbose logs for a scorer object.
|
128
130
|
|
@@ -173,3 +175,35 @@ def get_or_create_event_loop() -> asyncio.AbstractEventLoop:
|
|
173
175
|
loop = asyncio.new_event_loop()
|
174
176
|
asyncio.set_event_loop(loop)
|
175
177
|
return loop
|
178
|
+
|
179
|
+
|
180
|
+
def check_example_params(
    example: Example,
    example_params: List[ExampleParams],
    scorer: JudgevalScorer,
):
    """
    Checks that `example` is an `Example` and that every required field is set.

    On failure the error message is also stored on `scorer.error` before the
    exception is raised.

    Args:
        example (Example): The example to validate.
        example_params (List[ExampleParams]): The fields that must not be None
            on `example`. Each member's `.value` is used as the attribute name
            looked up on the example.
        scorer (JudgevalScorer): The scorer requesting the check. Its `error`
            attribute is set when validation fails.

    Raises:
        MissingExampleParamsError: If `example` is not an `Example` instance,
            or if any of the requested fields is None.
    """
    if not isinstance(example, Example):
        error_str = f"in check_example_params(): Expected example to be of type 'Example', but got {type(example)}"
        scorer.error = error_str
        raise MissingExampleParamsError(error_str)

    # Collect the quoted names of every required field that is unset.
    missing_params = [
        f"'{param.value}'"
        for param in example_params
        if getattr(example, param.value) is None
    ]
    if not missing_params:
        return

    # One or two names: "'a'" / "'a' and 'b'"; three or more: "'a', 'b', and 'c'".
    if len(missing_params) <= 2:
        missing_params_str = " and ".join(missing_params)
    else:
        missing_params_str = (
            ", ".join(missing_params[:-1]) + ", and " + missing_params[-1]
        )

    # Plain instances do not expose the class's `__name__`; fall back to the
    # class name so that building the message cannot raise AttributeError and
    # hide the real validation error.
    scorer_name = getattr(scorer, "__name__", type(scorer).__name__)
    error_str = f"{missing_params_str} fields in example cannot be None for the '{scorer_name}' scorer"
    scorer.error = error_str
    raise MissingExampleParamsError(error_str)
|
208
|
+
|
209
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
judgeval/__init__.py,sha256=xiiG4CkeaOtey4fusCd9CBz0BVqzTIbV-K2EFIU0rUM,283
|
2
|
+
judgeval/clients.py,sha256=boWW-nA7Yqt2zBflMxl2NpdUIFSSKebv3rJiG50K1s4,594
|
3
|
+
judgeval/constants.py,sha256=5O1jWvxMCRyMSWhmkrvPqfBctx42c7kMtgTS7ORVcFw,1965
|
4
|
+
judgeval/evaluation_run.py,sha256=KcIS7mDR_9XEdqYrJXFcrLz5IDMof34HcD5VtjZgV8w,5884
|
5
|
+
judgeval/judgment_client.py,sha256=8Z4Woiv56qphYqlMI3bNy4rvQItZl_z9vNNd3UdrCes,11241
|
6
|
+
judgeval/run_evaluation.py,sha256=A9jjtWPH2_5W43a1f98R8u-8PuVczoJZNCZIyCoRqi8,18918
|
7
|
+
judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
|
8
|
+
judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
|
9
|
+
judgeval/common/logger.py,sha256=QXN3UMymmKu2iMEMEgATLBnMDjGr_pE2iOSEFoICgg8,6092
|
10
|
+
judgeval/common/tracer.py,sha256=JWUmsjxs2N6Cu5nol7vRbwWKFRLHJlwCnHWgg3W17GM,23812
|
11
|
+
judgeval/common/utils.py,sha256=3WRyyX0tvnnj_VAVlEdtZrfzyWj6zfX04xdpCtE1m5Y,33736
|
12
|
+
judgeval/data/__init__.py,sha256=YferxwmUqoBi18hrdgro0BD0h4pt20LAqISeUzGMcVU,474
|
13
|
+
judgeval/data/api_example.py,sha256=vwWFbI6eJr5VgURCRbuSiMtEXLUbTCih_BcaqEBy-pg,4108
|
14
|
+
judgeval/data/example.py,sha256=lymGZ3jG818-r2vyFunt6OLFrhESOyJnbhao_ljTjlA,2471
|
15
|
+
judgeval/data/result.py,sha256=CVp_mZrBbKjIH9rPB6rg7T2jY1jUy7JVyI7_kUbRC7w,3490
|
16
|
+
judgeval/data/scorer_data.py,sha256=pYljblCPZrlMIv5Eg7R-clnmsqzUBAwokKjZpwa0DXE,3280
|
17
|
+
judgeval/data/datasets/__init__.py,sha256=Xh6TSsCcEsJeYjjubfeGa3WU8YQfuwKXH3jR9EeDFgg,171
|
18
|
+
judgeval/data/datasets/dataset.py,sha256=9GGspdKDhMw2dJAS7ZvOZHSoNGwMzCtgnFYDe6y4yog,16484
|
19
|
+
judgeval/data/datasets/ground_truth.py,sha256=OTBs3VZe-Wp0vEXEsq14GPZHYtpWT16bhGQTycIvkKc,2057
|
20
|
+
judgeval/data/datasets/utils.py,sha256=lQxyl7mevct7JcDSyIrU_8QOzT-EYPWEvoUiAeOdeek,2502
|
21
|
+
judgeval/judges/__init__.py,sha256=tyQ5KY88Kp1Ctfw2IJxnVEpy8DnFCtmy04JdPOpp-As,339
|
22
|
+
judgeval/judges/base_judge.py,sha256=qhYSFxE21WajYNaT4X-qwWGtpo_tqzBzdqbszSheSD8,1000
|
23
|
+
judgeval/judges/litellm_judge.py,sha256=EIL58Teptv8DzZUO3yP2RDQCDq-aoBB6HPZzPdK6KTg,2424
|
24
|
+
judgeval/judges/mixture_of_judges.py,sha256=OuGWCuXyqe7s_Y74ij90TJFRfHU-VAFyJVVrwBM0RO0,15532
|
25
|
+
judgeval/judges/together_judge.py,sha256=x3jf-tq77QPXHeeoF739f69hE_0VceXD9FHLrVFdGVA,2275
|
26
|
+
judgeval/judges/utils.py,sha256=YUvivcGV1OKLPMJ9N6aTvhA0r_zzJ2NXriPguiiaVaY,2110
|
27
|
+
judgeval/scorers/__init__.py,sha256=3rq2VtszrJk9gZ3oAMVd7EGlSugr8aRlHWprMDgQPaQ,956
|
28
|
+
judgeval/scorers/api_scorer.py,sha256=88kCWr6IetLFn3ziTPG-lwDWvMhFUC6xfINU1MJBoho,2125
|
29
|
+
judgeval/scorers/base_scorer.py,sha256=mbOReG88fWaqCnC8F0u5QepRlzgVkuOz89KEKYxrmMc,1794
|
30
|
+
judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
|
31
|
+
judgeval/scorers/judgeval_scorer.py,sha256=qpjyj7JZEgxoF8LR3pwkKeebmVC8qlITnKFN4YOUKXc,6165
|
32
|
+
judgeval/scorers/prompt_scorer.py,sha256=bUv8eZNy1XGVM1gNMt33dgIVX6zj63bGAV6O0o0c7yg,17821
|
33
|
+
judgeval/scorers/score.py,sha256=zJKG21h9Njyj2vS36CAFK2wlbOcHSKgrLgHV5_25KKw,18630
|
34
|
+
judgeval/scorers/utils.py,sha256=dtueaJm8e3Ph3wj1vC-srzadgK_CoIlOefdvMQ-cwK8,6826
|
35
|
+
judgeval/scorers/judgeval_scorers/__init__.py,sha256=077QnuBfw9Sy9RP2TF2oKCtt5PbaqBZLyiP-gczKShk,5092
|
36
|
+
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=zFwH2TC5AFlpDRfVKc6GN4YTtnmeyALl-JRLoZD_Jco,1284
|
37
|
+
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=690G5askjE8dcbKPGvCF6JxAEM9QJUqb-3K-D6lI6oM,463
|
38
|
+
judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=CqvvjV7AZqPlXh-PZaPKYPILHr15u4bIYiKBFjlk5i0,457
|
39
|
+
judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py,sha256=2zBrm_EEc143bmPA4HVcf8XtQeuc_BexczGx-SHlwRY,473
|
40
|
+
judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py,sha256=NyojBWy_lRYx8diREulSK8s9dfYdZav4eZjg3TwUm0M,461
|
41
|
+
judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py,sha256=wROMWOliCnB39ftX9TdeZmG9y0vrnxIGVby65tLOQRU,574
|
42
|
+
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=gNf_i5c0jjpz2zCGhe7TtDMLKxc1PdOExJMFB5X7hSg,442
|
43
|
+
judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=ffYwH3CexPkKgo1rCALMivypROQjG5WWEsKXEFZxe2k,446
|
44
|
+
judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py,sha256=CAZBQKwNSqpqAoOgStYfr-yP1Brug_6VRimRIQY-zdg,894
|
45
|
+
judgeval/scorers/judgeval_scorers/api_scorers/summarization.py,sha256=-E3oxYbI0D_0q-_fGWh2jQHW9O4Pu7I7xvLWsHU6cn8,450
|
46
|
+
judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py,sha256=17ppPXm962ew67GU5m0npzbPu3CuhgdKY_KmfPvKfu4,457
|
47
|
+
judgeval/scorers/judgeval_scorers/local_implementations/__init__.py,sha256=ZDbmYHwIbPD75Gj9JKtEWnpBdSVGGRmbn1_IOR6GR-c,1627
|
48
|
+
judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py,sha256=cxxUEspgoIdSzJbwIIioamC0-xDqhYVfYAWxaYF-D_Y,177
|
49
|
+
judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py,sha256=ZjLw3Usx3SsSRXGXLItNL_IEWo_UV8dxhc2mS9j-nGM,10073
|
50
|
+
judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py,sha256=5B_G7PPEsfLq6cwWkKWcLuy2k_5RgoOzsW3wOZLIeMk,6703
|
51
|
+
judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py,sha256=r6yae5iaWtlBL_cP8I-1SuhS9dulsy1e7W9Rcz82v6E,169
|
52
|
+
judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py,sha256=3T3pDxJde6M8RxsPLXvQA16Dpo-sLECEnMCe7rpvNcY,10536
|
53
|
+
judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py,sha256=GfbKv595s1a0dB1No_kDsap6gfcr6dYRGiXx0PDb89k,6557
|
54
|
+
judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py,sha256=J6tc-T60AVOEaNVuoVU0XIG6dvQri99Q0tnX_Tm-0vc,108
|
55
|
+
judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py,sha256=5EYjUusMyDfiqatg8-_OJg8IDax-8Ib6aI1sQgi-6JA,9493
|
56
|
+
judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py,sha256=gddK6BQAFcW04vAad81kxIXCHJQp8CbCqMwudWKy7aM,4892
|
57
|
+
judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py,sha256=4kjfqD_95muHZFo75S8_fbTcC1DI1onNIfMmr8gMZaI,99
|
58
|
+
judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py,sha256=u8sknD9IsPfU7iF4725w5OmFKe1JEZbOYwsLcAy4m3E,9107
|
59
|
+
judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py,sha256=boVq6IM7Iambc1ky_JJQ4ejnYoQQtYreG0SjO4iMyFU,6558
|
60
|
+
judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py,sha256=JPCvrekKLbl_xdD49evhtiFIVocuegCpCBkn1auzTSE,184
|
61
|
+
judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py,sha256=NCGFLrdkpEK_LPVQC21qY-0pEOrsdVC0RRcNn9IdREE,8759
|
62
|
+
judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py,sha256=6EHBfxWvhur9z14l8zCw5Z4Hb2uRo9Yv7qIhTRT7-aM,4591
|
63
|
+
judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py,sha256=NbkSqPwxgF4T8KsvuIWhVyRwdOlo7mNHMFuRStTFnvk,154
|
64
|
+
judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py,sha256=HdpxzQVYs79AdoNWmInS6yNYwPdwgqN23OHSzo1e9_4,11169
|
65
|
+
judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py,sha256=oxmCsouh5ExUMmlSuCDolpYR2y9c-yKth6PHrdsCH_g,11387
|
66
|
+
judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py,sha256=fZk3UQxI9Nljf5qjCRLRkF0D-AERFHElI9cC83_cgV8,158
|
67
|
+
judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py,sha256=ud-P20erpiLR-i-ycma7Bg8M_mxJ2yQliXPXr-Iwq3M,9521
|
68
|
+
judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py,sha256=BkEu7Q_jIVdcdZSq37tMjitZFzACd8-iBTDDXfGbZig,4346
|
69
|
+
judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py,sha256=eYqHTWiw0NOPHueswknmpdxrmrmSm6Jadq56Ncmv9B0,4218
|
70
|
+
judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py,sha256=mv6-XeLSV5yj1H98YYV2iTYVd88zKftZJP42Lgl6R80,89
|
71
|
+
judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py,sha256=6GnRz2h-6Fwt4sl__0RgQOyo3n3iDO4MNuHWxdu-rrM,10242
|
72
|
+
judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py,sha256=3FMn7EhM5IxNwJLGTcpeHODaOPJefMHW6rRizmlA93U,20775
|
73
|
+
judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py,sha256=JUB3TMqS1OHr6PqpIGqkyiBNbyfUaw7lZuUATjU3_ek,168
|
74
|
+
judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py,sha256=oxhVDR3Pb55Kxp9KsvmuvHWKtMiV1BQRG6yaXEr5Bp8,5309
|
75
|
+
judgeval-0.0.4.dist-info/METADATA,sha256=bMQ_F0sODVmPSO-SQD7QeJnWwegJRGgvBO-etUVQN2E,1156
|
76
|
+
judgeval-0.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
77
|
+
judgeval-0.0.4.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
|
78
|
+
judgeval-0.0.4.dist-info/RECORD,,
|
judgeval-0.0.3.dist-info/RECORD
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
judgeval/__init__.py,sha256=ZLaxoUwq-b86Ugj3543Z1v4Q0D8ukHRpxV7LcZrcOZs,1875
|
2
|
-
judgeval/clients.py,sha256=boWW-nA7Yqt2zBflMxl2NpdUIFSSKebv3rJiG50K1s4,594
|
3
|
-
judgeval/constants.py,sha256=5OiK-r6DLD2utExYaFnvfpSYxrXbVYzRDxSO6lVNE6k,1919
|
4
|
-
judgeval/evaluation_run.py,sha256=Z1Y9_w7xWK6sjH4sXupPYNl6-BTlJu4kYF8KJcB3MF8,5763
|
5
|
-
judgeval/judgment_client.py,sha256=QBEgWkkNvrHpkmhPIS0YhsDYDWYxGenQSjopz5QSsas,9574
|
6
|
-
judgeval/run_evaluation.py,sha256=LzEoWhtsXE_HwMRgmzkkMKfbqw2h1sh7WjChy5HOUfQ,16252
|
7
|
-
judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
|
8
|
-
judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
|
9
|
-
judgeval/common/logger.py,sha256=QXN3UMymmKu2iMEMEgATLBnMDjGr_pE2iOSEFoICgg8,6092
|
10
|
-
judgeval/common/tracer.py,sha256=7t--uQMcxVLl2Hqemem_EUy90lAhbvrHss5_ujFlI8Y,22310
|
11
|
-
judgeval/common/utils.py,sha256=3WRyyX0tvnnj_VAVlEdtZrfzyWj6zfX04xdpCtE1m5Y,33736
|
12
|
-
judgeval/data/__init__.py,sha256=-Js66xgj0g7wtUk8Q2CIK9ynGL8pGlVRubVlfO7OgX4,438
|
13
|
-
judgeval/data/api_example.py,sha256=vwWFbI6eJr5VgURCRbuSiMtEXLUbTCih_BcaqEBy-pg,4108
|
14
|
-
judgeval/data/example.py,sha256=lymGZ3jG818-r2vyFunt6OLFrhESOyJnbhao_ljTjlA,2471
|
15
|
-
judgeval/data/result.py,sha256=CVp_mZrBbKjIH9rPB6rg7T2jY1jUy7JVyI7_kUbRC7w,3490
|
16
|
-
judgeval/data/scorer_data.py,sha256=H7s-yEEUdWlR4mRgy5JkLOQXNkn2YLu9wkZwcAlh6QQ,3276
|
17
|
-
judgeval/data/datasets/__init__.py,sha256=Xh6TSsCcEsJeYjjubfeGa3WU8YQfuwKXH3jR9EeDFgg,171
|
18
|
-
judgeval/data/datasets/dataset.py,sha256=9GGspdKDhMw2dJAS7ZvOZHSoNGwMzCtgnFYDe6y4yog,16484
|
19
|
-
judgeval/data/datasets/ground_truth.py,sha256=OTBs3VZe-Wp0vEXEsq14GPZHYtpWT16bhGQTycIvkKc,2057
|
20
|
-
judgeval/data/datasets/utils.py,sha256=lQxyl7mevct7JcDSyIrU_8QOzT-EYPWEvoUiAeOdeek,2502
|
21
|
-
judgeval/judges/__init__.py,sha256=4DuGgrJAec2GavvU3wgQ5alOgi9q0GzFzrtGBJxO2Cs,339
|
22
|
-
judgeval/judges/base_judge.py,sha256=PUj7tITRjWjk6Www5Qcrt6Z3YNr9ix69aqjFs-dMQjA,1000
|
23
|
-
judgeval/judges/litellm_judge.py,sha256=xW0Ld7dumercO7ej8aTNDRow1lsVpgaDENTFq1WkiU0,2424
|
24
|
-
judgeval/judges/mixture_of_judges.py,sha256=WELi58rKtJuamkQWbhcqMn7AfRj1qX1LcDyOE0B3FZc,15532
|
25
|
-
judgeval/judges/together_judge.py,sha256=eISgPcnAS71Di9FL9z0AfvBK9nb57a_muCUzboxv7gQ,2275
|
26
|
-
judgeval/judges/utils.py,sha256=Aj1XrK0WXeJStsDLB-Cx757fRuJCWmJRMVD3ngoa9Oo,2110
|
27
|
-
judgeval/scorers/__init__.py,sha256=U9uiW7y707g8ibjc2ZCBlm61ijziJMM2xQSHDV7FerQ,845
|
28
|
-
judgeval/scorers/base_scorer.py,sha256=_n-w7b_PD_-DoW1gr2c3CtrT5dvLehRjDYLQDF-81LM,1786
|
29
|
-
judgeval/scorers/custom_scorer.py,sha256=SrkrtIqs9yaqvUZG3ilXIGm6S78uX1YOygqWpl4xXfw,6039
|
30
|
-
judgeval/scorers/prompt_scorer.py,sha256=-YVG9k03q85YnyLVR2lsC-RmxUM4Q5ynHdUECi63iCk,17813
|
31
|
-
judgeval/scorers/score.py,sha256=zvS5xF3qlUYS716TeyNPikYZWXrUTKirAr1uqXO-P14,18589
|
32
|
-
judgeval/scorers/utils.py,sha256=RCpHc7EQF_LnujsZ0KV-kTYvmHpVS1psJqIgZOuqOgA,5613
|
33
|
-
judgeval/scorers/judgeval_scorers/__init__.py,sha256=YcpuD8qFuhyPEwFiKBgqxQpjqo43I3ODHH3yJnR75B4,1044
|
34
|
-
judgeval/scorers/judgeval_scorers/answer_relevancy.py,sha256=xpS-WclR8SV7HDmpUPp3P55Ybfcm6Gj1kfr2hJG_-cg,452
|
35
|
-
judgeval/scorers/judgeval_scorers/contextual_precision.py,sha256=3nFcrfNTGmHnmfM3UxlLLzAK0qz9VKkEMjmDa6j7thI,468
|
36
|
-
judgeval/scorers/judgeval_scorers/contextual_recall.py,sha256=O8bnQxJJGmIjSUMgR7_8VG2qlSP-7OJiHauCxHGkcgw,456
|
37
|
-
judgeval/scorers/judgeval_scorers/contextual_relevancy.py,sha256=rrUmyWZdKrONjm_seRvFltS5YM9QiiO6o5-DVlNTYPE,569
|
38
|
-
judgeval/scorers/judgeval_scorers/faithfulness.py,sha256=CHqB-_KMnRw24jg0am0SpHuYMx3u3Gy5YtQNAPNXZ_I,437
|
39
|
-
judgeval/scorers/judgeval_scorers/hallucination.py,sha256=7omZbrQWTgZ4Fnw8wQSgvsmuj-g9bm74AP1xZM_mDhs,441
|
40
|
-
judgeval/scorers/judgeval_scorers/json_correctness.py,sha256=guGR5lgTtwG8qqVI7dFt6hHGEYM98XzZhqPzh69Ogqg,866
|
41
|
-
judgeval/scorers/judgeval_scorers/summarization.py,sha256=OwF-sNtRg2HN6FoRCCYTow_SNfS9Rcxzb7ZDBTEld7k,445
|
42
|
-
judgeval/scorers/judgeval_scorers/tool_correctness.py,sha256=7nAXrDKfPkeMzRLiNTevbUV5pjRXPTM2dbkfGRgSJ-s,452
|
43
|
-
judgeval-0.0.3.dist-info/METADATA,sha256=akLBzfSQQy8aTNLIuq0LR0M8mdWTM07Sdx7nEyU4FoA,1156
|
44
|
-
judgeval-0.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
45
|
-
judgeval-0.0.3.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
|
46
|
-
judgeval-0.0.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|