judgeval 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/judgment_client.py +6 -5
- judgeval/scorers/judgeval_scorer.py +2 -0
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +10 -5
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +11 -5
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +10 -5
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +10 -5
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +10 -5
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +11 -6
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +10 -5
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +5 -0
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +13 -6
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +10 -1
- judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +10 -4
- {judgeval-0.0.7.dist-info → judgeval-0.0.8.dist-info}/METADATA +1 -1
- {judgeval-0.0.7.dist-info → judgeval-0.0.8.dist-info}/RECORD +17 -16
- {judgeval-0.0.7.dist-info → judgeval-0.0.8.dist-info}/WHEEL +0 -0
- {judgeval-0.0.7.dist-info → judgeval-0.0.8.dist-info}/licenses/LICENSE.md +0 -0
judgeval/judgment_client.py
CHANGED
@@ -267,7 +267,6 @@ class JudgmentClient:
|
|
267
267
|
|
268
268
|
return response.json()["slug"]
|
269
269
|
|
270
|
-
|
271
270
|
def assert_test(
|
272
271
|
self,
|
273
272
|
examples: List[Example],
|
@@ -275,12 +274,14 @@ class JudgmentClient:
|
|
275
274
|
model: Union[str, List[str], JudgevalJudge],
|
276
275
|
aggregator: Optional[str] = None,
|
277
276
|
metadata: Optional[Dict[str, Any]] = None,
|
278
|
-
log_results: bool =
|
279
|
-
project_name: str = "",
|
280
|
-
eval_run_name: str = "",
|
277
|
+
log_results: bool = True,
|
278
|
+
project_name: str = "default_project",
|
279
|
+
eval_run_name: str = "default_eval_run",
|
281
280
|
override: bool = False,
|
282
281
|
) -> None:
|
283
|
-
|
282
|
+
"""
|
283
|
+
Asserts a test by running the evaluation and checking the results for success
|
284
|
+
"""
|
284
285
|
results = self.run_evaluation(
|
285
286
|
examples=examples,
|
286
287
|
scorers=scorers,
|
@@ -58,6 +58,8 @@ class JudgevalScorer:
|
|
58
58
|
additional_metadata: Optional[Dict] = None
|
59
59
|
):
|
60
60
|
debug(f"Initializing CustomScorer with score_type={score_type}, threshold={threshold}")
|
61
|
+
if not 0 <= threshold <= 1:
|
62
|
+
raise ValueError("Threshold must be between 0 and 1")
|
61
63
|
if strict_mode:
|
62
64
|
warning("Strict mode enabled - scoring will be more rigorous")
|
63
65
|
info(f"CustomScorer initialized with evaluation_model: {evaluation_model}")
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from typing import Optional, List, Union, Tuple
|
2
2
|
from pydantic import BaseModel
|
3
3
|
|
4
|
+
from judgeval.constants import APIScorer
|
4
5
|
from judgeval.judges import JudgevalJudge
|
5
6
|
from judgeval.judges.utils import create_judge
|
6
7
|
from judgeval.data import Example, ExampleParams
|
@@ -38,13 +39,17 @@ class AnswerCorrectnessScorer(JudgevalScorer):
|
|
38
39
|
strict_mode: bool = False,
|
39
40
|
verbose_mode: bool = False
|
40
41
|
):
|
41
|
-
|
42
|
-
|
42
|
+
super().__init__(
|
43
|
+
score_type=APIScorer.ANSWER_CORRECTNESS,
|
44
|
+
threshold=1 if strict_mode else threshold,
|
45
|
+
evaluation_model=None,
|
46
|
+
include_reason=include_reason,
|
47
|
+
async_mode=async_mode,
|
48
|
+
strict_mode=strict_mode,
|
49
|
+
verbose_mode=verbose_mode
|
50
|
+
)
|
43
51
|
self.model, self.using_native_model = create_judge(model)
|
44
52
|
self.evaluation_model = self.model.get_model_name()
|
45
|
-
self.async_mode = async_mode
|
46
|
-
self.strict_mode = strict_mode
|
47
|
-
self.verbose_mode = verbose_mode
|
48
53
|
|
49
54
|
async def _a_get_statements(self, expected_output: str) -> List[str]:
|
50
55
|
prompt = AnswerCorrectnessTemplate.deduce_statements(
|
judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from typing import Optional, List, Union, Tuple
|
2
2
|
|
3
|
+
from judgeval.constants import APIScorer
|
3
4
|
from judgeval.scorers.utils import (get_or_create_event_loop,
|
4
5
|
scorer_progress_meter,
|
5
6
|
create_verbose_logs,
|
@@ -34,13 +35,18 @@ class AnswerRelevancyScorer(JudgevalScorer):
|
|
34
35
|
strict_mode: bool = False,
|
35
36
|
verbose_mode: bool = False,
|
36
37
|
):
|
37
|
-
|
38
|
+
super().__init__(
|
39
|
+
score_type=APIScorer.ANSWER_RELEVANCY,
|
40
|
+
threshold=1 if strict_mode else threshold,
|
41
|
+
evaluation_model=None,
|
42
|
+
include_reason=include_reason,
|
43
|
+
async_mode=async_mode,
|
44
|
+
strict_mode=strict_mode,
|
45
|
+
verbose_mode=verbose_mode
|
46
|
+
)
|
38
47
|
self.model, self.using_native_model = create_judge(model)
|
39
48
|
self.evaluation_model = self.model.get_model_name()
|
40
|
-
self.
|
41
|
-
self.async_mode = async_mode
|
42
|
-
self.strict_mode = strict_mode
|
43
|
-
self.verbose_mode = verbose_mode
|
49
|
+
print(self.model)
|
44
50
|
|
45
51
|
def score_example(
|
46
52
|
self,
|
@@ -4,6 +4,7 @@ from judgeval.judges import JudgevalJudge
|
|
4
4
|
from judgeval.judges.utils import create_judge
|
5
5
|
from judgeval.data import Example, ExampleParams
|
6
6
|
from judgeval.scorers import JudgevalScorer
|
7
|
+
from judgeval.constants import APIScorer
|
7
8
|
from judgeval.scorers.utils import (
|
8
9
|
get_or_create_event_loop,
|
9
10
|
parse_response_json,
|
@@ -30,13 +31,17 @@ class ContextualPrecisionScorer(JudgevalScorer):
|
|
30
31
|
strict_mode: bool = False,
|
31
32
|
verbose_mode: bool = False,
|
32
33
|
):
|
33
|
-
|
34
|
-
|
34
|
+
super().__init__(
|
35
|
+
score_type=APIScorer.CONTEXTUAL_PRECISION,
|
36
|
+
threshold=1 if strict_mode else threshold,
|
37
|
+
evaluation_model=None,
|
38
|
+
include_reason=include_reason,
|
39
|
+
async_mode=async_mode,
|
40
|
+
strict_mode=strict_mode,
|
41
|
+
verbose_mode=verbose_mode
|
42
|
+
)
|
35
43
|
self.model, self.using_native_model = create_judge(model)
|
36
44
|
self.evaluation_model = self.model.get_model_name()
|
37
|
-
self.async_mode = async_mode
|
38
|
-
self.strict_mode = strict_mode
|
39
|
-
self.verbose_mode = verbose_mode
|
40
45
|
|
41
46
|
def score_example(
|
42
47
|
self,
|
@@ -1,5 +1,6 @@
|
|
1
1
|
from typing import Optional, List, Union
|
2
2
|
|
3
|
+
from judgeval.constants import APIScorer
|
3
4
|
from judgeval.scorers.utils import (
|
4
5
|
get_or_create_event_loop,
|
5
6
|
parse_response_json,
|
@@ -32,14 +33,18 @@ class ContextualRecallScorer(JudgevalScorer):
|
|
32
33
|
verbose_mode: bool = False,
|
33
34
|
user: Optional[str] = None
|
34
35
|
):
|
36
|
+
super().__init__(
|
37
|
+
score_type=APIScorer.CONTEXTUAL_RECALL,
|
38
|
+
threshold=1 if strict_mode else threshold,
|
39
|
+
evaluation_model=None,
|
40
|
+
include_reason=include_reason,
|
41
|
+
async_mode=async_mode,
|
42
|
+
strict_mode=strict_mode,
|
43
|
+
verbose_mode=verbose_mode
|
44
|
+
)
|
35
45
|
self.user = user
|
36
|
-
self.threshold = 1 if strict_mode else threshold
|
37
46
|
self.model, self.using_native_model = create_judge(model)
|
38
47
|
self.evaluation_model = self.model.get_model_name()
|
39
|
-
self.include_reason = include_reason
|
40
|
-
self.async_mode = async_mode
|
41
|
-
self.strict_mode = strict_mode
|
42
|
-
self.verbose_mode = verbose_mode
|
43
48
|
|
44
49
|
def score_example(
|
45
50
|
self,
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from typing import Optional, List, Union
|
2
2
|
import asyncio
|
3
3
|
|
4
|
+
from judgeval.constants import APIScorer
|
4
5
|
from judgeval.scorers.utils import (get_or_create_event_loop,
|
5
6
|
scorer_progress_meter,
|
6
7
|
create_verbose_logs,
|
@@ -32,14 +33,18 @@ class ContextualRelevancyScorer(JudgevalScorer):
|
|
32
33
|
verbose_mode: bool = False,
|
33
34
|
user: Optional[str] = None
|
34
35
|
):
|
36
|
+
super().__init__(
|
37
|
+
score_type=APIScorer.CONTEXTUAL_RELEVANCY,
|
38
|
+
threshold=1 if strict_mode else threshold,
|
39
|
+
evaluation_model=None,
|
40
|
+
include_reason=include_reason,
|
41
|
+
async_mode=async_mode,
|
42
|
+
strict_mode=strict_mode,
|
43
|
+
verbose_mode=verbose_mode
|
44
|
+
)
|
35
45
|
self.user = user
|
36
|
-
self.threshold = 1 if strict_mode else threshold
|
37
46
|
self.model, self.using_native_model = create_judge(model)
|
38
47
|
self.evaluation_model = self.model.get_model_name()
|
39
|
-
self.include_reason = include_reason
|
40
|
-
self.async_mode = async_mode
|
41
|
-
self.strict_mode = strict_mode
|
42
|
-
self.verbose_mode = verbose_mode
|
43
48
|
|
44
49
|
def score_example(
|
45
50
|
self,
|
@@ -3,7 +3,7 @@ Code for the local implementation of the Faithfulness metric.
|
|
3
3
|
"""
|
4
4
|
from typing import List, Optional, Union
|
5
5
|
|
6
|
-
|
6
|
+
from judgeval.constants import APIScorer
|
7
7
|
from judgeval.data import (
|
8
8
|
Example,
|
9
9
|
ExampleParams
|
@@ -47,14 +47,19 @@ class FaithfulnessScorer(JudgevalScorer):
|
|
47
47
|
verbose_mode: bool = False,
|
48
48
|
user: Optional[str] = None
|
49
49
|
):
|
50
|
-
|
50
|
+
super().__init__(
|
51
|
+
score_type=APIScorer.FAITHFULNESS,
|
52
|
+
threshold=1 if strict_mode else threshold,
|
53
|
+
evaluation_model=None,
|
54
|
+
include_reason=include_reason,
|
55
|
+
async_mode=async_mode,
|
56
|
+
strict_mode=strict_mode,
|
57
|
+
verbose_mode=verbose_mode
|
58
|
+
)
|
59
|
+
self.user = user
|
51
60
|
self.model, self.using_native_model = create_judge(model)
|
52
61
|
self.using_native_model = True # NOTE: SETTING THIS FOR LITELLM and TOGETHER usage
|
53
62
|
self.evaluation_model = self.model.get_model_name()
|
54
|
-
self.include_reason = include_reason
|
55
|
-
self.async_mode = async_mode
|
56
|
-
self.strict_mode = strict_mode
|
57
|
-
self.verbose_mode = verbose_mode
|
58
63
|
|
59
64
|
def score_example(
|
60
65
|
self,
|
judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py
CHANGED
@@ -20,6 +20,7 @@ Hallucination is measuring the fraction of contexts that agree with output (do n
|
|
20
20
|
|
21
21
|
from typing import Optional, Union, List
|
22
22
|
|
23
|
+
from judgeval.constants import APIScorer
|
23
24
|
from judgeval.scorers.utils import (get_or_create_event_loop,
|
24
25
|
scorer_progress_meter,
|
25
26
|
create_verbose_logs,
|
@@ -50,13 +51,17 @@ class HallucinationScorer(JudgevalScorer):
|
|
50
51
|
strict_mode: bool = False,
|
51
52
|
verbose_mode: bool = False,
|
52
53
|
):
|
53
|
-
|
54
|
+
super().__init__(
|
55
|
+
score_type=APIScorer.HALLUCINATION,
|
56
|
+
threshold=1 if strict_mode else threshold,
|
57
|
+
evaluation_model=None,
|
58
|
+
include_reason=include_reason,
|
59
|
+
async_mode=async_mode,
|
60
|
+
strict_mode=strict_mode,
|
61
|
+
verbose_mode=verbose_mode
|
62
|
+
)
|
54
63
|
self.model, self.using_native_model = create_judge(model)
|
55
64
|
self.evaluation_model = self.model.get_model_name()
|
56
|
-
self.include_reason = include_reason
|
57
|
-
self.async_mode = async_mode
|
58
|
-
self.strict_mode = strict_mode
|
59
|
-
self.verbose_mode = verbose_mode
|
60
65
|
|
61
66
|
def score_example(
|
62
67
|
self,
|
judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
from typing import List, Optional, Union, Any
|
2
2
|
from pydantic import BaseModel, ValidationError, create_model
|
3
3
|
|
4
|
+
from judgeval.constants import APIScorer
|
4
5
|
from judgeval.judges import JudgevalJudge
|
6
|
+
from judgeval.judges.utils import create_judge
|
5
7
|
from judgeval.scorers.utils import (get_or_create_event_loop,
|
6
8
|
scorer_progress_meter,
|
7
9
|
create_verbose_logs,
|
@@ -30,13 +32,18 @@ class JsonCorrectnessScorer(JudgevalScorer):
|
|
30
32
|
verbose_mode: bool = False,
|
31
33
|
user: Optional[str] = None
|
32
34
|
):
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
35
|
+
super().__init__(
|
36
|
+
score_type=APIScorer.JSON_CORRECTNESS,
|
37
|
+
threshold=1 if strict_mode else threshold,
|
38
|
+
evaluation_model=None,
|
39
|
+
include_reason=False,
|
40
|
+
async_mode=async_mode,
|
41
|
+
strict_mode=strict_mode,
|
42
|
+
verbose_mode=verbose_mode
|
43
|
+
)
|
39
44
|
self.user = user
|
45
|
+
self.model, self.using_native_model = create_judge(model)
|
46
|
+
self.evaluation_model = self.model.get_model_name()
|
40
47
|
|
41
48
|
if isinstance(json_schema, dict):
|
42
49
|
# Convert to BaseModel
|
judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
from typing import List, Optional, Union
|
2
2
|
import asyncio
|
3
3
|
|
4
|
+
from judgeval.constants import APIScorer
|
4
5
|
from judgeval.scorers.utils import (get_or_create_event_loop,
|
5
6
|
scorer_progress_meter,
|
6
7
|
create_verbose_logs,
|
@@ -36,7 +37,15 @@ class SummarizationScorer(JudgevalScorer):
|
|
36
37
|
strict_mode: bool = False,
|
37
38
|
verbose_mode: bool = False,
|
38
39
|
):
|
39
|
-
|
40
|
+
super().__init__(
|
41
|
+
score_type=APIScorer.SUMMARIZATION,
|
42
|
+
threshold=1 if strict_mode else threshold,
|
43
|
+
evaluation_model=None,
|
44
|
+
include_reason=include_reason,
|
45
|
+
async_mode=async_mode,
|
46
|
+
strict_mode=strict_mode,
|
47
|
+
verbose_mode=verbose_mode
|
48
|
+
)
|
40
49
|
self.model, self.using_native_model = create_judge(model)
|
41
50
|
self.evaluation_model = self.model.get_model_name()
|
42
51
|
|
judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from typing import List, Union
|
2
2
|
|
3
|
+
from judgeval.constants import APIScorer
|
3
4
|
from judgeval.scorers.utils import (
|
4
5
|
scorer_progress_meter,
|
5
6
|
create_verbose_logs,
|
@@ -55,10 +56,15 @@ class ToolCorrectnessScorer(JudgevalScorer):
|
|
55
56
|
should_exact_match: bool = False,
|
56
57
|
should_consider_ordering: bool = False,
|
57
58
|
):
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
59
|
+
super().__init__(
|
60
|
+
score_type=APIScorer.TOOL_CORRECTNESS,
|
61
|
+
threshold=1 if strict_mode else threshold,
|
62
|
+
evaluation_model=None,
|
63
|
+
include_reason=include_reason,
|
64
|
+
async_mode=False,
|
65
|
+
strict_mode=strict_mode,
|
66
|
+
verbose_mode=verbose_mode
|
67
|
+
)
|
62
68
|
self.should_exact_match = should_exact_match
|
63
69
|
self.should_consider_ordering = should_consider_ordering
|
64
70
|
|
@@ -2,7 +2,7 @@ judgeval/__init__.py,sha256=xiiG4CkeaOtey4fusCd9CBz0BVqzTIbV-K2EFIU0rUM,283
|
|
2
2
|
judgeval/clients.py,sha256=Ns5ljrgPPXUMo7fSPJxO12H64lcPyKeQPIVG_RMi2cM,1162
|
3
3
|
judgeval/constants.py,sha256=5O1jWvxMCRyMSWhmkrvPqfBctx42c7kMtgTS7ORVcFw,1965
|
4
4
|
judgeval/evaluation_run.py,sha256=KcIS7mDR_9XEdqYrJXFcrLz5IDMof34HcD5VtjZgV8w,5884
|
5
|
-
judgeval/judgment_client.py,sha256=
|
5
|
+
judgeval/judgment_client.py,sha256=lVVVDxRQ750nd0wT827dca94YzThNjuFWWJ-BTFW7lg,11367
|
6
6
|
judgeval/run_evaluation.py,sha256=A9jjtWPH2_5W43a1f98R8u-8PuVczoJZNCZIyCoRqi8,18918
|
7
7
|
judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
|
8
8
|
judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
|
@@ -28,7 +28,7 @@ judgeval/scorers/__init__.py,sha256=3rq2VtszrJk9gZ3oAMVd7EGlSugr8aRlHWprMDgQPaQ,
|
|
28
28
|
judgeval/scorers/api_scorer.py,sha256=88kCWr6IetLFn3ziTPG-lwDWvMhFUC6xfINU1MJBoho,2125
|
29
29
|
judgeval/scorers/base_scorer.py,sha256=mbOReG88fWaqCnC8F0u5QepRlzgVkuOz89KEKYxrmMc,1794
|
30
30
|
judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
|
31
|
-
judgeval/scorers/judgeval_scorer.py,sha256=
|
31
|
+
judgeval/scorers/judgeval_scorer.py,sha256=14SZ3sBZtGNM3BCegKebkNad9LTs5Tyhs0kD6l3wLAA,6275
|
32
32
|
judgeval/scorers/prompt_scorer.py,sha256=bUv8eZNy1XGVM1gNMt33dgIVX6zj63bGAV6O0o0c7yg,17821
|
33
33
|
judgeval/scorers/score.py,sha256=zJKG21h9Njyj2vS36CAFK2wlbOcHSKgrLgHV5_25KKw,18630
|
34
34
|
judgeval/scorers/utils.py,sha256=dtueaJm8e3Ph3wj1vC-srzadgK_CoIlOefdvMQ-cwK8,6826
|
@@ -46,33 +46,34 @@ judgeval/scorers/judgeval_scorers/api_scorers/summarization.py,sha256=-E3oxYbI0D
|
|
46
46
|
judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py,sha256=17ppPXm962ew67GU5m0npzbPu3CuhgdKY_KmfPvKfu4,457
|
47
47
|
judgeval/scorers/judgeval_scorers/local_implementations/__init__.py,sha256=ZDbmYHwIbPD75Gj9JKtEWnpBdSVGGRmbn1_IOR6GR-c,1627
|
48
48
|
judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py,sha256=cxxUEspgoIdSzJbwIIioamC0-xDqhYVfYAWxaYF-D_Y,177
|
49
|
-
judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py,sha256=
|
49
|
+
judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py,sha256=PDThn6SzqxgMXT7BpQs2TEBOsgfD5fi6fnKk31qaCTo,10227
|
50
50
|
judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py,sha256=5B_G7PPEsfLq6cwWkKWcLuy2k_5RgoOzsW3wOZLIeMk,6703
|
51
51
|
judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py,sha256=r6yae5iaWtlBL_cP8I-1SuhS9dulsy1e7W9Rcz82v6E,169
|
52
|
-
judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py,sha256=
|
52
|
+
judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py,sha256=QG-oxa6-c74VzTuni17RQ9aeT0t1lCuxQXDMznqX8rc,10714
|
53
53
|
judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py,sha256=GfbKv595s1a0dB1No_kDsap6gfcr6dYRGiXx0PDb89k,6557
|
54
54
|
judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py,sha256=J6tc-T60AVOEaNVuoVU0XIG6dvQri99Q0tnX_Tm-0vc,108
|
55
|
-
judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py,sha256=
|
55
|
+
judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py,sha256=tRgRyjGpc4Pe3nQ1c-5NeNYFvbulL7YEnoRa9zLp1gc,9649
|
56
56
|
judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py,sha256=gddK6BQAFcW04vAad81kxIXCHJQp8CbCqMwudWKy7aM,4892
|
57
57
|
judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py,sha256=4kjfqD_95muHZFo75S8_fbTcC1DI1onNIfMmr8gMZaI,99
|
58
|
-
judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py,sha256=
|
58
|
+
judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py,sha256=hwAv_x3XwGDnSW3a75CTCgIW6eVg8ymdjDdJQvw5p0Y,9260
|
59
59
|
judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py,sha256=boVq6IM7Iambc1ky_JJQ4ejnYoQQtYreG0SjO4iMyFU,6558
|
60
60
|
judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py,sha256=JPCvrekKLbl_xdD49evhtiFIVocuegCpCBkn1auzTSE,184
|
61
|
-
judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py,sha256=
|
61
|
+
judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py,sha256=BtVgE7z-9PHfFRcvn96aEG5mXVcWBweVyty934hZdiU,8915
|
62
62
|
judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py,sha256=6EHBfxWvhur9z14l8zCw5Z4Hb2uRo9Yv7qIhTRT7-aM,4591
|
63
63
|
judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py,sha256=NbkSqPwxgF4T8KsvuIWhVyRwdOlo7mNHMFuRStTFnvk,154
|
64
|
-
judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py,sha256=
|
64
|
+
judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py,sha256=4XqdcdgHg3evrg-IQwXmUHEyee1lZUjXRNEiQSvdpmQ,11341
|
65
65
|
judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py,sha256=oxmCsouh5ExUMmlSuCDolpYR2y9c-yKth6PHrdsCH_g,11387
|
66
66
|
judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py,sha256=fZk3UQxI9Nljf5qjCRLRkF0D-AERFHElI9cC83_cgV8,158
|
67
|
-
judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py,sha256=
|
67
|
+
judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py,sha256=orCrEe1IH4NE7m-AkKMX0EHbysTuAwIqfohcQaU7XxQ,9670
|
68
68
|
judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py,sha256=BkEu7Q_jIVdcdZSq37tMjitZFzACd8-iBTDDXfGbZig,4346
|
69
|
-
judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/
|
69
|
+
judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py,sha256=xQDw7o9JQ6qajusPnBH0MWBRJ5ct_Ao3pJELXxxVMRo,175
|
70
|
+
judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py,sha256=WxIIK_sgHMQ3aLGvkzvYpcmUm6r62gvrAELimMLw3iM,4529
|
70
71
|
judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py,sha256=mv6-XeLSV5yj1H98YYV2iTYVd88zKftZJP42Lgl6R80,89
|
71
72
|
judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py,sha256=6GnRz2h-6Fwt4sl__0RgQOyo3n3iDO4MNuHWxdu-rrM,10242
|
72
|
-
judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py,sha256=
|
73
|
+
judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py,sha256=CBuE6oCxMzTdJoXFt_YPWBte88kedEQ9t3g52ZRztGY,21086
|
73
74
|
judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py,sha256=JUB3TMqS1OHr6PqpIGqkyiBNbyfUaw7lZuUATjU3_ek,168
|
74
|
-
judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py,sha256=
|
75
|
-
judgeval-0.0.
|
76
|
-
judgeval-0.0.
|
77
|
-
judgeval-0.0.
|
78
|
-
judgeval-0.0.
|
75
|
+
judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py,sha256=CYGRJY5EuyICYzHrmFdLykwXakX8AC7G3Bhj7p6szfY,5493
|
76
|
+
judgeval-0.0.8.dist-info/METADATA,sha256=91SMIPO60Q_Ab7yTjL2sKmPgmfl6Bji6_QAzkjaOHlk,1204
|
77
|
+
judgeval-0.0.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
78
|
+
judgeval-0.0.8.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
|
79
|
+
judgeval-0.0.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|