judgeval 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/common/tracer.py +229 -44
- judgeval/constants.py +15 -3
- judgeval/data/datasets/__init__.py +2 -1
- judgeval/data/datasets/dataset.py +1 -122
- judgeval/data/datasets/eval_dataset_client.py +193 -0
- judgeval/data/result.py +16 -1
- judgeval/evaluation_run.py +2 -1
- judgeval/judges/utils.py +14 -2
- judgeval/judgment_client.py +64 -7
- judgeval/run_evaluation.py +19 -0
- judgeval/scorers/judgeval_scorer.py +8 -8
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +3 -1
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +6 -3
- judgeval/scorers/prompt_scorer.py +2 -2
- judgeval/scorers/score.py +11 -11
- judgeval/scorers/utils.py +3 -3
- judgeval/tracer/__init__.py +3 -0
- {judgeval-0.0.9.dist-info → judgeval-0.0.11.dist-info}/METADATA +5 -4
- {judgeval-0.0.9.dist-info → judgeval-0.0.11.dist-info}/RECORD +21 -19
- {judgeval-0.0.9.dist-info → judgeval-0.0.11.dist-info}/WHEEL +0 -0
- {judgeval-0.0.9.dist-info → judgeval-0.0.11.dist-info}/licenses/LICENSE.md +0 -0
judgeval/scorers/utils.py
CHANGED
@@ -32,7 +32,7 @@ def clone_scorers(scorers: List[JudgevalScorer]) -> List[JudgevalScorer]:
|
|
32
32
|
valid_args = {key: args[key] for key in valid_params if key in args}
|
33
33
|
|
34
34
|
cloned_scorer = scorer_class(**valid_args)
|
35
|
-
# kinda hacky, but in case the class inheriting from
|
35
|
+
# kinda hacky, but in case the class inheriting from JudgevalScorer doesn't have `model` in its __init__,
|
36
36
|
# we need to explicitly include it here so that we can add the judge model to the cloned scorer
|
37
37
|
cloned_scorer._add_model(model=args.get("model"))
|
38
38
|
cloned_scorers.append(cloned_scorer)
|
@@ -91,7 +91,7 @@ def parse_response_json(llm_response: str, scorer: Optional[JudgevalScorer] = No
|
|
91
91
|
|
92
92
|
Args:
|
93
93
|
llm_response (str): The response from an LLM.
|
94
|
-
scorer (
|
94
|
+
scorer (JudgevalScorer, optional): The scorer object to forward errors to (if any).
|
95
95
|
"""
|
96
96
|
start = llm_response.find("{") # opening bracket
|
97
97
|
end = llm_response.rfind("}") + 1 # closing bracket
|
@@ -129,7 +129,7 @@ def create_verbose_logs(metric: JudgevalScorer, steps: List[str]) -> str:
|
|
129
129
|
Creates verbose logs for a scorer object.
|
130
130
|
|
131
131
|
Args:
|
132
|
-
metric (
|
132
|
+
metric (JudgevalScorer): The scorer object.
|
133
133
|
steps (List[str]): The steps to be included in the verbose logs.
|
134
134
|
|
135
135
|
Returns:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: judgeval
|
3
|
-
Version: 0.0.9
|
3
|
+
Version: 0.0.11
|
4
4
|
Summary: Judgeval Package
|
5
5
|
Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
|
6
6
|
Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
|
@@ -12,12 +12,10 @@ Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Requires-Python: >=3.11
|
13
13
|
Requires-Dist: anthropic
|
14
14
|
Requires-Dist: fastapi
|
15
|
-
Requires-Dist: langfuse==2.50.3
|
16
15
|
Requires-Dist: litellm
|
17
16
|
Requires-Dist: nest-asyncio
|
18
17
|
Requires-Dist: openai
|
19
18
|
Requires-Dist: pandas
|
20
|
-
Requires-Dist: patronus
|
21
19
|
Requires-Dist: pika
|
22
20
|
Requires-Dist: python-dotenv==1.0.1
|
23
21
|
Requires-Dist: requests
|
@@ -25,11 +23,14 @@ Requires-Dist: supabase
|
|
25
23
|
Requires-Dist: together
|
26
24
|
Requires-Dist: uvicorn
|
27
25
|
Provides-Extra: dev
|
26
|
+
Requires-Dist: langfuse==2.50.3; extra == 'dev'
|
27
|
+
Requires-Dist: patronus; extra == 'dev'
|
28
28
|
Requires-Dist: pytest-asyncio>=0.25.0; extra == 'dev'
|
29
29
|
Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
|
30
30
|
Requires-Dist: pytest>=8.3.4; extra == 'dev'
|
31
|
+
Requires-Dist: tavily-python; extra == 'dev'
|
31
32
|
Description-Content-Type: text/markdown
|
32
33
|
|
33
34
|
# judgeval
|
34
35
|
|
35
|
-
Judgeval is
|
36
|
+
Judgeval is an open-source evaluation framework for multi-agent LLM workflows, for both real-time and offline evaluations.
|
@@ -1,21 +1,22 @@
|
|
1
1
|
judgeval/__init__.py,sha256=xiiG4CkeaOtey4fusCd9CBz0BVqzTIbV-K2EFIU0rUM,283
|
2
2
|
judgeval/clients.py,sha256=Ns5ljrgPPXUMo7fSPJxO12H64lcPyKeQPIVG_RMi2cM,1162
|
3
|
-
judgeval/constants.py,sha256=
|
4
|
-
judgeval/evaluation_run.py,sha256=
|
5
|
-
judgeval/judgment_client.py,sha256=
|
6
|
-
judgeval/run_evaluation.py,sha256=
|
3
|
+
judgeval/constants.py,sha256=oL3kWHg9CzQJiTInDTgJgxRhF3fgylhvEVP360UqG8A,2654
|
4
|
+
judgeval/evaluation_run.py,sha256=ev-IbL34SwRv8lwB4KHfYag1jYo6b049R8mmwNBqmnM,5923
|
5
|
+
judgeval/judgment_client.py,sha256=thmSXi2essIlmd_j5SjlBw9_8qJJp6N3djoWdLaMrj0,13770
|
6
|
+
judgeval/run_evaluation.py,sha256=YOQ6s9RuUrXPTgoYexf7r6Hl1QKIMSTdvHl9kw-ZMzw,20103
|
7
7
|
judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
|
8
8
|
judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
|
9
9
|
judgeval/common/logger.py,sha256=QXN3UMymmKu2iMEMEgATLBnMDjGr_pE2iOSEFoICgg8,6092
|
10
|
-
judgeval/common/tracer.py,sha256=
|
10
|
+
judgeval/common/tracer.py,sha256=wp-oGl8rdAe3_UXcvrEKFg7V6Vnvrnz9y_RVVgYOjCY,29934
|
11
11
|
judgeval/common/utils.py,sha256=3WRyyX0tvnnj_VAVlEdtZrfzyWj6zfX04xdpCtE1m5Y,33736
|
12
12
|
judgeval/data/__init__.py,sha256=YferxwmUqoBi18hrdgro0BD0h4pt20LAqISeUzGMcVU,474
|
13
13
|
judgeval/data/api_example.py,sha256=vwWFbI6eJr5VgURCRbuSiMtEXLUbTCih_BcaqEBy-pg,4108
|
14
14
|
judgeval/data/example.py,sha256=lymGZ3jG818-r2vyFunt6OLFrhESOyJnbhao_ljTjlA,2471
|
15
|
-
judgeval/data/result.py,sha256=
|
15
|
+
judgeval/data/result.py,sha256=8FIO-bFKPegZuByKRjA2_sumjb8oGWQ5ZeQ1RVz5z2w,4393
|
16
16
|
judgeval/data/scorer_data.py,sha256=pYljblCPZrlMIv5Eg7R-clnmsqzUBAwokKjZpwa0DXE,3280
|
17
|
-
judgeval/data/datasets/__init__.py,sha256=
|
18
|
-
judgeval/data/datasets/dataset.py,sha256=
|
17
|
+
judgeval/data/datasets/__init__.py,sha256=eO6ayeM_bTGwIt0eDSlTBIIBvXvIWRWWSfYZrZROPiQ,265
|
18
|
+
judgeval/data/datasets/dataset.py,sha256=AGdU21vZ4iVjqbjQ7JY-u29FzJrdDFTgdvhzvYVJNyo,11833
|
19
|
+
judgeval/data/datasets/eval_dataset_client.py,sha256=TaCDzymGFNFjGRrieEdQB8dT8xqNPpsEi2XLGFyrJno,7113
|
19
20
|
judgeval/data/datasets/ground_truth.py,sha256=OTBs3VZe-Wp0vEXEsq14GPZHYtpWT16bhGQTycIvkKc,2057
|
20
21
|
judgeval/data/datasets/utils.py,sha256=lQxyl7mevct7JcDSyIrU_8QOzT-EYPWEvoUiAeOdeek,2502
|
21
22
|
judgeval/judges/__init__.py,sha256=tyQ5KY88Kp1Ctfw2IJxnVEpy8DnFCtmy04JdPOpp-As,339
|
@@ -23,15 +24,15 @@ judgeval/judges/base_judge.py,sha256=qhYSFxE21WajYNaT4X-qwWGtpo_tqzBzdqbszSheSD8
|
|
23
24
|
judgeval/judges/litellm_judge.py,sha256=EIL58Teptv8DzZUO3yP2RDQCDq-aoBB6HPZzPdK6KTg,2424
|
24
25
|
judgeval/judges/mixture_of_judges.py,sha256=OuGWCuXyqe7s_Y74ij90TJFRfHU-VAFyJVVrwBM0RO0,15532
|
25
26
|
judgeval/judges/together_judge.py,sha256=x3jf-tq77QPXHeeoF739f69hE_0VceXD9FHLrVFdGVA,2275
|
26
|
-
judgeval/judges/utils.py,sha256=
|
27
|
+
judgeval/judges/utils.py,sha256=sYxSJq5cI9LtyJaxurcW9IwngALC9Ty8F_Mb8gz81nE,2732
|
27
28
|
judgeval/scorers/__init__.py,sha256=XcDdLn_s16rSQob0896oj4JXTA8-Xfl271TUEBj6Oew,998
|
28
29
|
judgeval/scorers/api_scorer.py,sha256=88kCWr6IetLFn3ziTPG-lwDWvMhFUC6xfINU1MJBoho,2125
|
29
30
|
judgeval/scorers/base_scorer.py,sha256=mbOReG88fWaqCnC8F0u5QepRlzgVkuOz89KEKYxrmMc,1794
|
30
31
|
judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
|
31
|
-
judgeval/scorers/judgeval_scorer.py,sha256=
|
32
|
-
judgeval/scorers/prompt_scorer.py,sha256=
|
33
|
-
judgeval/scorers/score.py,sha256=
|
34
|
-
judgeval/scorers/utils.py,sha256=
|
32
|
+
judgeval/scorers/judgeval_scorer.py,sha256=T9fkJwFVYMzW88TFr-RWg-Fqmp-cdrA8bLFymqMzOa8,6291
|
33
|
+
judgeval/scorers/prompt_scorer.py,sha256=UHkOUts1aIQCoYFcr-sKyucmvv_8ONFE5LZO01aObd0,17825
|
34
|
+
judgeval/scorers/score.py,sha256=GALVmeApP1Cyih2vY93zRaU6RShtW4jJDG47Pm6yfnw,18657
|
35
|
+
judgeval/scorers/utils.py,sha256=X7lBI0LRBnBR8KUU-Fvont2Wq31t5p6zOTWGebWIcAU,6832
|
35
36
|
judgeval/scorers/judgeval_scorers/__init__.py,sha256=D12jJAKTcfmz8fDBkYeOmdzZMZsURuODIJ5p7Nk1lWE,5189
|
36
37
|
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=zFwH2TC5AFlpDRfVKc6GN4YTtnmeyALl-JRLoZD_Jco,1284
|
37
38
|
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=690G5askjE8dcbKPGvCF6JxAEM9QJUqb-3K-D6lI6oM,463
|
@@ -64,8 +65,8 @@ judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__i
|
|
64
65
|
judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py,sha256=BtVgE7z-9PHfFRcvn96aEG5mXVcWBweVyty934hZdiU,8915
|
65
66
|
judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py,sha256=6EHBfxWvhur9z14l8zCw5Z4Hb2uRo9Yv7qIhTRT7-aM,4591
|
66
67
|
judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py,sha256=NbkSqPwxgF4T8KsvuIWhVyRwdOlo7mNHMFuRStTFnvk,154
|
67
|
-
judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py,sha256=
|
68
|
-
judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py,sha256=
|
68
|
+
judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py,sha256=fSxIn1uRvwCf7u4cOK4XrcPdS7OPzAWL9xt1pxujosY,11368
|
69
|
+
judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py,sha256=vNLjF4NKZJSV4VNenHzoAUB2xVZz6tt_5AzryKmOVrI,11690
|
69
70
|
judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py,sha256=fZk3UQxI9Nljf5qjCRLRkF0D-AERFHElI9cC83_cgV8,158
|
70
71
|
judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py,sha256=orCrEe1IH4NE7m-AkKMX0EHbysTuAwIqfohcQaU7XxQ,9670
|
71
72
|
judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py,sha256=BkEu7Q_jIVdcdZSq37tMjitZFzACd8-iBTDDXfGbZig,4346
|
@@ -76,7 +77,8 @@ judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py
|
|
76
77
|
judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py,sha256=CBuE6oCxMzTdJoXFt_YPWBte88kedEQ9t3g52ZRztGY,21086
|
77
78
|
judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py,sha256=JUB3TMqS1OHr6PqpIGqkyiBNbyfUaw7lZuUATjU3_ek,168
|
78
79
|
judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py,sha256=CYGRJY5EuyICYzHrmFdLykwXakX8AC7G3Bhj7p6szfY,5493
|
79
|
-
judgeval
|
80
|
-
judgeval-0.0.
|
81
|
-
judgeval-0.0.
|
82
|
-
judgeval-0.0.
|
80
|
+
judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
|
81
|
+
judgeval-0.0.11.dist-info/METADATA,sha256=WH8aPpUNCwE1Zr21qJ0H0WEVB_i_dilyLSbw9e5nXZo,1283
|
82
|
+
judgeval-0.0.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
83
|
+
judgeval-0.0.11.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
|
84
|
+
judgeval-0.0.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|