judgeval 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/cli.py +65 -0
- judgeval/common/api/api.py +44 -38
- judgeval/common/api/constants.py +18 -5
- judgeval/common/api/json_encoder.py +8 -9
- judgeval/common/tracer/core.py +448 -256
- judgeval/common/tracer/otel_span_processor.py +1 -1
- judgeval/common/tracer/span_processor.py +1 -1
- judgeval/common/tracer/span_transformer.py +2 -1
- judgeval/common/tracer/trace_manager.py +6 -1
- judgeval/common/trainer/__init__.py +5 -0
- judgeval/common/trainer/config.py +125 -0
- judgeval/common/trainer/console.py +151 -0
- judgeval/common/trainer/trainable_model.py +238 -0
- judgeval/common/trainer/trainer.py +301 -0
- judgeval/data/evaluation_run.py +104 -0
- judgeval/data/judgment_types.py +37 -8
- judgeval/data/trace.py +1 -0
- judgeval/data/trace_run.py +0 -2
- judgeval/integrations/langgraph.py +2 -1
- judgeval/judgment_client.py +90 -135
- judgeval/local_eval_queue.py +3 -5
- judgeval/run_evaluation.py +43 -299
- judgeval/scorers/base_scorer.py +9 -10
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +17 -3
- {judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/METADATA +10 -47
- {judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/RECORD +29 -22
- judgeval-0.7.0.dist-info/entry_points.txt +2 -0
- judgeval/evaluation_run.py +0 -80
- {judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/WHEEL +0 -0
- {judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,43 +1,49 @@
|
|
1
1
|
judgeval/__init__.py,sha256=5Lm1JMYFREJGN_8X-Wpruu_ovwGLJ08gCzNAt-u-pQE,419
|
2
|
+
judgeval/cli.py,sha256=WTFTJKQ6LZI7K9o9KnCfTzsTEJnKfPuSURUpRFLiHp8,1756
|
2
3
|
judgeval/clients.py,sha256=HHul68PV1om0dxsVZZu90TtCiy5zaqAwph16jXTQzQo,989
|
3
4
|
judgeval/constants.py,sha256=UNoTLHgbpZHRInPM2ZaI3m0XokPkee5ILlg20reqhzo,4180
|
4
5
|
judgeval/dataset.py,sha256=vOrDKam2I-K1WcVF5IBkQruCDvXTc8PRaFm4-dV0lXs,6220
|
5
|
-
judgeval/
|
6
|
-
judgeval/
|
7
|
-
judgeval/local_eval_queue.py,sha256=fAI0_OlvCr-WOCQWw18C4JIRJHKYzlyGzsGUm8LcsYE,7076
|
6
|
+
judgeval/judgment_client.py,sha256=KxQP-EmhZUJOIFM2Zf_OJbxrgDpN1dRwxo4iVI9zLdA,9390
|
7
|
+
judgeval/local_eval_queue.py,sha256=GmlXeZt7bfAJe1hPUjDg_irth4RkNqL2Zdi7VzboBzI,6984
|
8
8
|
judgeval/rules.py,sha256=CoQjqmP8daEXewMkplmA-7urubDtweOr5O6z8klVwLI,20031
|
9
|
-
judgeval/run_evaluation.py,sha256=
|
9
|
+
judgeval/run_evaluation.py,sha256=ETAP7srohMBAsRqvxHQHKsR5zt3Rzns_kNM_2ulxVdU,18084
|
10
10
|
judgeval/version_check.py,sha256=FoLEtpCjDw2HuDQdpw5yT29UtwumSc6ZZN6AV_c9Mnw,1057
|
11
11
|
judgeval/common/__init__.py,sha256=KH-QJyWtQ60R6yFIBDYS3WGRiNpEu1guynpxivZvpBQ,309
|
12
12
|
judgeval/common/exceptions.py,sha256=OkgDznu2wpBQZMXiZarLJYNk1HIcC8qYW7VypDC3Ook,556
|
13
13
|
judgeval/common/logger.py,sha256=514eFLYWS_UL8VY-zAR2ePUlpQe4rbYlleLASFllLE4,1511
|
14
14
|
judgeval/common/utils.py,sha256=oxGDRVWOICKWeyGgsoc36_yAyHSYF4XtH842Mkznwis,34739
|
15
15
|
judgeval/common/api/__init__.py,sha256=-E7lpZz1fG8puR_aYUMfPmQ-Vyhd0bgzoaU5EhIuFjQ,114
|
16
|
-
judgeval/common/api/api.py,sha256=
|
17
|
-
judgeval/common/api/constants.py,sha256=
|
18
|
-
judgeval/common/api/json_encoder.py,sha256=
|
16
|
+
judgeval/common/api/api.py,sha256=fWtMNln0o1wOhJ9wangWpyY_j3WF7P3at_LYPJEicP0,13670
|
17
|
+
judgeval/common/api/constants.py,sha256=N6rQZqMhFv2U8tOw-6pMH0uV7aGT9m8sw57ZkfDW97c,4689
|
18
|
+
judgeval/common/api/json_encoder.py,sha256=QQgCe2FBmW1uWKx8yvuhr4U7_b4D0sG97GZtXHKnBdk,5881
|
19
19
|
judgeval/common/storage/__init__.py,sha256=a-PI7OL-ydyzugGUKmJKRBASnK-Q-gs82L9K9rSyJP8,90
|
20
20
|
judgeval/common/storage/s3_storage.py,sha256=0-bNKheqJJyBZ92KGrzQtd1zocIRWBlfn_58L4a-Ay0,3719
|
21
21
|
judgeval/common/tracer/__init__.py,sha256=tJCJsmVmrL89Phv88gNCJ-j0ITPez6lh8vhMAAlLNSc,795
|
22
22
|
judgeval/common/tracer/constants.py,sha256=yu5y8gMe5yb1AaBkPtAH-BNwIaAR3NwYCRoSf45wp5U,621
|
23
|
-
judgeval/common/tracer/core.py,sha256=
|
23
|
+
judgeval/common/tracer/core.py,sha256=Vhh2LRgLdxa_yxUfMunv7l83tksuztm7F_oSwD92EXs,91681
|
24
24
|
judgeval/common/tracer/otel_exporter.py,sha256=kZLlOQ6afQE4dmb9H1wgU4P3H5PG1D_zKyvnpWcT5Ak,3899
|
25
|
-
judgeval/common/tracer/otel_span_processor.py,sha256=
|
25
|
+
judgeval/common/tracer/otel_span_processor.py,sha256=BD-FKXaZft5_3zqy1Qe_tpkudVOLop9AGhBjZUgp-Z8,6502
|
26
26
|
judgeval/common/tracer/providers.py,sha256=3c3YOtKuoBjlTL0rc2HAGnUpppqvsyzrN5H6EKCqEi0,2733
|
27
|
-
judgeval/common/tracer/span_processor.py,sha256=
|
28
|
-
judgeval/common/tracer/span_transformer.py,sha256=
|
29
|
-
judgeval/common/tracer/trace_manager.py,sha256=
|
27
|
+
judgeval/common/tracer/span_processor.py,sha256=1NQxNSVWcb8qCFLmslSVMnaWdkOZmiFJnxeeN0i6vnU,1150
|
28
|
+
judgeval/common/tracer/span_transformer.py,sha256=cfzz6RpTCOG9Io9knNlwtAW34p3wyK-u8jSNMu24p1w,7382
|
29
|
+
judgeval/common/tracer/trace_manager.py,sha256=FAlkTNomb_TzSSnF7DnmP5nImBgHaA_SFNW1INzE1aI,3178
|
30
|
+
judgeval/common/trainer/__init__.py,sha256=fkaBjtAynh1GZbvK2xbNTjuLFSDpPzj7u4Chf4vZsfs,209
|
31
|
+
judgeval/common/trainer/config.py,sha256=kaWz0ni4ijtXpu8SF2jLEnw5yA2HqaUbvjiyqEnSrXE,4195
|
32
|
+
judgeval/common/trainer/console.py,sha256=sZCoJqI6ZRArbJpxl3ZwNb9taYoEkgCpz9PF4IUbGjE,4818
|
33
|
+
judgeval/common/trainer/trainable_model.py,sha256=tnhFH2Mp5hVht3utHVFPs2BxKoBQgRJrAzgzE5IfKEU,8842
|
34
|
+
judgeval/common/trainer/trainer.py,sha256=dE-sOU26dNaWxPaN88XuN3f3XCizdHrRPNylrspCWQc,11815
|
30
35
|
judgeval/data/__init__.py,sha256=1QagDcSQtfnJ632t9Dnq8d7XjAqhmY4mInOWt8qH9tM,455
|
36
|
+
judgeval/data/evaluation_run.py,sha256=IirmYZ1_9N99eep7DDuoyshwjmpNK9bQCxCWXnnhhuI,4053
|
31
37
|
judgeval/data/example.py,sha256=kRskIgsjwcvv2Y8jaPwV-PND7zlmMbFsvRVQ_b7SZY0,914
|
32
|
-
judgeval/data/judgment_types.py,sha256=
|
38
|
+
judgeval/data/judgment_types.py,sha256=3nGCUZ1YJhXajhFlAQvax0SOJ8eLuORtquwwjMreJFw,9826
|
33
39
|
judgeval/data/result.py,sha256=OtSnBUrdQpjyAqxXRLTW3wC9v9lOm_GqzL14ccRQxrg,2124
|
34
40
|
judgeval/data/scorer_data.py,sha256=5QBHtvOIWOq0Rn9_uPJzAMRYMlWxMB-rXnG_6kV4Z4Y,2955
|
35
41
|
judgeval/data/tool.py,sha256=iWQSdy5uNbIeACu3gQy1DC2oGYxRVYNfkkczWdQMAiA,99
|
36
|
-
judgeval/data/trace.py,sha256=
|
37
|
-
judgeval/data/trace_run.py,sha256=
|
42
|
+
judgeval/data/trace.py,sha256=S781vVU1BvQ_kTS3s7UGYdmYVVxVGjDzWJHZpHedyf0,2834
|
43
|
+
judgeval/data/trace_run.py,sha256=Oo1vDrJYX_itt4tt7PJf7fNKd0HE3fnBJxuIkRY8Wrg,1585
|
38
44
|
judgeval/data/scripts/fix_default_factory.py,sha256=lvp2JwYZqz-XpD9LZNa3mANZVP-jJSZoNzolI6JWERM,591
|
39
45
|
judgeval/data/scripts/openapi_transform.py,sha256=Sm04JClzyP1ga8KA3gkIdsae8Hlx-XU7-x0gHCQYOhg,3877
|
40
|
-
judgeval/integrations/langgraph.py,sha256=
|
46
|
+
judgeval/integrations/langgraph.py,sha256=XsTNpKvXZmSf4TJBtRKSd5AB7S-Td9GTG5wZW9Npj6k,30062
|
41
47
|
judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
|
42
48
|
judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY,986
|
43
49
|
judgeval/judges/litellm_judge.py,sha256=K9yCGOmozt7sYO0u8CHWyZNi8mXnSR3pPkP8yVsvuRc,2561
|
@@ -47,7 +53,7 @@ judgeval/judges/utils.py,sha256=_t6oYN9q63wyP7D4jI8X0bNmvVw7OfaE7uMTYDVS14E,2782
|
|
47
53
|
judgeval/scorers/__init__.py,sha256=4H_cinTQ4EogZv59YEV-3U9EOTLppNwgAPTi1-jI9Fw,746
|
48
54
|
judgeval/scorers/agent_scorer.py,sha256=TjwD_YglSywr3EowEojiCyg5qDgCRa5LRGc5nFdmIBc,703
|
49
55
|
judgeval/scorers/api_scorer.py,sha256=xlhqkeMUBFxl8daSXOTWOYwZjBAz7o6b4sVD5f8cIHw,2523
|
50
|
-
judgeval/scorers/base_scorer.py,sha256=
|
56
|
+
judgeval/scorers/base_scorer.py,sha256=hKrLLh2DaxTgAfze8p_IapvsrogRCevYgfaNCDeOJzc,2869
|
51
57
|
judgeval/scorers/example_scorer.py,sha256=2n45y3LMV1Q-ARyXLHqvVWETlnY1DqS7OLzPu9IBGz8,716
|
52
58
|
judgeval/scorers/exceptions.py,sha256=ACDHK5-TWiF3NTk-wycaedpbrdobm-CvvC1JA_iP-Mk,179
|
53
59
|
judgeval/scorers/score.py,sha256=SWyoqOOvyLpLy39tLyb_Q94sdh9r_IuDv6YNREw52lg,7546
|
@@ -61,7 +67,7 @@ judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=NABO_iBd
|
|
61
67
|
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ps51bTgQsD9xGYsk1v9bx0WxQMqywSllCE9_xlJkLd8,531
|
62
68
|
judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=SnFLvU4FGsMeUVUp0SGHSy_6wgfwr_vHPGnZx5YJl_Q,691
|
63
69
|
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=aQzu-TiGqG74JDQ927evv5yGmnZw2AOolyHvlIhiUbI,683
|
64
|
-
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=
|
70
|
+
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=1FsUGjQu3oa2rF-oqt32j-yA2YM33_trGTJ0HgagFJ0,7793
|
65
71
|
judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py,sha256=Mcp1CjMNyOax9UkvoRdSyUYdO2Os1-Nko43y89m2Luo,594
|
66
72
|
judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py,sha256=Z2FLGBC7m_CLx-CMgXVuTvYvN0vY5yOcWA0ImBkeBfY,787
|
67
73
|
judgeval/tracer/__init__.py,sha256=wkuXtOGDCrwgPPXlh_sSJmvGuWaAMHyNzk1TzB5f9aI,148
|
@@ -69,7 +75,8 @@ judgeval/utils/alerts.py,sha256=3w_AjQrgfmOZvfqCridW8WAnHVxHHXokX9jNzVFyGjA,3297
|
|
69
75
|
judgeval/utils/async_utils.py,sha256=uNx1SopEc0quSjc8GBQqyba0SmCMAzv2NKIq6xYwttc,989
|
70
76
|
judgeval/utils/file_utils.py,sha256=PWHRs8dUr8iDwpglSSk4Yjd7C6ZhDzUaO-jV3m7riHM,1987
|
71
77
|
judgeval/utils/requests.py,sha256=K3gUKrwL6TvwYKVYO5OeLWdUHn9NiUPmnIXhZEiEaHU,1534
|
72
|
-
judgeval-0.
|
73
|
-
judgeval-0.
|
74
|
-
judgeval-0.
|
75
|
-
judgeval-0.
|
78
|
+
judgeval-0.7.0.dist-info/METADATA,sha256=WvjnS9cY6RvmrLdtpJbNJN3AssRmIWp61dYr2ZUn0Bo,8877
|
79
|
+
judgeval-0.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
80
|
+
judgeval-0.7.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
|
81
|
+
judgeval-0.7.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
|
82
|
+
judgeval-0.7.0.dist-info/RECORD,,
|
judgeval/evaluation_run.py
DELETED
@@ -1,80 +0,0 @@
|
|
1
|
-
from typing import List, Optional, Union
|
2
|
-
from pydantic import BaseModel, field_validator, Field
|
3
|
-
|
4
|
-
from judgeval.data import Example
|
5
|
-
from judgeval.scorers import BaseScorer, APIScorerConfig
|
6
|
-
from judgeval.constants import ACCEPTABLE_MODELS, DEFAULT_GPT_MODEL
|
7
|
-
|
8
|
-
|
9
|
-
class EvaluationRun(BaseModel):
|
10
|
-
"""
|
11
|
-
Stores example and evaluation scorers together for running an eval task
|
12
|
-
|
13
|
-
Args:
|
14
|
-
project_name (str): The name of the project the evaluation results belong to
|
15
|
-
eval_name (str): A name for this evaluation run
|
16
|
-
examples (List[Example]): The examples to evaluate
|
17
|
-
scorers (List[Union[JudgmentScorer, BaseScorer]]): A list of scorers to use for evaluation
|
18
|
-
model (str): The model used as a judge when using LLM as a Judge
|
19
|
-
metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
|
20
|
-
"""
|
21
|
-
|
22
|
-
organization_id: Optional[str] = None
|
23
|
-
project_name: Optional[str] = Field(default=None, validate_default=True)
|
24
|
-
eval_name: Optional[str] = Field(default=None, validate_default=True)
|
25
|
-
examples: List[Example]
|
26
|
-
scorers: List[Union[APIScorerConfig, BaseScorer]]
|
27
|
-
model: Optional[str] = DEFAULT_GPT_MODEL
|
28
|
-
trace_span_id: Optional[str] = None
|
29
|
-
trace_id: Optional[str] = None
|
30
|
-
# API Key will be "" until user calls client.run_eval(), then API Key will be set
|
31
|
-
override: Optional[bool] = False
|
32
|
-
append: Optional[bool] = False
|
33
|
-
|
34
|
-
def model_dump(self, **kwargs):
|
35
|
-
data = super().model_dump(**kwargs)
|
36
|
-
|
37
|
-
data["scorers"] = [
|
38
|
-
scorer.model_dump() for scorer in self.scorers
|
39
|
-
] # Pydantic has problems with properly calling model_dump() on the scorers, so we need to do it manually
|
40
|
-
data["examples"] = [example.model_dump() for example in self.examples]
|
41
|
-
|
42
|
-
return data
|
43
|
-
|
44
|
-
@field_validator("examples")
|
45
|
-
def validate_examples(cls, v):
|
46
|
-
if not v:
|
47
|
-
raise ValueError("Examples cannot be empty.")
|
48
|
-
for item in v:
|
49
|
-
if not isinstance(item, Example):
|
50
|
-
raise ValueError(f"Item of type {type(item)} is not a Example")
|
51
|
-
return v
|
52
|
-
|
53
|
-
@field_validator("scorers", mode="before")
|
54
|
-
def validate_scorers(cls, v):
|
55
|
-
if not v:
|
56
|
-
raise ValueError("Scorers cannot be empty.")
|
57
|
-
if not all(
|
58
|
-
isinstance(scorer, BaseScorer) or isinstance(scorer, APIScorerConfig)
|
59
|
-
for scorer in v
|
60
|
-
):
|
61
|
-
raise ValueError(
|
62
|
-
"All scorers must be of type BaseScorer or APIScorerConfig."
|
63
|
-
)
|
64
|
-
return v
|
65
|
-
|
66
|
-
@field_validator("model")
|
67
|
-
def validate_model(cls, v, values):
|
68
|
-
if not v:
|
69
|
-
raise ValueError("Model cannot be empty.")
|
70
|
-
|
71
|
-
# Check if model is string or list of strings
|
72
|
-
if isinstance(v, str):
|
73
|
-
if v not in ACCEPTABLE_MODELS:
|
74
|
-
raise ValueError(
|
75
|
-
f"Model name {v} not recognized. Please select a valid model name.)"
|
76
|
-
)
|
77
|
-
return v
|
78
|
-
|
79
|
-
class Config:
|
80
|
-
arbitrary_types_allowed = True
|
File without changes
|
File without changes
|