judgeval 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,53 +1,57 @@
1
- judgeval/__init__.py,sha256=HM1M8hmqRum6G554QKkXhB4DF4f5eh_xtYo0Kf-t3kw,332
2
- judgeval/clients.py,sha256=JnB8n90GyXiYaGmSEYaA67mdJSnr3SIrzArao7NGebw,980
3
- judgeval/constants.py,sha256=hWed25HwGUJy-tePbtoUZ0_Zg0X_MkAH84KiH-OHHFI,4150
4
- judgeval/dataset.py,sha256=rjV54XNTslNNtf-Uu2ndDIh602ZwSCFhPg2NuckDJ-w,6081
5
- judgeval/evaluation_run.py,sha256=edNpO444Fwt2ykWsflIzlYdDJUlUfbpXHHQSKfFS4y0,2876
6
- judgeval/judgment_client.py,sha256=vPoxbmxAlhbG5rXXqxWjMbyEqOI044BaQanr1fev2CE,11723
1
+ judgeval/__init__.py,sha256=5Lm1JMYFREJGN_8X-Wpruu_ovwGLJ08gCzNAt-u-pQE,419
2
+ judgeval/cli.py,sha256=IcL4_bGr9CtEeea1-AFqM_TEV_VomDlArlxh4IomiSQ,1754
3
+ judgeval/clients.py,sha256=HHul68PV1om0dxsVZZu90TtCiy5zaqAwph16jXTQzQo,989
4
+ judgeval/constants.py,sha256=UNoTLHgbpZHRInPM2ZaI3m0XokPkee5ILlg20reqhzo,4180
5
+ judgeval/dataset.py,sha256=vOrDKam2I-K1WcVF5IBkQruCDvXTc8PRaFm4-dV0lXs,6220
6
+ judgeval/judgment_client.py,sha256=-7xcBFowzXKedMINwfZCOL4FKucECWPNEY9QVMo_cys,13644
7
+ judgeval/local_eval_queue.py,sha256=GmlXeZt7bfAJe1hPUjDg_irth4RkNqL2Zdi7VzboBzI,6984
7
8
  judgeval/rules.py,sha256=CoQjqmP8daEXewMkplmA-7urubDtweOr5O6z8klVwLI,20031
8
- judgeval/run_evaluation.py,sha256=7J6FHhWhB-IDPMSOcWkrjTpSNm2v3s_KBq8Np3y2pys,27652
9
+ judgeval/run_evaluation.py,sha256=gs-_v_i95LKlJj95G2RmQXvIyBfoldnd1pWCNO4UqsM,21985
9
10
  judgeval/version_check.py,sha256=FoLEtpCjDw2HuDQdpw5yT29UtwumSc6ZZN6AV_c9Mnw,1057
10
11
  judgeval/common/__init__.py,sha256=KH-QJyWtQ60R6yFIBDYS3WGRiNpEu1guynpxivZvpBQ,309
11
12
  judgeval/common/exceptions.py,sha256=OkgDznu2wpBQZMXiZarLJYNk1HIcC8qYW7VypDC3Ook,556
12
13
  judgeval/common/logger.py,sha256=514eFLYWS_UL8VY-zAR2ePUlpQe4rbYlleLASFllLE4,1511
13
14
  judgeval/common/utils.py,sha256=oxGDRVWOICKWeyGgsoc36_yAyHSYF4XtH842Mkznwis,34739
14
15
  judgeval/common/api/__init__.py,sha256=-E7lpZz1fG8puR_aYUMfPmQ-Vyhd0bgzoaU5EhIuFjQ,114
15
- judgeval/common/api/api.py,sha256=0Vd81CXoUYF1a7hOlI2PqXJcQO7eqWEbS8sX2W0CJMY,14184
16
- judgeval/common/api/constants.py,sha256=DXej0m8HEhb871SdiR8t_o4fzeMoQjHYqb_X0Plj8wY,4577
16
+ judgeval/common/api/api.py,sha256=fWtMNln0o1wOhJ9wangWpyY_j3WF7P3at_LYPJEicP0,13670
17
+ judgeval/common/api/constants.py,sha256=y0BDcQqHBZ7MwLd4gT5hLUF8UMs_GVwsJGC-ibfxCAw,4698
18
+ judgeval/common/api/json_encoder.py,sha256=QQgCe2FBmW1uWKx8yvuhr4U7_b4D0sG97GZtXHKnBdk,5881
17
19
  judgeval/common/storage/__init__.py,sha256=a-PI7OL-ydyzugGUKmJKRBASnK-Q-gs82L9K9rSyJP8,90
18
20
  judgeval/common/storage/s3_storage.py,sha256=0-bNKheqJJyBZ92KGrzQtd1zocIRWBlfn_58L4a-Ay0,3719
19
21
  judgeval/common/tracer/__init__.py,sha256=tJCJsmVmrL89Phv88gNCJ-j0ITPez6lh8vhMAAlLNSc,795
20
22
  judgeval/common/tracer/constants.py,sha256=yu5y8gMe5yb1AaBkPtAH-BNwIaAR3NwYCRoSf45wp5U,621
21
- judgeval/common/tracer/core.py,sha256=blHEh61CE5kZLYCgyRF4kU6dVzi_Ko6DrnBpw2-jByI,73973
23
+ judgeval/common/tracer/core.py,sha256=TQ80NODaJx7gzmntevDLA3evVJ3m2Zy2s0Pwd7APG9Y,84867
22
24
  judgeval/common/tracer/otel_exporter.py,sha256=kZLlOQ6afQE4dmb9H1wgU4P3H5PG1D_zKyvnpWcT5Ak,3899
23
- judgeval/common/tracer/otel_span_processor.py,sha256=W7SM62KnxJ48vC9WllIHRKaLlvxkCwqYoT4KqZLfGNs,6497
24
- judgeval/common/tracer/span_processor.py,sha256=eFjTgSWSkM6BWE94CrvgafDg_WkxLsFL_MafwBG-p9M,1145
25
- judgeval/common/tracer/span_transformer.py,sha256=nCnwRC52OKfYRFnsOwGdPaqb_U17yn5S_9jfhv1GaLM,7803
25
+ judgeval/common/tracer/otel_span_processor.py,sha256=BD-FKXaZft5_3zqy1Qe_tpkudVOLop9AGhBjZUgp-Z8,6502
26
+ judgeval/common/tracer/providers.py,sha256=3c3YOtKuoBjlTL0rc2HAGnUpppqvsyzrN5H6EKCqEi0,2733
27
+ judgeval/common/tracer/span_processor.py,sha256=1NQxNSVWcb8qCFLmslSVMnaWdkOZmiFJnxeeN0i6vnU,1150
28
+ judgeval/common/tracer/span_transformer.py,sha256=cfzz6RpTCOG9Io9knNlwtAW34p3wyK-u8jSNMu24p1w,7382
26
29
  judgeval/common/tracer/trace_manager.py,sha256=ltiXcWC-68DRc8uSa28qHiWRSIBf6NpYOPkZYooR8tg,3086
27
30
  judgeval/data/__init__.py,sha256=1QagDcSQtfnJ632t9Dnq8d7XjAqhmY4mInOWt8qH9tM,455
31
+ judgeval/data/evaluation_run.py,sha256=IirmYZ1_9N99eep7DDuoyshwjmpNK9bQCxCWXnnhhuI,4053
28
32
  judgeval/data/example.py,sha256=kRskIgsjwcvv2Y8jaPwV-PND7zlmMbFsvRVQ_b7SZY0,914
29
- judgeval/data/judgment_types.py,sha256=KE1HrFLfSxiu1zutaiZ7B7La9PGXIAsoWpo_5iy645c,8336
33
+ judgeval/data/judgment_types.py,sha256=3nGCUZ1YJhXajhFlAQvax0SOJ8eLuORtquwwjMreJFw,9826
30
34
  judgeval/data/result.py,sha256=OtSnBUrdQpjyAqxXRLTW3wC9v9lOm_GqzL14ccRQxrg,2124
31
35
  judgeval/data/scorer_data.py,sha256=5QBHtvOIWOq0Rn9_uPJzAMRYMlWxMB-rXnG_6kV4Z4Y,2955
32
36
  judgeval/data/tool.py,sha256=iWQSdy5uNbIeACu3gQy1DC2oGYxRVYNfkkczWdQMAiA,99
33
- judgeval/data/trace.py,sha256=tDOuYFPUssQInjsmwyxcXq-W3IB29Vq340VzqafuKJc,6942
34
- judgeval/data/trace_run.py,sha256=c6pRSv09Vj016hxM49I3kMftCwWg8hhkfT_1kBXluSI,1600
37
+ judgeval/data/trace.py,sha256=S781vVU1BvQ_kTS3s7UGYdmYVVxVGjDzWJHZpHedyf0,2834
38
+ judgeval/data/trace_run.py,sha256=Oo1vDrJYX_itt4tt7PJf7fNKd0HE3fnBJxuIkRY8Wrg,1585
35
39
  judgeval/data/scripts/fix_default_factory.py,sha256=lvp2JwYZqz-XpD9LZNa3mANZVP-jJSZoNzolI6JWERM,591
36
40
  judgeval/data/scripts/openapi_transform.py,sha256=Sm04JClzyP1ga8KA3gkIdsae8Hlx-XU7-x0gHCQYOhg,3877
37
- judgeval/integrations/langgraph.py,sha256=kJXLsgBY7DgsUTZyVQ47deDgHm887brFHfyIbuyerGw,29986
41
+ judgeval/integrations/langgraph.py,sha256=XsTNpKvXZmSf4TJBtRKSd5AB7S-Td9GTG5wZW9Npj6k,30062
38
42
  judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
39
43
  judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY,986
40
- judgeval/judges/litellm_judge.py,sha256=yt6QvwKMmxZcrUtjbn3EiO5aVg7CHM2YZkBCSQLS8jk,2509
41
- judgeval/judges/mixture_of_judges.py,sha256=cecQ8mRmz2-dDoZl2MGsrhZICkpIvRovGPK3su0kc8s,14889
44
+ judgeval/judges/litellm_judge.py,sha256=K9yCGOmozt7sYO0u8CHWyZNi8mXnSR3pPkP8yVsvuRc,2561
45
+ judgeval/judges/mixture_of_judges.py,sha256=iTNjTX4Le1nCwGRm9qfMCv1lQjgqoIw3OE0teiLubwo,14946
42
46
  judgeval/judges/together_judge.py,sha256=5FADUhs6-FN1ZVV_1D3-8_gu9mPbZiG0PYTpme41SfM,2336
43
- judgeval/judges/utils.py,sha256=0CF9qtIUQUL3-W-qTGpmTjZbkUUBAM6TslDsrCHnTBU,2725
47
+ judgeval/judges/utils.py,sha256=_t6oYN9q63wyP7D4jI8X0bNmvVw7OfaE7uMTYDVS14E,2782
44
48
  judgeval/scorers/__init__.py,sha256=4H_cinTQ4EogZv59YEV-3U9EOTLppNwgAPTi1-jI9Fw,746
45
49
  judgeval/scorers/agent_scorer.py,sha256=TjwD_YglSywr3EowEojiCyg5qDgCRa5LRGc5nFdmIBc,703
46
50
  judgeval/scorers/api_scorer.py,sha256=xlhqkeMUBFxl8daSXOTWOYwZjBAz7o6b4sVD5f8cIHw,2523
47
- judgeval/scorers/base_scorer.py,sha256=eDfQk8N8TQfM1ayJDWr0NTdSQxcbk9-VZHd0Igb9EbI,2878
51
+ judgeval/scorers/base_scorer.py,sha256=hKrLLh2DaxTgAfze8p_IapvsrogRCevYgfaNCDeOJzc,2869
48
52
  judgeval/scorers/example_scorer.py,sha256=2n45y3LMV1Q-ARyXLHqvVWETlnY1DqS7OLzPu9IBGz8,716
49
53
  judgeval/scorers/exceptions.py,sha256=ACDHK5-TWiF3NTk-wycaedpbrdobm-CvvC1JA_iP-Mk,179
50
- judgeval/scorers/score.py,sha256=SPoN5GG5sGKAt2aw_NgDj4-w5Mv1CGgaD9q-Sj1jRII,6626
54
+ judgeval/scorers/score.py,sha256=SWyoqOOvyLpLy39tLyb_Q94sdh9r_IuDv6YNREw52lg,7546
51
55
  judgeval/scorers/utils.py,sha256=HQOYTJtNnsi_aPfMssePAaBbXpAv7LXgwUlWlDFuN2g,3965
52
56
  judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
57
  judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=GX4KkwPR2p-c0Y5mZingJa8EUfjAbMGhrmRBDBunOGw,1484
@@ -58,14 +62,16 @@ judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=NABO_iBd
58
62
  judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ps51bTgQsD9xGYsk1v9bx0WxQMqywSllCE9_xlJkLd8,531
59
63
  judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=SnFLvU4FGsMeUVUp0SGHSy_6wgfwr_vHPGnZx5YJl_Q,691
60
64
  judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=aQzu-TiGqG74JDQ927evv5yGmnZw2AOolyHvlIhiUbI,683
61
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=nx73DeoVkSqJTP1hYxMsJobG9HVWgMDN5-xFOXt_8Ts,7348
65
+ judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=1FsUGjQu3oa2rF-oqt32j-yA2YM33_trGTJ0HgagFJ0,7793
62
66
  judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py,sha256=Mcp1CjMNyOax9UkvoRdSyUYdO2Os1-Nko43y89m2Luo,594
63
67
  judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py,sha256=Z2FLGBC7m_CLx-CMgXVuTvYvN0vY5yOcWA0ImBkeBfY,787
64
68
  judgeval/tracer/__init__.py,sha256=wkuXtOGDCrwgPPXlh_sSJmvGuWaAMHyNzk1TzB5f9aI,148
65
69
  judgeval/utils/alerts.py,sha256=3w_AjQrgfmOZvfqCridW8WAnHVxHHXokX9jNzVFyGjA,3297
70
+ judgeval/utils/async_utils.py,sha256=uNx1SopEc0quSjc8GBQqyba0SmCMAzv2NKIq6xYwttc,989
66
71
  judgeval/utils/file_utils.py,sha256=PWHRs8dUr8iDwpglSSk4Yjd7C6ZhDzUaO-jV3m7riHM,1987
67
72
  judgeval/utils/requests.py,sha256=K3gUKrwL6TvwYKVYO5OeLWdUHn9NiUPmnIXhZEiEaHU,1534
68
- judgeval-0.4.0.dist-info/METADATA,sha256=6ZJMXn13RNp-LZfoBUNtmjt7nCvthWU9qT9YNBNe7Wo,10348
69
- judgeval-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
70
- judgeval-0.4.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
71
- judgeval-0.4.0.dist-info/RECORD,,
73
+ judgeval-0.6.0.dist-info/METADATA,sha256=CulXMs0v5YrHjR3ntVX8xWKcZyxwEpo_nOYs_hkaeN8,10403
74
+ judgeval-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
75
+ judgeval-0.6.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
76
+ judgeval-0.6.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
77
+ judgeval-0.6.0.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ judgeval = judgeval.cli:app
@@ -1,76 +0,0 @@
1
- from typing import List, Optional, Union
2
- from pydantic import BaseModel, field_validator, Field
3
-
4
- from judgeval.data import Example
5
- from judgeval.scorers import BaseScorer, APIScorerConfig
6
- from judgeval.constants import ACCEPTABLE_MODELS
7
-
8
-
9
- class EvaluationRun(BaseModel):
10
- """
11
- Stores example and evaluation scorers together for running an eval task
12
-
13
- Args:
14
- project_name (str): The name of the project the evaluation results belong to
15
- eval_name (str): A name for this evaluation run
16
- examples (List[Example]): The examples to evaluate
17
- scorers (List[Union[JudgmentScorer, BaseScorer]]): A list of scorers to use for evaluation
18
- model (str): The model used as a judge when using LLM as a Judge
19
- metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
20
- """
21
-
22
- organization_id: Optional[str] = None
23
- project_name: Optional[str] = Field(default=None, validate_default=True)
24
- eval_name: Optional[str] = Field(default=None, validate_default=True)
25
- examples: List[Example]
26
- scorers: List[Union[APIScorerConfig, BaseScorer]]
27
- model: Optional[str] = "gpt-4.1"
28
- trace_span_id: Optional[str] = None
29
- # API Key will be "" until user calls client.run_eval(), then API Key will be set
30
- override: Optional[bool] = False
31
- append: Optional[bool] = False
32
-
33
- def model_dump(self, **kwargs):
34
- data = super().model_dump(**kwargs)
35
-
36
- data["scorers"] = [
37
- scorer.model_dump() for scorer in self.scorers
38
- ] # Pydantic has problems with properly calling model_dump() on the scorers, so we need to do it manually
39
- data["examples"] = [example.model_dump() for example in self.examples]
40
-
41
- return data
42
-
43
- @field_validator("examples")
44
- def validate_examples(cls, v):
45
- if not v:
46
- raise ValueError("Examples cannot be empty.")
47
- return v
48
-
49
- @field_validator("scorers", mode="before")
50
- def validate_scorers(cls, v):
51
- if not v:
52
- raise ValueError("Scorers cannot be empty.")
53
- if not all(
54
- isinstance(scorer, BaseScorer) or isinstance(scorer, APIScorerConfig)
55
- for scorer in v
56
- ):
57
- raise ValueError(
58
- "All scorers must be of type BaseScorer or APIScorerConfig."
59
- )
60
- return v
61
-
62
- @field_validator("model")
63
- def validate_model(cls, v, values):
64
- if not v:
65
- raise ValueError("Model cannot be empty.")
66
-
67
- # Check if model is string or list of strings
68
- if isinstance(v, str):
69
- if v not in ACCEPTABLE_MODELS:
70
- raise ValueError(
71
- f"Model name {v} not recognized. Please select a valid model name.)"
72
- )
73
- return v
74
-
75
- class Config:
76
- arbitrary_types_allowed = True