judgeval 0.0.26__py3-none-any.whl → 0.0.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,22 @@
1
1
  judgeval/__init__.py,sha256=dtXxsCmI4eEsZdGSUMy8P_pA0bc2-OSGAgb2C__yJoA,252
2
2
  judgeval/clients.py,sha256=6VQmEqmfCngUdS2MuPBIpHvtDFqOENm8-_BmMvjLyRQ,944
3
- judgeval/constants.py,sha256=iTUro5SdXcYX00W18l32zL_EEEqHf5OT9uA5yZAme_s,5158
3
+ judgeval/constants.py,sha256=ksAXhAXovzJKH0uHOdQtREs168uCJRG79PooHNmEbYQ,5313
4
4
  judgeval/evaluation_run.py,sha256=RgJD60lJsunNQzObjo7iXnAzXWgubCLOAAuuamAAuoI,6354
5
- judgeval/judgment_client.py,sha256=2z134M0GeW3CdOZDx688UXmqJUlU31hlcFlLwUhF_Tg,25429
5
+ judgeval/judgment_client.py,sha256=uf0V1-eu3qnFTwrQ_Ckcv8IiWRVv7dbvou4P4KjU6hM,26794
6
6
  judgeval/rules.py,sha256=B0ZL0pn72D4Jnlr0zMQ6CPHi7D8AQQRariXCVsiCMiI,20542
7
- judgeval/run_evaluation.py,sha256=8FZ-shJ0120iTuT2S1rXzmVcoIHPsFPb0THTGOtKoHM,25772
7
+ judgeval/run_evaluation.py,sha256=N2ppmEE5WoSReChKjr_n0NcdAUlUR6Nua7M1C_3zHQ8,24949
8
8
  judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
9
9
  judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
10
10
  judgeval/common/logger.py,sha256=KO75wWXCxhUHUMvLaTU31ZzOk6tkZBa7heQ7y0f-zFE,6062
11
- judgeval/common/tracer.py,sha256=Z87Q3pQrtfHYvE1vsTMdIUfR-iz_IM8dqvW9VwVdtMQ,42434
11
+ judgeval/common/tracer.py,sha256=L6JkCHj6kxhtDzf9OPg5ZC-NUUH4VDvDcV4utPi_I38,57544
12
12
  judgeval/common/utils.py,sha256=LUQV5JfDr6wj7xHAJoNq-gofNZ6mjXbeKrGKzBME1KM,33533
13
- judgeval/data/__init__.py,sha256=YferxwmUqoBi18hrdgro0BD0h4pt20LAqISeUzGMcVU,474
14
- judgeval/data/api_example.py,sha256=dzkrQ0xno08y6qNfqL2djXbapUyc2B2aQ5iANn0o4CY,3667
13
+ judgeval/data/__init__.py,sha256=dG5ytBOeOWCTd5o0KP7IblqtW4G1EBaGreLWepM3jas,345
15
14
  judgeval/data/example.py,sha256=BhGBhamFWgH6wtvrRYM8dGtDfXh-cDxDhtNL5Gbdz_M,5892
16
- judgeval/data/result.py,sha256=4fgjKtUmT3br7K6fkRiNIxTGKUuwMeGyRLqzkpxwXKE,4436
15
+ judgeval/data/result.py,sha256=YHD-dVYJN4JFpM-YCGgBtSdFcGAOyWYL41sf0TE9Hzg,3122
17
16
  judgeval/data/scorer_data.py,sha256=JVlaTx1EP2jw2gh3Vgx1CSEsvIFABAN26IquKyxwiJQ,3273
18
17
  judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
19
18
  judgeval/data/datasets/dataset.py,sha256=AFYjksV_wXx5CqFYJsl3aN8yZ6hC50O1myRuOJ8s8_E,12867
20
- judgeval/data/datasets/eval_dataset_client.py,sha256=B4bRy0Di2oFlaBbvp4_hRx2g_9e6Cs0y3ZUT9reMyhw,10926
19
+ judgeval/data/datasets/eval_dataset_client.py,sha256=P9fEmcNrjPPaiYbbLiEiBziZrIexA39HN9qzClt6uPE,12691
21
20
  judgeval/integrations/langgraph.py,sha256=fGDZOTlVbxTO4ErC-m9OSg3h-RkOIIWXCfhjgkKRh4E,11187
22
21
  judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
23
22
  judgeval/judges/base_judge.py,sha256=ch_S7uBB7lyv44Lf1d7mIGFpveOO58zOkkpImKgd9_4,994
@@ -31,7 +30,7 @@ judgeval/scorers/base_scorer.py,sha256=xdUlY3CnLdCQ1Z5iUeY22Bim5v-OQruZmaVF_4Y1m
31
30
  judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
32
31
  judgeval/scorers/judgeval_scorer.py,sha256=jq_rzfTG0XBTuLCaa6TlaK4YcT-LlgsO1LEm6hpOYdg,6601
33
32
  judgeval/scorers/prompt_scorer.py,sha256=PaAs2qRolw1P3_I061Xvk9qzvF4O-JR8g_39RqXnHcM,17728
34
- judgeval/scorers/score.py,sha256=PhyAyMkc7KO_DZpFSN1HD_FS3BvdleQPZhYvQkNAdxI,18816
33
+ judgeval/scorers/score.py,sha256=ObFAlMbNRcGrfBpH4WW_6OA3CjrneC539xSWhGH60GQ,18578
35
34
  judgeval/scorers/utils.py,sha256=iHQVTlIANbmCTXz9kTeSdOytgUZ_T74Re61ajqsk_WQ,6827
36
35
  judgeval/scorers/judgeval_scorers/__init__.py,sha256=xFRb62sp4JmBUSeuAB_pC_7kEGp-lGdqCRIu9--Bbdg,5992
37
36
  judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=mZ6b_5Dl04k3PaG24ICBajB_j43ody1II1OJhO1DkXo,1648
@@ -87,7 +86,7 @@ judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py
87
86
  judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py,sha256=Qk7lwHgRPYeGoxTOyclAh1VfGItfvHJ6l1t7Nk3SWFM,20927
88
87
  judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
89
88
  judgeval/utils/alerts.py,sha256=O19Xj7DA0YVjl8PWiuH4zfdZeu3yiLVvHfY8ah2wG0g,2759
90
- judgeval-0.0.26.dist-info/METADATA,sha256=rhTpfY5GRclxtkkXU4RrUj1ckpuxd2xsgF53oQyK6qo,5418
91
- judgeval-0.0.26.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
92
- judgeval-0.0.26.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
93
- judgeval-0.0.26.dist-info/RECORD,,
89
+ judgeval-0.0.27.dist-info/METADATA,sha256=yoUWIaLIDPksMYQSxDIbVFjtFVCxim6-5LSQ2P13a-U,5418
90
+ judgeval-0.0.27.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
91
+ judgeval-0.0.27.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
92
+ judgeval-0.0.27.dist-info/RECORD,,
@@ -1,98 +0,0 @@
1
- from typing import List, Optional, Dict, Any, Union
2
- from pydantic import BaseModel, ConfigDict, model_validator
3
-
4
- from judgeval.data.example import Example
5
- from judgeval.data.scorer_data import ScorerData
6
- from judgeval.common.logger import debug, error
7
-
8
- class ProcessExample(BaseModel):
9
- """
10
- ProcessExample is an `Example` object that contains intermediate information
11
- about an undergoing evaluation on the original `Example`. It is used purely for
12
- internal operations and keeping track of the evaluation process.
13
- """
14
- name: str
15
- input: Optional[str] = None
16
- actual_output: Optional[Union[str, List[str]]] = None
17
- expected_output: Optional[Union[str, List[str]]] = None
18
- context: Optional[list] = None
19
- retrieval_context: Optional[list] = None
20
- tools_called: Optional[list] = None
21
- expected_tools: Optional[list] = None
22
-
23
- # make these optional, not all test cases in a conversation will be evaluated
24
- success: Optional[bool] = None
25
- scorers_data: Optional[List[ScorerData]] = None
26
- run_duration: Optional[float] = None
27
- evaluation_cost: Optional[float] = None
28
-
29
- order: Optional[int] = None
30
- # These should map 1 to 1 from golden
31
- additional_metadata: Optional[Dict] = None
32
- comments: Optional[str] = None
33
- trace_id: Optional[str] = None
34
- model_config = ConfigDict(arbitrary_types_allowed=True)
35
-
36
- def update_scorer_data(self, scorer_data: ScorerData):
37
- """
38
- Updates scorer data field of test case after the scorers have been
39
- evaluated on this test case.
40
- """
41
- debug(f"Updating scorer data for example '{self.name}' with scorer: {scorer_data}")
42
- # self.scorers_data is a list of ScorerData objects that contain the
43
- # evaluation results of each scorer on this test case
44
- if self.scorers_data is None:
45
- self.scorers_data = [scorer_data]
46
- else:
47
- self.scorers_data.append(scorer_data)
48
-
49
- if self.success is None:
50
- # self.success will be None when it is a message
51
- # in that case we will be setting success for the first time
52
- self.success = scorer_data.success
53
- else:
54
- if scorer_data.success is False:
55
- debug(f"Example '{self.name}' marked as failed due to scorer: {scorer_data}")
56
- self.success = False
57
-
58
- def update_run_duration(self, run_duration: float):
59
- self.run_duration = run_duration
60
-
61
-
62
- def create_process_example(
63
- example: Example,
64
- ) -> ProcessExample:
65
- """
66
- When an LLM Test Case is executed, we track its progress using an ProcessExample.
67
-
68
- This will track things like the success of the test case, as well as the metadata (such as verdicts and claims in Faithfulness).
69
- """
70
- success = True
71
- if example.name is not None:
72
- name = example.name
73
- else:
74
- name = "Test Case Placeholder"
75
- debug(f"No name provided for example, using default name: {name}")
76
- order = None
77
- scorers_data = []
78
-
79
- debug(f"Creating ProcessExample for: {name}")
80
- process_ex = ProcessExample(
81
- name=name,
82
- input=example.input,
83
- actual_output=example.actual_output,
84
- expected_output=example.expected_output,
85
- context=example.context,
86
- retrieval_context=example.retrieval_context,
87
- tools_called=example.tools_called,
88
- expected_tools=example.expected_tools,
89
- success=success,
90
- scorers_data=scorers_data,
91
- run_duration=None,
92
- evaluation_cost=None,
93
- order=order,
94
- additional_metadata=example.additional_metadata,
95
- trace_id=example.trace_id
96
- )
97
- return process_ex
98
-