judgeval-0.0.26-py3-none-any.whl → judgeval-0.0.27-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/common/tracer.py +476 -161
- judgeval/constants.py +4 -2
- judgeval/data/__init__.py +0 -3
- judgeval/data/datasets/eval_dataset_client.py +59 -20
- judgeval/data/result.py +34 -56
- judgeval/judgment_client.py +47 -15
- judgeval/run_evaluation.py +20 -36
- judgeval/scorers/score.py +9 -11
- {judgeval-0.0.26.dist-info → judgeval-0.0.27.dist-info}/METADATA +1 -1
- {judgeval-0.0.26.dist-info → judgeval-0.0.27.dist-info}/RECORD +12 -13
- judgeval/data/api_example.py +0 -98
- {judgeval-0.0.26.dist-info → judgeval-0.0.27.dist-info}/WHEEL +0 -0
- {judgeval-0.0.26.dist-info → judgeval-0.0.27.dist-info}/licenses/LICENSE.md +0 -0
{judgeval-0.0.26.dist-info → judgeval-0.0.27.dist-info}/RECORD
@@ -1,23 +1,22 @@
 judgeval/__init__.py,sha256=dtXxsCmI4eEsZdGSUMy8P_pA0bc2-OSGAgb2C__yJoA,252
 judgeval/clients.py,sha256=6VQmEqmfCngUdS2MuPBIpHvtDFqOENm8-_BmMvjLyRQ,944
-judgeval/constants.py,sha256=
+judgeval/constants.py,sha256=ksAXhAXovzJKH0uHOdQtREs168uCJRG79PooHNmEbYQ,5313
 judgeval/evaluation_run.py,sha256=RgJD60lJsunNQzObjo7iXnAzXWgubCLOAAuuamAAuoI,6354
-judgeval/judgment_client.py,sha256=
+judgeval/judgment_client.py,sha256=uf0V1-eu3qnFTwrQ_Ckcv8IiWRVv7dbvou4P4KjU6hM,26794
 judgeval/rules.py,sha256=B0ZL0pn72D4Jnlr0zMQ6CPHi7D8AQQRariXCVsiCMiI,20542
-judgeval/run_evaluation.py,sha256=
+judgeval/run_evaluation.py,sha256=N2ppmEE5WoSReChKjr_n0NcdAUlUR6Nua7M1C_3zHQ8,24949
 judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
 judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
 judgeval/common/logger.py,sha256=KO75wWXCxhUHUMvLaTU31ZzOk6tkZBa7heQ7y0f-zFE,6062
-judgeval/common/tracer.py,sha256=
+judgeval/common/tracer.py,sha256=L6JkCHj6kxhtDzf9OPg5ZC-NUUH4VDvDcV4utPi_I38,57544
 judgeval/common/utils.py,sha256=LUQV5JfDr6wj7xHAJoNq-gofNZ6mjXbeKrGKzBME1KM,33533
-judgeval/data/__init__.py,sha256=
-judgeval/data/api_example.py,sha256=dzkrQ0xno08y6qNfqL2djXbapUyc2B2aQ5iANn0o4CY,3667
+judgeval/data/__init__.py,sha256=dG5ytBOeOWCTd5o0KP7IblqtW4G1EBaGreLWepM3jas,345
 judgeval/data/example.py,sha256=BhGBhamFWgH6wtvrRYM8dGtDfXh-cDxDhtNL5Gbdz_M,5892
-judgeval/data/result.py,sha256=
+judgeval/data/result.py,sha256=YHD-dVYJN4JFpM-YCGgBtSdFcGAOyWYL41sf0TE9Hzg,3122
 judgeval/data/scorer_data.py,sha256=JVlaTx1EP2jw2gh3Vgx1CSEsvIFABAN26IquKyxwiJQ,3273
 judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
 judgeval/data/datasets/dataset.py,sha256=AFYjksV_wXx5CqFYJsl3aN8yZ6hC50O1myRuOJ8s8_E,12867
-judgeval/data/datasets/eval_dataset_client.py,sha256=
+judgeval/data/datasets/eval_dataset_client.py,sha256=P9fEmcNrjPPaiYbbLiEiBziZrIexA39HN9qzClt6uPE,12691
 judgeval/integrations/langgraph.py,sha256=fGDZOTlVbxTO4ErC-m9OSg3h-RkOIIWXCfhjgkKRh4E,11187
 judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
 judgeval/judges/base_judge.py,sha256=ch_S7uBB7lyv44Lf1d7mIGFpveOO58zOkkpImKgd9_4,994
@@ -31,7 +30,7 @@ judgeval/scorers/base_scorer.py,sha256=xdUlY3CnLdCQ1Z5iUeY22Bim5v-OQruZmaVF_4Y1m
 judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
 judgeval/scorers/judgeval_scorer.py,sha256=jq_rzfTG0XBTuLCaa6TlaK4YcT-LlgsO1LEm6hpOYdg,6601
 judgeval/scorers/prompt_scorer.py,sha256=PaAs2qRolw1P3_I061Xvk9qzvF4O-JR8g_39RqXnHcM,17728
-judgeval/scorers/score.py,sha256=
+judgeval/scorers/score.py,sha256=ObFAlMbNRcGrfBpH4WW_6OA3CjrneC539xSWhGH60GQ,18578
 judgeval/scorers/utils.py,sha256=iHQVTlIANbmCTXz9kTeSdOytgUZ_T74Re61ajqsk_WQ,6827
 judgeval/scorers/judgeval_scorers/__init__.py,sha256=xFRb62sp4JmBUSeuAB_pC_7kEGp-lGdqCRIu9--Bbdg,5992
 judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=mZ6b_5Dl04k3PaG24ICBajB_j43ody1II1OJhO1DkXo,1648
@@ -87,7 +86,7 @@ judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py
 judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py,sha256=Qk7lwHgRPYeGoxTOyclAh1VfGItfvHJ6l1t7Nk3SWFM,20927
 judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
 judgeval/utils/alerts.py,sha256=O19Xj7DA0YVjl8PWiuH4zfdZeu3yiLVvHfY8ah2wG0g,2759
-judgeval-0.0.
-judgeval-0.0.
-judgeval-0.0.
-judgeval-0.0.
+judgeval-0.0.27.dist-info/METADATA,sha256=yoUWIaLIDPksMYQSxDIbVFjtFVCxim6-5LSQ2P13a-U,5418
+judgeval-0.0.27.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.0.27.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.0.27.dist-info/RECORD,,
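Each RECORD entry has the form `path,sha256=<hash>,<size>`, where the hash is the unpadded urlsafe-base64 SHA-256 digest of the file and the size is in bytes, so the changed hashes above simply identify which modules were rewritten between 0.0.26 and 0.0.27. A minimal sketch for reproducing a RECORD line from a local file (the path passed in is only an example and assumes you run it from the installed package root):

import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    """Build a RECORD-style line (path,sha256=<digest>,<size>) for a local file."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    return f"{path},sha256={digest},{len(data)}"

# Compare the output against the corresponding RECORD line above.
print(record_entry("judgeval/constants.py"))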
judgeval/data/api_example.py
DELETED
@@ -1,98 +0,0 @@
-from typing import List, Optional, Dict, Any, Union
-from pydantic import BaseModel, ConfigDict, model_validator
-
-from judgeval.data.example import Example
-from judgeval.data.scorer_data import ScorerData
-from judgeval.common.logger import debug, error
-
-class ProcessExample(BaseModel):
-    """
-    ProcessExample is an `Example` object that contains intermediate information
-    about an undergoing evaluation on the original `Example`. It is used purely for
-    internal operations and keeping track of the evaluation process.
-    """
-    name: str
-    input: Optional[str] = None
-    actual_output: Optional[Union[str, List[str]]] = None
-    expected_output: Optional[Union[str, List[str]]] = None
-    context: Optional[list] = None
-    retrieval_context: Optional[list] = None
-    tools_called: Optional[list] = None
-    expected_tools: Optional[list] = None
-
-    # make these optional, not all test cases in a conversation will be evaluated
-    success: Optional[bool] = None
-    scorers_data: Optional[List[ScorerData]] = None
-    run_duration: Optional[float] = None
-    evaluation_cost: Optional[float] = None
-
-    order: Optional[int] = None
-    # These should map 1 to 1 from golden
-    additional_metadata: Optional[Dict] = None
-    comments: Optional[str] = None
-    trace_id: Optional[str] = None
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-
-    def update_scorer_data(self, scorer_data: ScorerData):
-        """
-        Updates scorer data field of test case after the scorers have been
-        evaluated on this test case.
-        """
-        debug(f"Updating scorer data for example '{self.name}' with scorer: {scorer_data}")
-        # self.scorers_data is a list of ScorerData objects that contain the
-        # evaluation results of each scorer on this test case
-        if self.scorers_data is None:
-            self.scorers_data = [scorer_data]
-        else:
-            self.scorers_data.append(scorer_data)
-
-        if self.success is None:
-            # self.success will be None when it is a message
-            # in that case we will be setting success for the first time
-            self.success = scorer_data.success
-        else:
-            if scorer_data.success is False:
-                debug(f"Example '{self.name}' marked as failed due to scorer: {scorer_data}")
-                self.success = False
-
-    def update_run_duration(self, run_duration: float):
-        self.run_duration = run_duration
-
-
-def create_process_example(
-    example: Example,
-) -> ProcessExample:
-    """
-    When an LLM Test Case is executed, we track its progress using an ProcessExample.
-
-    This will track things like the success of the test case, as well as the metadata (such as verdicts and claims in Faithfulness).
-    """
-    success = True
-    if example.name is not None:
-        name = example.name
-    else:
-        name = "Test Case Placeholder"
-        debug(f"No name provided for example, using default name: {name}")
-    order = None
-    scorers_data = []
-
-    debug(f"Creating ProcessExample for: {name}")
-    process_ex = ProcessExample(
-        name=name,
-        input=example.input,
-        actual_output=example.actual_output,
-        expected_output=example.expected_output,
-        context=example.context,
-        retrieval_context=example.retrieval_context,
-        tools_called=example.tools_called,
-        expected_tools=example.expected_tools,
-        success=success,
-        scorers_data=scorers_data,
-        run_duration=None,
-        evaluation_cost=None,
-        order=order,
-        additional_metadata=example.additional_metadata,
-        trace_id=example.trace_id
-    )
-    return process_ex
-
File without changes
|
File without changes
|