judgeval 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/clients.py +0 -1
- judgeval/common/tracer.py +24 -0
- judgeval/constants.py +1 -1
- judgeval/data/api_example.py +3 -16
- judgeval/data/datasets/dataset.py +114 -2
- judgeval/data/example.py +17 -16
- judgeval/data/result.py +3 -3
- judgeval/scorers/__init__.py +2 -2
- judgeval/scorers/judgeval_scorers/__init__.py +6 -6
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -2
- judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +35 -0
- judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +2 -2
- judgeval/scorers/judgeval_scorers/local_implementations/execution_order/__init__.py +3 -0
- judgeval/scorers/judgeval_scorers/local_implementations/{tool_correctness/tool_correctness_scorer.py → execution_order/execution_order.py} +3 -3
- {judgeval-0.0.18.dist-info → judgeval-0.0.20.dist-info}/METADATA +1 -1
- {judgeval-0.0.18.dist-info → judgeval-0.0.20.dist-info}/RECORD +18 -18
- judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py +0 -19
- judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +0 -3
- {judgeval-0.0.18.dist-info → judgeval-0.0.20.dist-info}/WHEEL +0 -0
- {judgeval-0.0.18.dist-info → judgeval-0.0.20.dist-info}/licenses/LICENSE.md +0 -0
judgeval/clients.py
CHANGED
judgeval/common/tracer.py
CHANGED
@@ -11,6 +11,7 @@ import time
 import uuid
 import warnings
 from contextlib import contextmanager
+from collections import defaultdict
 from dataclasses import dataclass, field
 from datetime import datetime
 from http import HTTPStatus
@@ -962,6 +963,10 @@ def _format_output_data(client: ApiClient, response: Any) -> dict:
 class JudgevalCallbackHandler(BaseCallbackHandler):
     def __init__(self, trace_client: TraceClient):
         self.trace_client = trace_client
+        self.previous_node = "__start__"
+        self.executed_node_tools = []
+        self.executed_nodes = []
+        self.executed_tools = []
         self.openai_count = 1
 
     def start_span(self, name: str, span_type: SpanType = "span"):
@@ -1049,6 +1054,23 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
         # End the retriever span
         self.end_span(self.trace_client._current_span, span_type="retriever")
 
+    def on_chain_start(
+        self,
+        serialized: Dict[str, Any],
+        inputs: Dict[str, Any],
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        tags: Optional[List[str]] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        **kwargs: Any
+    ) -> None:
+        node = metadata.get("langgraph_node")
+        if node != None and node != "__start__" and node != self.previous_node:
+            self.executed_node_tools.append(node)
+            self.executed_nodes.append(node)
+        self.previous_node = node
+
     def on_tool_start(
         self,
         serialized: Optional[dict[str, Any]],
@@ -1060,6 +1082,8 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
     ):
         name = serialized["name"]
         self.start_span(name, span_type="tool")
+        self.executed_node_tools.append(f"{self.previous_node}:{name}")
+        self.executed_tools.append(name)
        self.trace_client.record_input({
            'args': input_str,
            'kwargs': kwargs
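Note: taken together, these tracer changes make the callback handler record LangGraph execution order: on_chain_start appends each newly entered graph node, and on_tool_start appends both the bare tool name and a "node:tool" pair. A minimal usage sketch follows; the graph and trace_client objects are assumed placeholders and are not part of this diff:

# Hedged sketch: wiring the handler into a LangGraph run and reading the
# execution-order fields introduced in 0.0.20. Only the handler attributes
# come from this diff; `trace_client` and `graph` are assumed to exist.
from judgeval.common.tracer import JudgevalCallbackHandler

handler = JudgevalCallbackHandler(trace_client)
graph.invoke({"input": "..."}, config={"callbacks": [handler]})

print(handler.executed_nodes)       # e.g. ["plan", "search", "answer"]
print(handler.executed_tools)       # e.g. ["web_search"]
print(handler.executed_node_tools)  # e.g. ["plan", "search", "search:web_search", "answer"]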
judgeval/constants.py
CHANGED
@@ -22,7 +22,7 @@ class APIScorer(str, Enum):
     CONTEXTUAL_RELEVANCY = "contextual_relevancy"
     CONTEXTUAL_PRECISION = "contextual_precision"
     INSTRUCTION_ADHERENCE = "instruction_adherence"
-    TOOL_CORRECTNESS = "tool_correctness"
+    EXECUTION_ORDER = "execution_order"
     JSON_CORRECTNESS = "json_correctness"
     COMPARISON = "comparison"
     GROUNDEDNESS = "groundedness"
judgeval/data/api_example.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import List, Optional, Dict, Any
+from typing import List, Optional, Dict, Any, Union
 from pydantic import BaseModel, ConfigDict, model_validator
 
 from judgeval.data.example import Example
@@ -13,8 +13,8 @@ class ProcessExample(BaseModel):
     """
     name: str
     input: Optional[str] = None
-    actual_output: Optional[str] = None
-    expected_output: Optional[str] = None
+    actual_output: Optional[Union[str, List[str]]] = None
+    expected_output: Optional[Union[str, List[str]]] = None
     context: Optional[list] = None
     retrieval_context: Optional[list] = None
     tools_called: Optional[list] = None
@@ -57,19 +57,6 @@ class ProcessExample(BaseModel):
 
     def update_run_duration(self, run_duration: float):
         self.run_duration = run_duration
-
-    @model_validator(mode="before")
-    def check_input(cls, values: Dict[str, Any]):
-        input = values.get("input")
-        actual_output = values.get("actual_output")
-
-        if (input is None or actual_output is None):
-            error(f"Validation error: Required fields missing. input={input}, actual_output={actual_output}")
-            raise ValueError(
-                "'input' and 'actual_output' must be provided."
-            )
-
-        return values
 
 
 def create_process_example(
judgeval/data/datasets/dataset.py
CHANGED
@@ -3,6 +3,7 @@ import csv
 import datetime
 import json
 import os
+import yaml
 from dataclasses import dataclass, field
 from typing import List, Union, Literal
 
@@ -190,6 +191,76 @@ class EvalDataset:
         for g in ground_truths:
             self.add_ground_truth(g)
 
+    def add_from_yaml(self, file_path: str) -> None:
+        debug(f"Loading dataset from YAML file: {file_path}")
+        """
+        Adds examples and ground truths from a YAML file.
+
+        The format of the YAML file is expected to be a dictionary with two keys: "examples" and "ground_truths".
+        The value of each key is a list of dictionaries, where each dictionary represents an example or ground truth.
+
+        The YAML file is expected to have the following format:
+        ground_truths:
+          - input: "test input"
+            actual_output: null
+            expected_output: "expected output"
+            context:
+              - "context1"
+            retrieval_context:
+              - "retrieval1"
+            additional_metadata:
+              key: "value"
+            comments: "test comment"
+            tools_called:
+              - "tool1"
+            expected_tools:
+              - "tool1"
+            source_file: "test.py"
+            trace_id: "094121"
+        examples:
+          - input: "test input"
+            actual_output: "test output"
+            expected_output: "expected output"
+            context:
+              - "context1"
+              - "context2"
+            retrieval_context:
+              - "retrieval1"
+            additional_metadata:
+              key: "value"
+            tools_called:
+              - "tool1"
+            expected_tools:
+              - "tool1"
+              - "tool2"
+            name: "test example"
+            example_id: null
+            timestamp: "20241230_160117"
+            trace_id: "123"
+        """
+        try:
+            with open(file_path, "r") as file:
+                payload = yaml.safe_load(file)
+                if payload is None:
+                    raise ValueError("The YAML file is empty.")
+                examples = payload.get("examples", [])
+                ground_truths = payload.get("ground_truths", [])
+        except FileNotFoundError:
+            error(f"YAML file not found: {file_path}")
+            raise FileNotFoundError(f"The file {file_path} was not found.")
+        except yaml.YAMLError:
+            error(f"Invalid YAML file: {file_path}")
+            raise ValueError(f"The file {file_path} is not a valid YAML file.")
+
+        info(f"Added {len(examples)} examples and {len(ground_truths)} ground truths from YAML")
+        new_examples = [Example(**e) for e in examples]
+        for e in new_examples:
+            self.add_example(e)
+
+        new_ground_truths = [GroundTruthExample(**g) for g in ground_truths]
+        for g in new_ground_truths:
+            self.add_ground_truth(g)
+
     def add_example(self, e: Example) -> None:
         self.examples = self.examples + [e]
         # TODO if we need to add rank, then we need to do it here
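Note: a minimal sketch of the new loading path, assuming EvalDataset can be constructed without arguments and that my_dataset.yaml follows the format documented in the method above (the file name is illustrative, not from the diff):

# Hedged sketch of EvalDataset.add_from_yaml as added in 0.0.20.
# "my_dataset.yaml" is an assumed file in the documented format.
from judgeval.data.datasets.dataset import EvalDataset

dataset = EvalDataset()
dataset.add_from_yaml("my_dataset.yaml")  # fills dataset.examples and dataset.ground_truths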
@@ -197,7 +268,7 @@ class EvalDataset:
     def add_ground_truth(self, g: GroundTruthExample) -> None:
         self.ground_truths = self.ground_truths + [g]
 
-    def save_as(self, file_type: Literal["json", "csv"], dir_path: str, save_name: str = None) -> None:
+    def save_as(self, file_type: Literal["json", "csv", "yaml"], dir_path: str, save_name: str = None) -> None:
         """
         Saves the dataset as a file. Save both the ground truths and examples.
 
@@ -266,8 +337,49 @@ class EvalDataset:
                         g.trace_id
                     ]
                 )
+        elif file_type == "yaml":
+            with open(complete_path, "w") as file:
+                yaml_data = {
+                    "examples": [
+                        {
+                            "input": e.input,
+                            "actual_output": e.actual_output,
+                            "expected_output": e.expected_output,
+                            "context": e.context,
+                            "retrieval_context": e.retrieval_context,
+                            "additional_metadata": e.additional_metadata,
+                            "tools_called": e.tools_called,
+                            "expected_tools": e.expected_tools,
+                            "name": e.name,
+                            "comments": None,  # Example does not have comments
+                            "source_file": None,  # Example does not have source file
+                            "example": True,  # Adding an Example
+                            "trace_id": e.trace_id
+                        }
+                        for e in self.examples
+                    ],
+                    "ground_truths": [
+                        {
+                            "input": g.input,
+                            "actual_output": g.actual_output,
+                            "expected_output": g.expected_output,
+                            "context": g.context,
+                            "retrieval_context": g.retrieval_context,
+                            "additional_metadata": g.additional_metadata,
+                            "tools_called": g.tools_called,
+                            "expected_tools": g.expected_tools,
+                            "name": None,  # GroundTruthExample does not have name
+                            "comments": g.comments,
+                            "source_file": g.source_file,
+                            "example": False,  # Adding a GroundTruthExample, not an Example
+                            "trace_id": g.trace_id
+                        }
+                        for g in self.ground_truths
+                    ]
+                }
+                yaml.dump(yaml_data, file, default_flow_style=False)
         else:
-            ACCEPTABLE_FILE_TYPES = ["json", "csv"]
+            ACCEPTABLE_FILE_TYPES = ["json", "csv", "yaml"]
             raise TypeError(f"Invalid file type: {file_type}. Please choose from {ACCEPTABLE_FILE_TYPES}")
 
     def __iter__(self):
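Note: save_as gains a matching "yaml" branch, so a dataset can now round-trip through YAML. A sketch under the same assumptions as above; the paths are illustrative, and the "save_name + .yaml" file naming is an assumption about how complete_path is built:

# Hedged sketch: persisting with the new "yaml" file type, then reloading.
dataset.save_as("yaml", dir_path="./datasets", save_name="eval_run")
restored = EvalDataset()
restored.add_from_yaml("./datasets/eval_run.yaml")  # assumed output file name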
judgeval/data/example.py
CHANGED
@@ -2,11 +2,13 @@
 Classes for representing examples in a dataset.
 """
 
-from typing import Optional, Any, Dict, List
+
+from typing import Optional, Any, Dict, List, Union
 from uuid import uuid4
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator
 from enum import Enum
 from datetime import datetime
+import time
 
 
 class ExampleParams(Enum):
@@ -22,9 +24,9 @@ class ExampleParams(Enum):
 
 
 class Example(BaseModel):
-    input: str
-    actual_output: str
-    expected_output: Optional[str] = None
+    input: Optional[str] = None
+    actual_output: Optional[Union[str, List[str]]] = None
+    expected_output: Optional[Union[str, List[str]]] = None
     context: Optional[List[str]] = None
     retrieval_context: Optional[List[str]] = None
     additional_metadata: Optional[Dict[str, Any]] = None
|
|
37
39
|
trace_id: Optional[str] = None
|
38
40
|
|
39
41
|
def __init__(self, **data):
|
40
|
-
# Check that required fields are provided
|
41
|
-
if 'input' not in data:
|
42
|
-
raise ValueError("Example must be initialized with 'input' field.")
|
43
|
-
if 'actual_output' not in data:
|
44
|
-
raise ValueError("Example must be initialized with 'actual_output' field.")
|
45
|
-
|
46
42
|
if 'example_id' not in data:
|
47
43
|
data['example_id'] = str(uuid4())
|
48
44
|
# Set timestamp if not provided
|
@@ -53,22 +49,27 @@ class Example(BaseModel):
     @field_validator('input', mode='before')
     @classmethod
     def validate_input(cls, v):
-        if not v or not isinstance(v, str):
+        if v is not None and (not v or not isinstance(v, str)):
             raise ValueError(f"Input must be a non-empty string but got '{v}' of type {type(v)}")
         return v
 
     @field_validator('actual_output', mode='before')
     @classmethod
     def validate_actual_output(cls, v):
-        if not v or not isinstance(v, str):
-            raise ValueError(…)
+        if v is not None:
+            if not isinstance(v, (str, list)):
+                raise ValueError(f"Actual output must be a string or a list of strings but got {v} of type {type(v)}")
+            if isinstance(v, list) and not all(isinstance(item, str) for item in v):
+                raise ValueError(f"All items in actual_output must be strings but got {v}")
         return v
 
     @field_validator('expected_output', mode='before')
     @classmethod
     def validate_expected_output(cls, v):
-        if v is not None and not isinstance(v, str):
-            raise ValueError(f"Expected output must be a string or None but got {v} of type {type(v)}")
+        if v is not None and not isinstance(v, (str, list)):
+            raise ValueError(f"Expected output must be a string, a list of strings, or None but got {v} of type {type(v)}")
+        if isinstance(v, list) and not all(isinstance(item, str) for item in v):
+            raise ValueError(f"All items in expected_output must be strings but got {v}")
         return v
 
     @field_validator('context', 'retrieval_context', 'tools_called', 'expected_tools', mode='before')
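Note: with the __init__ checks removed and the validators relaxed, Example now accepts a missing input and list-valued outputs, which is the shape an execution-order evaluation consumes. An illustrative sketch; the field values are made up:

# Hedged sketch: shapes that validate after this diff but were rejected in
# 0.0.18, where input and actual_output were required strings.
from judgeval.data.example import Example

ex = Example(
    actual_output=["search", "summarize"],    # ordered list of executed steps
    expected_output=["search", "summarize"],  # expected execution order
)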
judgeval/data/result.py
CHANGED
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import List, Union, Optional, Dict, Any
+from typing import List, Union, Optional, Dict, Any, Union
 
 from judgeval.data import ScorerData, ProcessExample
 
@@ -30,8 +30,8 @@ class ScoringResult:
 
     # Inputs from the original example
     input: Optional[str] = None
-    actual_output: Optional[str] = None
-    expected_output: Optional[str] = None
+    actual_output: Optional[Union[str, List[str]]] = None
+    expected_output: Optional[Union[str, List[str]]] = None
     context: Optional[List[str]] = None
     retrieval_context: Optional[List[str]] = None
     additional_metadata: Optional[Dict[str, Any]] = None
judgeval/scorers/__init__.py
CHANGED
@@ -2,7 +2,7 @@ from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.scorers.judgeval_scorer import JudgevalScorer
 from judgeval.scorers.prompt_scorer import PromptScorer, ClassifierScorer
 from judgeval.scorers.judgeval_scorers import (
-    ToolCorrectnessScorer,
+    ExecutionOrderScorer,
     JSONCorrectnessScorer,
     SummarizationScorer,
     HallucinationScorer,
@@ -24,7 +24,7 @@ __all__ = [
     "JudgevalScorer",
     "PromptScorer",
     "ClassifierScorer",
-    "ToolCorrectnessScorer",
+    "ExecutionOrderScorer",
     "JSONCorrectnessScorer",
     "SummarizationScorer",
     "HallucinationScorer",
judgeval/scorers/judgeval_scorers/__init__.py
CHANGED
@@ -2,7 +2,7 @@ from typing import Type, Optional, Any
 
 # Import implementations
 from judgeval.scorers.judgeval_scorers.api_scorers import (
-    ToolCorrectnessScorer as APIToolCorrectnessScorer,
+    ExecutionOrderScorer as APIExecutionOrderScorer,
     JSONCorrectnessScorer as APIJSONCorrectnessScorer,
     SummarizationScorer as APISummarizationScorer,
     HallucinationScorer as APIHallucinationScorer,
@@ -24,7 +24,7 @@ from judgeval.scorers.judgeval_scorers.local_implementations import (
     ContextualRelevancyScorer as LocalContextualRelevancyScorer,
     FaithfulnessScorer as LocalFaithfulnessScorer,
     JsonCorrectnessScorer as LocalJsonCorrectnessScorer,
-    ToolCorrectnessScorer as LocalToolCorrectnessScorer,
+    ExecutionOrderScorer as LocalExecutionOrderScorer,
     HallucinationScorer as LocalHallucinationScorer,
     SummarizationScorer as LocalSummarizationScorer,
     AnswerCorrectnessScorer as LocalAnswerCorrectnessScorer,
@@ -98,9 +98,9 @@ AnswerRelevancyScorer = ScorerWrapper(
     local_implementation=LocalAnswerRelevancyScorer
 )
 
-ToolCorrectnessScorer = ScorerWrapper(
-    api_implementation=APIToolCorrectnessScorer,
-    local_implementation=LocalToolCorrectnessScorer
+ExecutionOrderScorer = ScorerWrapper(
+    api_implementation=APIExecutionOrderScorer,
+    local_implementation=LocalExecutionOrderScorer
 )
 
 JSONCorrectnessScorer = ScorerWrapper(
@@ -154,7 +154,7 @@ GroundednessScorer = ScorerWrapper(
 )
 
 __all__ = [
-    "ToolCorrectnessScorer",
+    "ExecutionOrderScorer",
     "JSONCorrectnessScorer",
     "SummarizationScorer",
     "HallucinationScorer",
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-from judgeval.scorers.judgeval_scorers.api_scorers.tool_correctness import ToolCorrectnessScorer
+from judgeval.scorers.judgeval_scorers.api_scorers.execution_order import ExecutionOrderScorer
 from judgeval.scorers.judgeval_scorers.api_scorers.json_correctness import JSONCorrectnessScorer
 from judgeval.scorers.judgeval_scorers.api_scorers.summarization import SummarizationScorer
 from judgeval.scorers.judgeval_scorers.api_scorers.hallucination import HallucinationScorer
@@ -13,7 +13,7 @@ from judgeval.scorers.judgeval_scorers.api_scorers.instruction_adherence import
 from judgeval.scorers.judgeval_scorers.api_scorers.groundedness import GroundednessScorer
 
 __all__ = [
-    "ToolCorrectnessScorer",
+    "ExecutionOrderScorer",
     "JSONCorrectnessScorer",
     "SummarizationScorer",
    "HallucinationScorer",
|
@@ -0,0 +1,35 @@
|
|
1
|
+
"""
|
2
|
+
`judgeval` tool correctness scorer
|
3
|
+
|
4
|
+
TODO add link to docs page for this scorer
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
# Internal imports
|
9
|
+
from judgeval.scorers.api_scorer import APIJudgmentScorer
|
10
|
+
from judgeval.constants import APIScorer
|
11
|
+
from typing import Optional, Dict
|
12
|
+
|
13
|
+
class ExecutionOrderScorer(APIJudgmentScorer):
|
14
|
+
kwargs: Optional[Dict] = None
|
15
|
+
|
16
|
+
def __init__(self, threshold: float, should_exact_match: bool = False, should_consider_ordering: bool = False):
|
17
|
+
super().__init__(threshold=threshold, score_type=APIScorer.EXECUTION_ORDER)
|
18
|
+
self.kwargs = {"should_exact_match": should_exact_match, "should_consider_ordering": should_consider_ordering}
|
19
|
+
|
20
|
+
@property
|
21
|
+
def __name__(self):
|
22
|
+
return "Execution Order"
|
23
|
+
|
24
|
+
def to_dict(self) -> dict:
|
25
|
+
"""
|
26
|
+
Converts the scorer configuration to a dictionary format.
|
27
|
+
|
28
|
+
Returns:
|
29
|
+
dict: A dictionary containing the scorer's configuration
|
30
|
+
"""
|
31
|
+
return {
|
32
|
+
"score_type": self.score_type,
|
33
|
+
"threshold": self.threshold,
|
34
|
+
"kwargs": self.kwargs
|
35
|
+
}
|
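Note: a minimal construction sketch based only on the signature defined in the file above; the printed shape follows to_dict(), though the exact rendering of the enum value may differ:

# Hedged sketch: constructing the new API scorer from this file.
from judgeval.scorers.judgeval_scorers.api_scorers.execution_order import ExecutionOrderScorer

scorer = ExecutionOrderScorer(threshold=1.0, should_exact_match=True)
print(scorer.to_dict())
# roughly: {"score_type": "execution_order", "threshold": 1.0,
#           "kwargs": {"should_exact_match": True, "should_consider_ordering": False}}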
judgeval/scorers/judgeval_scorers/local_implementations/__init__.py
CHANGED
@@ -4,7 +4,7 @@ from judgeval.scorers.judgeval_scorers.local_implementations.contextual_recall.c
 from judgeval.scorers.judgeval_scorers.local_implementations.contextual_relevancy.contextual_relevancy_scorer import ContextualRelevancyScorer
 from judgeval.scorers.judgeval_scorers.local_implementations.faithfulness.faithfulness_scorer import FaithfulnessScorer
 from judgeval.scorers.judgeval_scorers.local_implementations.json_correctness.json_correctness_scorer import JsonCorrectnessScorer
-from judgeval.scorers.judgeval_scorers.local_implementations.tool_correctness.tool_correctness_scorer import ToolCorrectnessScorer
+from judgeval.scorers.judgeval_scorers.local_implementations.execution_order.execution_order import ExecutionOrderScorer
 from judgeval.scorers.judgeval_scorers.local_implementations.hallucination.hallucination_scorer import HallucinationScorer
 from judgeval.scorers.judgeval_scorers.local_implementations.summarization.summarization_scorer import SummarizationScorer
 from judgeval.scorers.judgeval_scorers.local_implementations.answer_correctness.answer_correctness_scorer import AnswerCorrectnessScorer
@@ -20,7 +20,7 @@ __all__ = [
     "ContextualRelevancyScorer",
     "FaithfulnessScorer",
     "JsonCorrectnessScorer",
-    "ToolCorrectnessScorer",
+    "ExecutionOrderScorer",
     "HallucinationScorer",
     "SummarizationScorer",
     "InstructionAdherenceScorer",
judgeval/scorers/judgeval_scorers/local_implementations/{tool_correctness/tool_correctness_scorer.py → execution_order/execution_order.py}
RENAMED
@@ -45,7 +45,7 @@ def get_lcs(seq1, seq2):
     return lcs[::-1]
 
 
-class ToolCorrectnessScorer(JudgevalScorer):
+class ExecutionOrderScorer(JudgevalScorer):
     def __init__(
         self,
         threshold: float = 0.5,
@@ -56,7 +56,7 @@ class ToolCorrectnessScorer(JudgevalScorer):
         should_consider_ordering: bool = False,
     ):
         super().__init__(
-            score_type=APIScorer.TOOL_CORRECTNESS,
+            score_type=APIScorer.EXECUTION_ORDER,
             threshold=1 if strict_mode else threshold,
             evaluation_model=None,
             include_reason=include_reason,
@@ -152,5 +152,5 @@ class ToolCorrectnessScorer(JudgevalScorer):
 
     @property
     def __name__(self):
-        return "Tool Correctness"
+        return "Execution Order"
 
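Note: because ToolCorrectnessScorer is renamed rather than kept as an alias, downstream imports need a one-line change when upgrading from 0.0.18 to 0.0.20:

# Before (0.0.18):
# from judgeval.scorers import ToolCorrectnessScorer
# After (0.0.20):
from judgeval.scorers import ExecutionOrderScorer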
{judgeval-0.0.18.dist-info → judgeval-0.0.20.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 judgeval/__init__.py,sha256=dtXxsCmI4eEsZdGSUMy8P_pA0bc2-OSGAgb2C__yJoA,252
-judgeval/clients.py,sha256=…
-judgeval/constants.py,sha256=…
+judgeval/clients.py,sha256=6VQmEqmfCngUdS2MuPBIpHvtDFqOENm8-_BmMvjLyRQ,944
+judgeval/constants.py,sha256=i8JIDUyo38Vt0R1n0GRA4FaakkBC5F2o4hQa0ncSF2E,5008
 judgeval/evaluation_run.py,sha256=RgJD60lJsunNQzObjo7iXnAzXWgubCLOAAuuamAAuoI,6354
 judgeval/judgment_client.py,sha256=evlvcrYO9pF-oCgcvlGE59iODN0C6GJtn7bySFU_88k,23384
 judgeval/rules.py,sha256=ebsiDEBVAnYTQxwVNvh_RpmKeWBnjQXgHs8KofTjcAs,15526
@@ -8,16 +8,16 @@ judgeval/run_evaluation.py,sha256=yLW24kFcw0xzXHvnDclYqtujTww6SDwvut6HM1x7SXk,21
 judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
 judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
 judgeval/common/logger.py,sha256=KO75wWXCxhUHUMvLaTU31ZzOk6tkZBa7heQ7y0f-zFE,6062
-judgeval/common/tracer.py,sha256=…
+judgeval/common/tracer.py,sha256=FYrAuav6OiiawHLQ2e154MLvCBMdh-z_ucU2h7XK08M,45295
 judgeval/common/utils.py,sha256=LUQV5JfDr6wj7xHAJoNq-gofNZ6mjXbeKrGKzBME1KM,33533
 judgeval/data/__init__.py,sha256=QykVE22Qf-I2f1g-jC9-iQyLNXgDmX1-vHbCgZg8Ra8,558
-judgeval/data/api_example.py,sha256=…
-judgeval/data/example.py,sha256=…
+judgeval/data/api_example.py,sha256=dzkrQ0xno08y6qNfqL2djXbapUyc2B2aQ5iANn0o4CY,3667
+judgeval/data/example.py,sha256=BhGBhamFWgH6wtvrRYM8dGtDfXh-cDxDhtNL5Gbdz_M,5892
 judgeval/data/ground_truth.py,sha256=OTBs3VZe-Wp0vEXEsq14GPZHYtpWT16bhGQTycIvkKc,2057
-judgeval/data/result.py,sha256=…
+judgeval/data/result.py,sha256=4fgjKtUmT3br7K6fkRiNIxTGKUuwMeGyRLqzkpxwXKE,4436
 judgeval/data/scorer_data.py,sha256=JVlaTx1EP2jw2gh3Vgx1CSEsvIFABAN26IquKyxwiJQ,3273
 judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
-judgeval/data/datasets/dataset.py,sha256=…
+judgeval/data/datasets/dataset.py,sha256=LrBK8y3y1R9_BKmXxTzdXMMIQvXlq7tf7TM-u7jgSxE,16839
 judgeval/data/datasets/eval_dataset_client.py,sha256=QsfHyFC4WePV7uJGYUVjiIwtk1Ie_VpWUrnd2Q4kKdU,11479
 judgeval/data/datasets/utils.py,sha256=6DpGCPmGFNOKIGNcVCOSjTOdWemrpAuYnlo778sGG7g,2455
 judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
@@ -26,7 +26,7 @@ judgeval/judges/litellm_judge.py,sha256=EIL58Teptv8DzZUO3yP2RDQCDq-aoBB6HPZzPdK6
 judgeval/judges/mixture_of_judges.py,sha256=IJoi4Twk8ze1CJWVEp69k6TSqTCTGrmVYQ0qdffer60,15549
 judgeval/judges/together_judge.py,sha256=l00hhPerAZXg3oYBd8cyMtWsOTNt_0FIqoxhKJKQe3k,2302
 judgeval/judges/utils.py,sha256=9lvUxziGV86ISvVFxYBWc09TWFyAQgUTyPf_a9mD5Rs,2686
-judgeval/scorers/__init__.py,sha256=…
+judgeval/scorers/__init__.py,sha256=gkeKJvjXhswCnkEyjijrVvGVM3Om86egrZ-PUOGvNvI,1158
 judgeval/scorers/api_scorer.py,sha256=wGqTQCbUE7uE-PzaKcCmexAqutdTunjFR0zVA6bUxdE,2518
 judgeval/scorers/base_scorer.py,sha256=xdUlY3CnLdCQ1Z5iUeY22Bim5v-OQruZmaVF_4Y1mC0,2183
 judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
@@ -34,25 +34,25 @@ judgeval/scorers/judgeval_scorer.py,sha256=oIkfoGXA09wL_vcK1DRibzQSA-MFNa-hmw1Ih
 judgeval/scorers/prompt_scorer.py,sha256=PaAs2qRolw1P3_I061Xvk9qzvF4O-JR8g_39RqXnHcM,17728
 judgeval/scorers/score.py,sha256=GALVmeApP1Cyih2vY93zRaU6RShtW4jJDG47Pm6yfnw,18657
 judgeval/scorers/utils.py,sha256=iHQVTlIANbmCTXz9kTeSdOytgUZ_T74Re61ajqsk_WQ,6827
-judgeval/scorers/judgeval_scorers/__init__.py,sha256=…
-judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=…
+judgeval/scorers/judgeval_scorers/__init__.py,sha256=xFRb62sp4JmBUSeuAB_pC_7kEGp-lGdqCRIu9--Bbdg,5992
+judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=mZ6b_5Dl04k3PaG24ICBajB_j43ody1II1OJhO1DkXo,1648
 judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=690G5askjE8dcbKPGvCF6JxAEM9QJUqb-3K-D6lI6oM,463
 judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=CqvvjV7AZqPlXh-PZaPKYPILHr15u4bIYiKBFjlk5i0,457
 judgeval/scorers/judgeval_scorers/api_scorers/comparison.py,sha256=6Q1qbsANOoZ3PM8n_gtZLIMbTBB9879L3acRelNJ6Uk,1001
 judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py,sha256=2zBrm_EEc143bmPA4HVcf8XtQeuc_BexczGx-SHlwRY,473
 judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py,sha256=NyojBWy_lRYx8diREulSK8s9dfYdZav4eZjg3TwUm0M,461
 judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py,sha256=wROMWOliCnB39ftX9TdeZmG9y0vrnxIGVby65tLOQRU,574
+judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=qxnvEDeKRlyzxX3EX53sW4oXxAM8Fj_q6ibdTxJNTAc,1076
 judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=gNf_i5c0jjpz2zCGhe7TtDMLKxc1PdOExJMFB5X7hSg,442
 judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py,sha256=esO76hEp0NzeBUdoSICPLdx5AeA5zWSt_2zpcSgvGis,442
 judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=ffYwH3CexPkKgo1rCALMivypROQjG5WWEsKXEFZxe2k,446
 judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=t1lWYOF0Pxvw5-NrI1Dt9FojaOncOCRlZc4a2SA20h4,477
 judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py,sha256=CAZBQKwNSqpqAoOgStYfr-yP1Brug_6VRimRIQY-zdg,894
 judgeval/scorers/judgeval_scorers/api_scorers/summarization.py,sha256=-E3oxYbI0D_0q-_fGWh2jQHW9O4Pu7I7xvLWsHU6cn8,450
-judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py,sha256=17ppPXm962ew67GU5m0npzbPu3CuhgdKY_KmfPvKfu4,457
 judgeval/scorers/judgeval_scorers/classifiers/__init__.py,sha256=Qt81W5ZCwMvBAne0LfQDb8xvg5iOG1vEYP7WizgwAZo,67
 judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py,sha256=8iTzMvou1Dr8pybul6lZHKjc9Ye2-0_racRGYkhEdTY,74
 judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256=ly72Z7s_c8NID6-nQnuW8qEGEW2MqdvpJ-5WfXzbAQg,2579
-judgeval/scorers/judgeval_scorers/local_implementations/__init__.py,sha256=…
+judgeval/scorers/judgeval_scorers/local_implementations/__init__.py,sha256=k_t-THIAtsk7lNvm9faj0u24dPZjn7qRbZ8YGjQ21xs,1926
 judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py,sha256=cxxUEspgoIdSzJbwIIioamC0-xDqhYVfYAWxaYF-D_Y,177
 judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py,sha256=3Dpm8BIIe0Th2p0ccO5bb-le93lywjOLSo712HwEIUE,10196
 judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py,sha256=hBUqEd8Hy3g8peOVjpSmRb31fPtpodDzdRUonhKRl30,6686
@@ -71,6 +71,8 @@ judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompt
 judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py,sha256=JPCvrekKLbl_xdD49evhtiFIVocuegCpCBkn1auzTSE,184
 judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py,sha256=BtVgE7z-9PHfFRcvn96aEG5mXVcWBweVyty934hZdiU,8915
 judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py,sha256=uO-8Uo7VrXu4xWpxjIx6_UI3aw5KuJxubSHb71Nzm6Q,4574
+judgeval/scorers/judgeval_scorers/local_implementations/execution_order/__init__.py,sha256=DpOHbjYEhVmP-RiaTEa5PZHpoPvduNXG5p6k9lR0AS0,157
+judgeval/scorers/judgeval_scorers/local_implementations/execution_order/execution_order.py,sha256=y-Ag8YuzEvExUIj4qU7y53INVLH9L_TUTJLIxCIdAQo,5458
 judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py,sha256=NbkSqPwxgF4T8KsvuIWhVyRwdOlo7mNHMFuRStTFnvk,154
 judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py,sha256=LPVTGHBBJSpE6TrgzZQS2_vw4P9HiUYmykrwo6UMdws,11251
 judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py,sha256=vNLjF4NKZJSV4VNenHzoAUB2xVZz6tt_5AzryKmOVrI,11690
@@ -84,11 +86,9 @@ judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_co
 judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py,sha256=mv6-XeLSV5yj1H98YYV2iTYVd88zKftZJP42Lgl6R80,89
 judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py,sha256=6GnRz2h-6Fwt4sl__0RgQOyo3n3iDO4MNuHWxdu-rrM,10242
 judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py,sha256=Qk7lwHgRPYeGoxTOyclAh1VfGItfvHJ6l1t7Nk3SWFM,20927
-judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py,sha256=JUB3TMqS1OHr6PqpIGqkyiBNbyfUaw7lZuUATjU3_ek,168
-judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py,sha256=8ucE8UrA44Mr-wHgVsFNU9gKunkPxe87VPYrFVi949g,5461
 judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
 judgeval/utils/alerts.py,sha256=RgW5R9Dn3Jtim0OyAYDbNzjoX2s6SA4Mw16GyyaikjI,1424
-judgeval-0.0.18.dist-info/METADATA,sha256=…
-judgeval-0.0.18.dist-info/WHEEL,sha256=…
-judgeval-0.0.18.dist-info/licenses/LICENSE.md,sha256=…
-judgeval-0.0.18.dist-info/RECORD,,
+judgeval-0.0.20.dist-info/METADATA,sha256=cz7uKUuHAc1rdANc8IJ5klQhlmrqOu_K1y6wwEIAdFU,1283
+judgeval-0.0.20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.0.20.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.0.20.dist-info/RECORD,,
judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py
DELETED
@@ -1,19 +0,0 @@
-"""
-`judgeval` tool correctness scorer
-
-TODO add link to docs page for this scorer
-
-"""
-
-# Internal imports
-from judgeval.scorers.api_scorer import APIJudgmentScorer
-from judgeval.constants import APIScorer
-
-
-class ToolCorrectnessScorer(APIJudgmentScorer):
-    def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.TOOL_CORRECTNESS)
-
-    @property
-    def __name__(self):
-        return "Tool Correctness"
{judgeval-0.0.18.dist-info → judgeval-0.0.20.dist-info}/WHEEL
File without changes
{judgeval-0.0.18.dist-info → judgeval-0.0.20.dist-info}/licenses/LICENSE.md
File without changes