judgeval 0.0.11__py3-none-any.whl → 0.22.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of judgeval has been flagged as potentially problematic.
- judgeval/__init__.py +177 -12
- judgeval/api/__init__.py +519 -0
- judgeval/api/api_types.py +407 -0
- judgeval/cli.py +79 -0
- judgeval/constants.py +76 -47
- judgeval/data/__init__.py +3 -3
- judgeval/data/evaluation_run.py +125 -0
- judgeval/data/example.py +15 -56
- judgeval/data/judgment_types.py +450 -0
- judgeval/data/result.py +29 -73
- judgeval/data/scorer_data.py +29 -62
- judgeval/data/scripts/fix_default_factory.py +23 -0
- judgeval/data/scripts/openapi_transform.py +123 -0
- judgeval/data/trace.py +121 -0
- judgeval/dataset/__init__.py +264 -0
- judgeval/env.py +52 -0
- judgeval/evaluation/__init__.py +344 -0
- judgeval/exceptions.py +27 -0
- judgeval/integrations/langgraph/__init__.py +13 -0
- judgeval/integrations/openlit/__init__.py +50 -0
- judgeval/judges/__init__.py +2 -3
- judgeval/judges/base_judge.py +2 -3
- judgeval/judges/litellm_judge.py +100 -20
- judgeval/judges/together_judge.py +101 -20
- judgeval/judges/utils.py +20 -24
- judgeval/logger.py +62 -0
- judgeval/prompt/__init__.py +330 -0
- judgeval/scorers/__init__.py +18 -25
- judgeval/scorers/agent_scorer.py +17 -0
- judgeval/scorers/api_scorer.py +45 -41
- judgeval/scorers/base_scorer.py +83 -38
- judgeval/scorers/example_scorer.py +17 -0
- judgeval/scorers/exceptions.py +1 -0
- judgeval/scorers/judgeval_scorers/__init__.py +0 -148
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +19 -17
- judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +13 -19
- judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +12 -19
- judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +13 -19
- judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +15 -0
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +327 -0
- judgeval/scorers/score.py +77 -306
- judgeval/scorers/utils.py +4 -199
- judgeval/tracer/__init__.py +1122 -2
- judgeval/tracer/constants.py +1 -0
- judgeval/tracer/exporters/__init__.py +40 -0
- judgeval/tracer/exporters/s3.py +119 -0
- judgeval/tracer/exporters/store.py +59 -0
- judgeval/tracer/exporters/utils.py +32 -0
- judgeval/tracer/keys.py +63 -0
- judgeval/tracer/llm/__init__.py +7 -0
- judgeval/tracer/llm/config.py +78 -0
- judgeval/tracer/llm/constants.py +9 -0
- judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
- judgeval/tracer/llm/llm_anthropic/config.py +6 -0
- judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
- judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
- judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
- judgeval/tracer/llm/llm_google/__init__.py +3 -0
- judgeval/tracer/llm/llm_google/config.py +6 -0
- judgeval/tracer/llm/llm_google/generate_content.py +127 -0
- judgeval/tracer/llm/llm_google/wrapper.py +30 -0
- judgeval/tracer/llm/llm_openai/__init__.py +3 -0
- judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
- judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
- judgeval/tracer/llm/llm_openai/config.py +6 -0
- judgeval/tracer/llm/llm_openai/responses.py +506 -0
- judgeval/tracer/llm/llm_openai/utils.py +42 -0
- judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
- judgeval/tracer/llm/llm_together/__init__.py +3 -0
- judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
- judgeval/tracer/llm/llm_together/config.py +6 -0
- judgeval/tracer/llm/llm_together/wrapper.py +52 -0
- judgeval/tracer/llm/providers.py +19 -0
- judgeval/tracer/managers.py +167 -0
- judgeval/tracer/processors/__init__.py +220 -0
- judgeval/tracer/utils.py +19 -0
- judgeval/trainer/__init__.py +14 -0
- judgeval/trainer/base_trainer.py +122 -0
- judgeval/trainer/config.py +128 -0
- judgeval/trainer/console.py +144 -0
- judgeval/trainer/fireworks_trainer.py +396 -0
- judgeval/trainer/trainable_model.py +243 -0
- judgeval/trainer/trainer.py +70 -0
- judgeval/utils/async_utils.py +39 -0
- judgeval/utils/decorators/__init__.py +0 -0
- judgeval/utils/decorators/dont_throw.py +37 -0
- judgeval/utils/decorators/use_once.py +13 -0
- judgeval/utils/file_utils.py +97 -0
- judgeval/utils/guards.py +36 -0
- judgeval/utils/meta.py +27 -0
- judgeval/utils/project.py +15 -0
- judgeval/utils/serialize.py +253 -0
- judgeval/utils/testing.py +70 -0
- judgeval/utils/url.py +10 -0
- judgeval/utils/version_check.py +28 -0
- judgeval/utils/wrappers/README.md +3 -0
- judgeval/utils/wrappers/__init__.py +15 -0
- judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
- judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
- judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
- judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
- judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
- judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
- judgeval/utils/wrappers/py.typed +0 -0
- judgeval/utils/wrappers/utils.py +35 -0
- judgeval/version.py +5 -0
- judgeval/warnings.py +4 -0
- judgeval-0.22.2.dist-info/METADATA +265 -0
- judgeval-0.22.2.dist-info/RECORD +112 -0
- judgeval-0.22.2.dist-info/entry_points.txt +2 -0
- judgeval/clients.py +0 -39
- judgeval/common/__init__.py +0 -8
- judgeval/common/exceptions.py +0 -28
- judgeval/common/logger.py +0 -189
- judgeval/common/tracer.py +0 -798
- judgeval/common/utils.py +0 -763
- judgeval/data/api_example.py +0 -111
- judgeval/data/datasets/__init__.py +0 -5
- judgeval/data/datasets/dataset.py +0 -286
- judgeval/data/datasets/eval_dataset_client.py +0 -193
- judgeval/data/datasets/ground_truth.py +0 -54
- judgeval/data/datasets/utils.py +0 -74
- judgeval/evaluation_run.py +0 -132
- judgeval/judges/mixture_of_judges.py +0 -248
- judgeval/judgment_client.py +0 -354
- judgeval/run_evaluation.py +0 -439
- judgeval/scorers/judgeval_scorer.py +0 -140
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -19
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -19
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -22
- judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -19
- judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -32
- judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -20
- judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py +0 -19
- judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -54
- judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -24
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -4
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -277
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -169
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -4
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -298
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -174
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -264
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -106
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -254
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -142
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -245
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -121
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -325
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -268
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -263
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -104
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -5
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -134
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -247
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -550
- judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +0 -157
- judgeval/scorers/prompt_scorer.py +0 -439
- judgeval-0.0.11.dist-info/METADATA +0 -36
- judgeval-0.0.11.dist-info/RECORD +0 -84
- {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/WHEEL +0 -0
- {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/licenses/LICENSE.md +0 -0
judgeval/scorers/utils.py
CHANGED
@@ -2,208 +2,13 @@
 Util functions for Scorer objects
 """
 
-import asyncio
-import nest_asyncio
-import inspect
-import json
-import sys
-import re
-from contextlib import contextmanager
-from rich.progress import Progress, SpinnerColumn, TextColumn
-from rich.console import Console
-from typing import List, Optional, Any
+from typing import List
 
-from judgeval.scorers import JudgevalScorer
-from judgeval.data import Example, ExampleParams
-from judgeval.scorers.exceptions import MissingExampleParamsError
+from judgeval.scorers import BaseScorer
 
 
-def clone_scorers(scorers: List[JudgevalScorer]) -> List[JudgevalScorer]:
+def clone_scorers(scorers: List[BaseScorer]) -> List[BaseScorer]:
     """
     Creates duplicates of the scorers passed as argument.
     """
-    cloned_scorers = []
-    for s in scorers:
-        scorer_class = type(s)
-        args = vars(s)
-
-        signature = inspect.signature(scorer_class.__init__)
-        valid_params = signature.parameters.keys()
-        valid_args = {key: args[key] for key in valid_params if key in args}
-
-        cloned_scorer = scorer_class(**valid_args)
-        # kinda hacky, but in case the class inheriting from JudgevalScorer doesn't have `model` in its __init__,
-        # we need to explicitly include it here so that we can add the judge model to the cloned scorer
-        cloned_scorer._add_model(model=args.get("model"))
-        cloned_scorers.append(cloned_scorer)
-    return cloned_scorers
-
-
-def scorer_console_msg(
-    scorer: JudgevalScorer,
-    async_mode: Optional[bool] = None,
-):
-    """
-    Renders a message to be displayed to console when a scorer is being executed.
-    """
-    if async_mode is None:
-        run_async = scorer.async_mode
-    else:
-        run_async = async_mode
-
-    return f"🔨 Executing Judgment's [rgb(106,0,255)]{scorer.__name__} Scorer[/rgb(106,0,255)]! \
-        [rgb(55,65,81)](using {scorer.evaluation_model}, async_mode={run_async})...[/rgb(55,65,81)]"
-
-
-@contextmanager
-def scorer_progress_meter(
-    scorer: JudgevalScorer,
-    async_mode: Optional[bool] = None,
-    display_meter: bool = True,
-    total: int = 100,
-    transient: bool = True,
-):
-    """
-    Context manager to display a progress indicator (spinner) while a scorer is being run.
-    """
-    console = Console(file=sys.stderr)
-    if display_meter:
-        with Progress(
-            SpinnerColumn(style="rgb(106,0,255)"),
-            TextColumn("[progress.description]{task.description}"),
-            console=console,
-            transient=transient,
-        ) as progress:
-            progress.add_task(
-                description=scorer_console_msg(scorer, async_mode),
-                total=total,
-            )
-            yield
-    else:
-        yield
-
-
-def parse_response_json(llm_response: str, scorer: Optional[JudgevalScorer] = None) -> dict:
-    """
-    Extracts JSON output from an LLM response and returns it as a dictionary.
-
-    If the JSON is invalid, the error is forwarded to the `scorer`, if provided.
-
-    Args:
-        llm_response (str): The response from an LLM.
-        scorer (JudgevalScorer, optional): The scorer object to forward errors to (if any).
-    """
-    start = llm_response.find("{")  # opening bracket
-    end = llm_response.rfind("}") + 1  # closing bracket
-
-    if end == 0 and start != -1:  # add the closing bracket if it's missing
-        llm_response = llm_response + "}"
-        end = len(llm_response)
-
-    json_str = llm_response[start:end] if start != -1 and end != 0 else ""  # extract the JSON string
-    json_str = re.sub(r",\s*([\]}])", r"\1", json_str)  # Remove trailing comma if present
-
-    try:
-        return json.loads(json_str)
-    except json.JSONDecodeError:
-        error_str = "Evaluation LLM outputted an invalid JSON. Please use a stronger evaluation model."
-        if scorer is not None:
-            scorer.error = error_str
-        raise ValueError(error_str)
-    except Exception as e:
-        raise Exception(f"An unexpected error occurred: {str(e)}")
-
-
-def print_verbose_logs(metric: str, logs: str):
-    print("*" * 50)
-    print(f"{metric} Verbose Logs")
-    print("*" * 50)
-    print("")
-    print(logs)
-    print("")
-    print("=" * 70)
-
-
-def create_verbose_logs(metric: JudgevalScorer, steps: List[str]) -> str:
-    """
-    Creates verbose logs for a scorer object.
-
-    Args:
-        metric (JudgevalScorer): The scorer object.
-        steps (List[str]): The steps to be included in the verbose logs.
-
-    Returns:
-        str: The verbose logs (Concatenated steps).
-    """
-    verbose_logs = ""
-    for i in range(len(steps) - 1):
-        verbose_logs += steps[i]
-        if i < len(steps) - 2:  # don't add new line for penultimate step
-            verbose_logs += " \n \n"
-    if metric.verbose_mode:
-        print_verbose_logs(metric.__name__, verbose_logs + f"\n \n{steps[-1]}")
-    return verbose_logs
-
-
-def get_or_create_event_loop() -> asyncio.AbstractEventLoop:
-    """
-    Get or create an asyncio event loop.
-
-    This function attempts to retrieve the current event loop using `asyncio.get_event_loop()`.
-    If the event loop is already running, it applies the `nest_asyncio` patch to allow nested
-    asynchronous execution. If the event loop is closed or not found, it creates a new event loop
-    and sets it as the current event loop.
-
-    Returns:
-        asyncio.AbstractEventLoop: The current or newly created event loop.
-
-    Raises:
-        RuntimeError: If the event loop is closed.
-    """
-    try:
-        loop = asyncio.get_event_loop()
-        if loop.is_running():
-            print(
-                "Event loop is already running. Applying nest_asyncio patch to allow async execution..."
-            )
-            nest_asyncio.apply()
-
-        if loop.is_closed():
-            raise RuntimeError
-    except RuntimeError:
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-    return loop
-
-
-def check_example_params(
-    example: Example,
-    example_params: List[ExampleParams],
-    scorer: JudgevalScorer,
-):
-    if isinstance(example, Example) is False:
-        error_str = f"in check_example_params(): Expected example to be of type 'Example', but got {type(example)}"
-        scorer.error = error_str
-        raise MissingExampleParamsError(error_str)
-
-    missing_params = []
-    for param in example_params:
-        if getattr(example, param.value) is None:
-            missing_params.append(f"'{param.value}'")
-
-    if missing_params:
-        if len(missing_params) == 1:
-            missing_params_str = missing_params[0]
-        elif len(missing_params) == 2:
-            missing_params_str = " and ".join(missing_params)
-        else:
-            missing_params_str = (
-                ", ".join(missing_params[:-1]) + ", and " + missing_params[-1]
-            )
-
-        error_str = f"{missing_params_str} fields in example cannot be None for the '{scorer.__name__}' scorer"
-        scorer.error = error_str
-        raise MissingExampleParamsError(error_str)
-
-
+    return [s.model_copy(deep=True) for s in scorers]