judgeval 0.0.39__py3-none-any.whl → 0.0.41__py3-none-any.whl
This diff compares the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
- judgeval/clients.py +6 -4
- judgeval/common/tracer.py +504 -257
- judgeval/common/utils.py +5 -1
- judgeval/constants.py +2 -0
- judgeval/data/__init__.py +2 -1
- judgeval/data/datasets/dataset.py +12 -6
- judgeval/data/datasets/eval_dataset_client.py +3 -1
- judgeval/data/example.py +7 -7
- judgeval/data/tool.py +29 -1
- judgeval/data/trace.py +31 -39
- judgeval/data/trace_run.py +2 -1
- judgeval/evaluation_run.py +4 -7
- judgeval/judgment_client.py +34 -7
- judgeval/run_evaluation.py +67 -19
- judgeval/scorers/__init__.py +4 -1
- judgeval/scorers/judgeval_scorer.py +12 -1
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +4 -0
- judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +124 -0
- judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +20 -0
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +1 -1
- judgeval/scorers/prompt_scorer.py +8 -164
- judgeval/scorers/score.py +15 -15
- judgeval-0.0.41.dist-info/METADATA +1450 -0
- {judgeval-0.0.39.dist-info → judgeval-0.0.41.dist-info}/RECORD +26 -24
- judgeval-0.0.39.dist-info/METADATA +0 -247
- {judgeval-0.0.39.dist-info → judgeval-0.0.41.dist-info}/WHEEL +0 -0
- {judgeval-0.0.39.dist-info → judgeval-0.0.41.dist-info}/licenses/LICENSE.md +0 -0
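The updated example_logging_context call sites shown below ship only in the newer releases, so it can be worth confirming which judgeval build an environment actually has before relying on them. A minimal, generic check using the standard library (not part of judgeval itself):

import importlib.metadata

print(importlib.metadata.version("judgeval"))  # expect "0.0.41" after upgrading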
judgeval/scorers/score.py
CHANGED
@@ -48,7 +48,7 @@ async def safe_a_score_example(
  info(f"Successfully scored example {example.example_id}")
  except MissingTestCaseParamsError as e:
  if skip_on_missing_params: # Skip the example if the scorer requires parameters that are missing
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  warning(f"Skipping example {example.example_id} due to missing parameters")
  scorer.skipped = True
  return
@@ -56,10 +56,10 @@ async def safe_a_score_example(
  if ignore_errors: # Gracefully handle the error, does not stop the evaluation
  scorer.error = str(e)
  scorer.success = False
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  warning(f"Ignoring errors for example {example.example_id}: {str(e)} due to missing parameters")
  else: # Raise the error and stop the evaluation
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  error(f"Stopping example {example.example_id}: {str(e)} due to missing parameters")
  raise
  except TypeError: # in case a_score_example does not accept _show_indicator
@@ -68,27 +68,27 @@ async def safe_a_score_example(
  except MissingTestCaseParamsError as e:
  if skip_on_missing_params:
  scorer.skipped = True
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  warning(f"Skipping example {example.example_id} due to missing parameters")
  return
  else:
  if ignore_errors:
  scorer.error = str(e)
  scorer.success = False
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  warning(f"Ignoring errors for example {example.example_id}: {str(e)} due to missing parameters")
  else:
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  error(f"Stopping example {example.example_id}: {str(e)} due to missing parameters")
  raise
  except Exception as e:
  if ignore_errors:
  scorer.error = str(e)
  scorer.success = False # Assuming you want to set success to False
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  warning(f"Ignoring errors for example {example.example_id}: {str(e)}")
  else:
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  error(f"Stopping example {example.example_id}: {str(e)}")
  raise

@@ -128,7 +128,7 @@ async def score_task(
  except MissingTestCaseParamsError as e:
  if skip_on_missing_params:
  scorer.skipped = True
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  debug(f"Skipping example {example.example_id} due to missing parameters")
  return
  else:
@@ -137,7 +137,7 @@ async def score_task(
  scorer.success = False # Override success
  finish_text = "Failed"
  else:
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  error(f"Stopping example {example.example_id}: {str(e)} due to missing parameters")
  raise
  except TypeError:
@@ -147,7 +147,7 @@ async def score_task(
  except MissingTestCaseParamsError as e:
  if skip_on_missing_params:
  scorer.skipped = True
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  debug(f"Skipping example {example.example_id} due to missing parameters")
  return
  else:
@@ -156,7 +156,7 @@ async def score_task(
  scorer.success = False # Override success
  finish_text = "Failed"
  else:
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  error(f"Stopping example {example.example_id}: {str(e)} due to missing parameters")
  raise
  except Exception as e:
@@ -164,10 +164,10 @@ async def score_task(
  scorer.error = str(e)
  scorer.success = False # Override success
  finish_text = "Failed"
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  warning(f"Ignoring errors for example {example.example_id}: {str(e)}")
  else:
- with example_logging_context(example.
+ with example_logging_context(example.created_at, example.example_id):
  error(f"Stopping example {example.example_id}: {str(e)}")
  raise

@@ -305,7 +305,7 @@ async def a_execute_scoring(
  bar_format="{desc}: |{bar}|{percentage:3.0f}% ({n_fmt}/{total_fmt}) [Time Taken: {elapsed}, {rate_fmt}{postfix}]",
  ) as pbar:
  for i, ex in enumerate(examples):
- with example_logging_context(ex.
+ with example_logging_context(ex.created_at, ex.example_id):
  debug(f"Starting scoring for example {ex.example_id}")
  debug(f"Input: {ex.input}")
  debug(f"Using {len(scorers)} scorers")