PyPI - judgeval - Versions diffs - 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl - Mend

judgeval 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

judgeval/scorers/__init__.py CHANGED Viewed

@@ -13,6 +13,7 @@ from judgeval.scorers.judgeval_scorers import (
     AnswerRelevancyScorer,
     ScorerWrapper,
     AnswerCorrectnessScorer,
+    Text2SQLScorer,
 )
 __all__ = [
@@ -31,4 +32,5 @@ __all__ = [
     "AnswerRelevancyScorer",
     "ScorerWrapper",
     "AnswerCorrectnessScorer",
+    "Text2SQLScorer",
 ]

judgeval/scorers/judgeval_scorers/__init__.py CHANGED Viewed

@@ -28,6 +28,9 @@ from judgeval.scorers.judgeval_scorers.local_implementations import (
     AnswerCorrectnessScorer as LocalAnswerCorrectnessScorer
 )
+from judgeval.scorers.judgeval_scorers.classifiers import Text2SQLScorer
 class ScorerWrapper:
     """
     Wrapper class that can dynamically load either API or local implementation of a scorer.
@@ -141,4 +144,5 @@ __all__ = [
     "ContextualPrecisionScorer",
     "ContextualRecallScorer",
     "AnswerRelevancyScorer",
+    "Text2SQLScorer",
 ]

judgeval/scorers/judgeval_scorers/classifiers/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .text2sql import Text2SQLScorer
+__all__ = ["Text2SQLScorer"]

judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .text2sql_scorer import Text2SQLScorer
+__all__ = ["Text2SQLScorer"]

judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py ADDED Viewed

@@ -0,0 +1,54 @@
+"""
+ClassifierScorer implementation for basic Text-to-SQL evaluation.
+Takes a natural language query, a corresponding LLM-generated SQL query, and a table schema + (optional) metadata.
+Determines if the LLM-generated SQL query is valid and works for the natural language query.
+"""
+from judgeval.scorers import ClassifierScorer
+Text2SQLScorer = ClassifierScorer(
+    "Text to SQL",
+    slug="text2sql-1010101010",
+    threshold=1.0,
+    conversation=[{
+        "role": "system",
+        "content": """You will be given a natural language query, a corresponding LLM-generated SQL query, and a table schema + (optional) metadata.
+** TASK INSTRUCTIONS **
+Your task is to decide whether the LLM generated SQL query properly filters for what the natural language query is asking, based on the table schema + (optional) metadata.
+Additionally, you should check if the SQL query is valid based on the table schema (checking for syntax errors, false column names, etc.)
+** TIPS **
+- Look for correct references to the table schema for column names, table names, etc.
+- Check that the SQL query can be executed; make sure JOINs, GROUP BYs, ORDER BYs, etc. are valid with respect to the table schema.
+- Check that aggregation functions (COUNT, SUM, AVG, etc.) are used appropriately with GROUP BY clauses
+- Verify that WHERE conditions use the correct operators and data types for comparisons
+- Ensure LIMIT and OFFSET clauses make sense for the query's purpose
+- Check that JOINs use the correct keys and maintain referential integrity
+- Verify that ORDER BY clauses use valid column names and sort directions
+- Check for proper handling of NULL values where relevant
+- Ensure subqueries are properly constructed and correlated when needed
+- EVEN IF THE QUERY IS VALID, IF IT DOESN'T WORK FOR THE NATURAL LANGUAGE QUERY, YOU SHOULD CHOOSE "N" AS THE ANSWER.
+** FORMATTING YOUR ANSWER **
+If the SQL query is valid and works for the natural language query, choose option "Y" and otherwise "N". Provide a justification for your decision; if you choose "N", explain what about the LLM-generated SQL query is incorrect, or explain why it doesn't address the natural language query.
+IF YOUR JUSTIFICATION SHOWS THAT THE SQL QUERY IS VALID AND WORKS FOR THE NATURAL LANGUAGE QUERY, YOU SHOULD CHOOSE "Y" AS THE ANSWER.
+IF THE SQL QUERY IS INVALID, YOU SHOULD CHOOSE "N" AS THE ANSWER.
+** YOUR TURN **
+Natural language query:
+{{input}}
+LLM generated SQL query:
+{{actual_output}}
+Table schema:
+{{context}}
+        """
+    }],
+    options={
+        "Y": 1.0,
+        "N": 0.0
+    }
+)

{judgeval-0.0.8.dist-info → judgeval-0.0.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.0.8
+Version: 0.0.9
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues

{judgeval-0.0.8.dist-info → judgeval-0.0.9.dist-info}/RECORD RENAMED Viewed

@@ -24,7 +24,7 @@ judgeval/judges/litellm_judge.py,sha256=EIL58Teptv8DzZUO3yP2RDQCDq-aoBB6HPZzPdK6
 judgeval/judges/mixture_of_judges.py,sha256=OuGWCuXyqe7s_Y74ij90TJFRfHU-VAFyJVVrwBM0RO0,15532
 judgeval/judges/together_judge.py,sha256=x3jf-tq77QPXHeeoF739f69hE_0VceXD9FHLrVFdGVA,2275
 judgeval/judges/utils.py,sha256=YUvivcGV1OKLPMJ9N6aTvhA0r_zzJ2NXriPguiiaVaY,2110
-judgeval/scorers/__init__.py,sha256=3rq2VtszrJk9gZ3oAMVd7EGlSugr8aRlHWprMDgQPaQ,956
+judgeval/scorers/__init__.py,sha256=XcDdLn_s16rSQob0896oj4JXTA8-Xfl271TUEBj6Oew,998
 judgeval/scorers/api_scorer.py,sha256=88kCWr6IetLFn3ziTPG-lwDWvMhFUC6xfINU1MJBoho,2125
 judgeval/scorers/base_scorer.py,sha256=mbOReG88fWaqCnC8F0u5QepRlzgVkuOz89KEKYxrmMc,1794
 judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
@@ -32,7 +32,7 @@ judgeval/scorers/judgeval_scorer.py,sha256=14SZ3sBZtGNM3BCegKebkNad9LTs5Tyhs0kD6
 judgeval/scorers/prompt_scorer.py,sha256=bUv8eZNy1XGVM1gNMt33dgIVX6zj63bGAV6O0o0c7yg,17821
 judgeval/scorers/score.py,sha256=zJKG21h9Njyj2vS36CAFK2wlbOcHSKgrLgHV5_25KKw,18630
 judgeval/scorers/utils.py,sha256=dtueaJm8e3Ph3wj1vC-srzadgK_CoIlOefdvMQ-cwK8,6826
-judgeval/scorers/judgeval_scorers/__init__.py,sha256=077QnuBfw9Sy9RP2TF2oKCtt5PbaqBZLyiP-gczKShk,5092
+judgeval/scorers/judgeval_scorers/__init__.py,sha256=D12jJAKTcfmz8fDBkYeOmdzZMZsURuODIJ5p7Nk1lWE,5189
 judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=zFwH2TC5AFlpDRfVKc6GN4YTtnmeyALl-JRLoZD_Jco,1284
 judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=690G5askjE8dcbKPGvCF6JxAEM9QJUqb-3K-D6lI6oM,463
 judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=CqvvjV7AZqPlXh-PZaPKYPILHr15u4bIYiKBFjlk5i0,457
@@ -44,6 +44,9 @@ judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=ffYwH3CexP
 judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py,sha256=CAZBQKwNSqpqAoOgStYfr-yP1Brug_6VRimRIQY-zdg,894
 judgeval/scorers/judgeval_scorers/api_scorers/summarization.py,sha256=-E3oxYbI0D_0q-_fGWh2jQHW9O4Pu7I7xvLWsHU6cn8,450
 judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py,sha256=17ppPXm962ew67GU5m0npzbPu3CuhgdKY_KmfPvKfu4,457
+judgeval/scorers/judgeval_scorers/classifiers/__init__.py,sha256=Qt81W5ZCwMvBAne0LfQDb8xvg5iOG1vEYP7WizgwAZo,67
+judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py,sha256=8iTzMvou1Dr8pybul6lZHKjc9Ye2-0_racRGYkhEdTY,74
+judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256=ly72Z7s_c8NID6-nQnuW8qEGEW2MqdvpJ-5WfXzbAQg,2579
 judgeval/scorers/judgeval_scorers/local_implementations/__init__.py,sha256=ZDbmYHwIbPD75Gj9JKtEWnpBdSVGGRmbn1_IOR6GR-c,1627
 judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py,sha256=cxxUEspgoIdSzJbwIIioamC0-xDqhYVfYAWxaYF-D_Y,177
 judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py,sha256=PDThn6SzqxgMXT7BpQs2TEBOsgfD5fi6fnKk31qaCTo,10227
@@ -73,7 +76,7 @@ judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py
 judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py,sha256=CBuE6oCxMzTdJoXFt_YPWBte88kedEQ9t3g52ZRztGY,21086
 judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py,sha256=JUB3TMqS1OHr6PqpIGqkyiBNbyfUaw7lZuUATjU3_ek,168
 judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py,sha256=CYGRJY5EuyICYzHrmFdLykwXakX8AC7G3Bhj7p6szfY,5493
-judgeval-0.0.8.dist-info/METADATA,sha256=91SMIPO60Q_Ab7yTjL2sKmPgmfl6Bji6_QAzkjaOHlk,1204
-judgeval-0.0.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-judgeval-0.0.8.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
-judgeval-0.0.8.dist-info/RECORD,,
+judgeval-0.0.9.dist-info/METADATA,sha256=D9-pDQVSwfHCVcZ85-AS6MaMhd1AGz1CAJ5fRLwrRsA,1204
+judgeval-0.0.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.0.9.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.0.9.dist-info/RECORD,,

{judgeval-0.0.8.dist-info → judgeval-0.0.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{judgeval-0.0.8.dist-info → judgeval-0.0.9.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

judgeval 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl

judgeval 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl