judgeval 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -13,6 +13,7 @@ from judgeval.scorers.judgeval_scorers import (
13
13
  AnswerRelevancyScorer,
14
14
  ScorerWrapper,
15
15
  AnswerCorrectnessScorer,
16
+ Text2SQLScorer,
16
17
  )
17
18
 
18
19
  __all__ = [
@@ -31,4 +32,5 @@ __all__ = [
31
32
  "AnswerRelevancyScorer",
32
33
  "ScorerWrapper",
33
34
  "AnswerCorrectnessScorer",
35
+ "Text2SQLScorer",
34
36
  ]
@@ -28,6 +28,9 @@ from judgeval.scorers.judgeval_scorers.local_implementations import (
28
28
  AnswerCorrectnessScorer as LocalAnswerCorrectnessScorer
29
29
  )
30
30
 
31
+ from judgeval.scorers.judgeval_scorers.classifiers import Text2SQLScorer
32
+
33
+
31
34
  class ScorerWrapper:
32
35
  """
33
36
  Wrapper class that can dynamically load either API or local implementation of a scorer.
@@ -141,4 +144,5 @@ __all__ = [
141
144
  "ContextualPrecisionScorer",
142
145
  "ContextualRecallScorer",
143
146
  "AnswerRelevancyScorer",
147
+ "Text2SQLScorer",
144
148
  ]
@@ -0,0 +1,3 @@
1
+ from .text2sql import Text2SQLScorer
2
+
3
+ __all__ = ["Text2SQLScorer"]
@@ -0,0 +1,3 @@
1
+ from .text2sql_scorer import Text2SQLScorer
2
+
3
+ __all__ = ["Text2SQLScorer"]
@@ -0,0 +1,54 @@
1
+ """
2
+ ClassifierScorer implementation for basic Text-to-SQL evaluation.
3
+
4
+ Takes a natural language query, a corresponding LLM-generated SQL query, and a table schema + (optional) metadata.
5
+ Determines if the LLM-generated SQL query is valid and works for the natural language query.
6
+ """
7
+ from judgeval.scorers import ClassifierScorer
8
+
9
+ Text2SQLScorer = ClassifierScorer(
10
+ "Text to SQL",
11
+ slug="text2sql-1010101010",
12
+ threshold=1.0,
13
+ conversation=[{
14
+ "role": "system",
15
+ "content": """You will be given a natural language query, a corresponding LLM-generated SQL query, and a table schema + (optional) metadata.
16
+
17
+ ** TASK INSTRUCTIONS **
18
+ Your task is to decide whether the LLM generated SQL query properly filters for what the natural language query is asking, based on the table schema + (optional) metadata.
19
+ Additionally, you should check if the SQL query is valid based on the table schema (checking for syntax errors, false column names, etc.)
20
+
21
+ ** TIPS **
22
+ - Look for correct references to the table schema for column names, table names, etc.
23
+ - Check that the SQL query can be executed; make sure JOINs, GROUP BYs, ORDER BYs, etc. are valid with respect to the table schema.
24
+ - Check that aggregation functions (COUNT, SUM, AVG, etc.) are used appropriately with GROUP BY clauses
25
+ - Verify that WHERE conditions use the correct operators and data types for comparisons
26
+ - Ensure LIMIT and OFFSET clauses make sense for the query's purpose
27
+ - Check that JOINs use the correct keys and maintain referential integrity
28
+ - Verify that ORDER BY clauses use valid column names and sort directions
29
+ - Check for proper handling of NULL values where relevant
30
+ - Ensure subqueries are properly constructed and correlated when needed
31
+ - EVEN IF THE QUERY IS VALID, IF IT DOESN'T WORK FOR THE NATURAL LANGUAGE QUERY, YOU SHOULD CHOOSE "N" AS THE ANSWER.
32
+
33
+ ** FORMATTING YOUR ANSWER **
34
+ If the SQL query is valid and works for the natural language query, choose option "Y" and otherwise "N". Provide a justification for your decision; if you choose "N", explain what about the LLM-generated SQL query is incorrect, or explain why it doesn't address the natural language query.
35
+ IF YOUR JUSTIFICATION SHOWS THAT THE SQL QUERY IS VALID AND WORKS FOR THE NATURAL LANGUAGE QUERY, YOU SHOULD CHOOSE "Y" AS THE ANSWER.
36
+ IF THE SQL QUERY IS INVALID, YOU SHOULD CHOOSE "N" AS THE ANSWER.
37
+
38
+ ** YOUR TURN **
39
+ Natural language query:
40
+ {{input}}
41
+
42
+ LLM generated SQL query:
43
+ {{actual_output}}
44
+
45
+ Table schema:
46
+ {{context}}
47
+ """
48
+ }],
49
+ options={
50
+ "Y": 1.0,
51
+ "N": 0.0
52
+ }
53
+ )
54
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.0.8
3
+ Version: 0.0.9
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -24,7 +24,7 @@ judgeval/judges/litellm_judge.py,sha256=EIL58Teptv8DzZUO3yP2RDQCDq-aoBB6HPZzPdK6
24
24
  judgeval/judges/mixture_of_judges.py,sha256=OuGWCuXyqe7s_Y74ij90TJFRfHU-VAFyJVVrwBM0RO0,15532
25
25
  judgeval/judges/together_judge.py,sha256=x3jf-tq77QPXHeeoF739f69hE_0VceXD9FHLrVFdGVA,2275
26
26
  judgeval/judges/utils.py,sha256=YUvivcGV1OKLPMJ9N6aTvhA0r_zzJ2NXriPguiiaVaY,2110
27
- judgeval/scorers/__init__.py,sha256=3rq2VtszrJk9gZ3oAMVd7EGlSugr8aRlHWprMDgQPaQ,956
27
+ judgeval/scorers/__init__.py,sha256=XcDdLn_s16rSQob0896oj4JXTA8-Xfl271TUEBj6Oew,998
28
28
  judgeval/scorers/api_scorer.py,sha256=88kCWr6IetLFn3ziTPG-lwDWvMhFUC6xfINU1MJBoho,2125
29
29
  judgeval/scorers/base_scorer.py,sha256=mbOReG88fWaqCnC8F0u5QepRlzgVkuOz89KEKYxrmMc,1794
30
30
  judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
@@ -32,7 +32,7 @@ judgeval/scorers/judgeval_scorer.py,sha256=14SZ3sBZtGNM3BCegKebkNad9LTs5Tyhs0kD6
32
32
  judgeval/scorers/prompt_scorer.py,sha256=bUv8eZNy1XGVM1gNMt33dgIVX6zj63bGAV6O0o0c7yg,17821
33
33
  judgeval/scorers/score.py,sha256=zJKG21h9Njyj2vS36CAFK2wlbOcHSKgrLgHV5_25KKw,18630
34
34
  judgeval/scorers/utils.py,sha256=dtueaJm8e3Ph3wj1vC-srzadgK_CoIlOefdvMQ-cwK8,6826
35
- judgeval/scorers/judgeval_scorers/__init__.py,sha256=077QnuBfw9Sy9RP2TF2oKCtt5PbaqBZLyiP-gczKShk,5092
35
+ judgeval/scorers/judgeval_scorers/__init__.py,sha256=D12jJAKTcfmz8fDBkYeOmdzZMZsURuODIJ5p7Nk1lWE,5189
36
36
  judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=zFwH2TC5AFlpDRfVKc6GN4YTtnmeyALl-JRLoZD_Jco,1284
37
37
  judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=690G5askjE8dcbKPGvCF6JxAEM9QJUqb-3K-D6lI6oM,463
38
38
  judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=CqvvjV7AZqPlXh-PZaPKYPILHr15u4bIYiKBFjlk5i0,457
@@ -44,6 +44,9 @@ judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=ffYwH3CexP
44
44
  judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py,sha256=CAZBQKwNSqpqAoOgStYfr-yP1Brug_6VRimRIQY-zdg,894
45
45
  judgeval/scorers/judgeval_scorers/api_scorers/summarization.py,sha256=-E3oxYbI0D_0q-_fGWh2jQHW9O4Pu7I7xvLWsHU6cn8,450
46
46
  judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py,sha256=17ppPXm962ew67GU5m0npzbPu3CuhgdKY_KmfPvKfu4,457
47
+ judgeval/scorers/judgeval_scorers/classifiers/__init__.py,sha256=Qt81W5ZCwMvBAne0LfQDb8xvg5iOG1vEYP7WizgwAZo,67
48
+ judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py,sha256=8iTzMvou1Dr8pybul6lZHKjc9Ye2-0_racRGYkhEdTY,74
49
+ judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256=ly72Z7s_c8NID6-nQnuW8qEGEW2MqdvpJ-5WfXzbAQg,2579
47
50
  judgeval/scorers/judgeval_scorers/local_implementations/__init__.py,sha256=ZDbmYHwIbPD75Gj9JKtEWnpBdSVGGRmbn1_IOR6GR-c,1627
48
51
  judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py,sha256=cxxUEspgoIdSzJbwIIioamC0-xDqhYVfYAWxaYF-D_Y,177
49
52
  judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py,sha256=PDThn6SzqxgMXT7BpQs2TEBOsgfD5fi6fnKk31qaCTo,10227
@@ -73,7 +76,7 @@ judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py
73
76
  judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py,sha256=CBuE6oCxMzTdJoXFt_YPWBte88kedEQ9t3g52ZRztGY,21086
74
77
  judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py,sha256=JUB3TMqS1OHr6PqpIGqkyiBNbyfUaw7lZuUATjU3_ek,168
75
78
  judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py,sha256=CYGRJY5EuyICYzHrmFdLykwXakX8AC7G3Bhj7p6szfY,5493
76
- judgeval-0.0.8.dist-info/METADATA,sha256=91SMIPO60Q_Ab7yTjL2sKmPgmfl6Bji6_QAzkjaOHlk,1204
77
- judgeval-0.0.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
78
- judgeval-0.0.8.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
79
- judgeval-0.0.8.dist-info/RECORD,,
79
+ judgeval-0.0.9.dist-info/METADATA,sha256=D9-pDQVSwfHCVcZ85-AS6MaMhd1AGz1CAJ5fRLwrRsA,1204
80
+ judgeval-0.0.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
81
+ judgeval-0.0.9.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
82
+ judgeval-0.0.9.dist-info/RECORD,,