judgeval 0.0.20__py3-none-any.whl → 0.0.22__py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (28)
  1. judgeval/common/tracer.py +41 -2
  2. judgeval/constants.py +1 -0
  3. judgeval/data/__init__.py +2 -3
  4. judgeval/data/custom_example.py +98 -0
  5. judgeval/data/datasets/dataset.py +17 -124
  6. judgeval/data/datasets/eval_dataset_client.py +5 -11
  7. judgeval/data/datasets/utils.py +0 -73
  8. judgeval/data/ground_truth.py +0 -54
  9. judgeval/judgment_client.py +23 -7
  10. judgeval/run_evaluation.py +62 -8
  11. judgeval/scorers/api_scorer.py +3 -1
  12. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +10 -2
  13. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +9 -2
  14. judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +10 -2
  15. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +11 -2
  16. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +11 -2
  17. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +10 -3
  18. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +10 -2
  19. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +10 -2
  20. judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +10 -2
  21. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +10 -2
  22. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +9 -2
  23. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +9 -2
  24. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +10 -3
  25. {judgeval-0.0.20.dist-info → judgeval-0.0.22.dist-info}/METADATA +7 -3
  26. {judgeval-0.0.20.dist-info → judgeval-0.0.22.dist-info}/RECORD +28 -27
  27. {judgeval-0.0.20.dist-info → judgeval-0.0.22.dist-info}/WHEEL +0 -0
  28. {judgeval-0.0.20.dist-info → judgeval-0.0.22.dist-info}/licenses/LICENSE.md +0 -0
judgeval/judgment_client.py

@@ -10,7 +10,6 @@ from judgeval.data.datasets import EvalDataset, EvalDatasetClient
 from judgeval.data import (
     ScoringResult,
     Example,
-    GroundTruthExample
 )
 from judgeval.scorers import (
     APIJudgmentScorer,
@@ -27,7 +26,8 @@ from judgeval.judges import JudgevalJudge
 from judgeval.constants import (
     JUDGMENT_EVAL_FETCH_API_URL,
     JUDGMENT_EVAL_DELETE_API_URL,
-    JUDGMENT_EVAL_DELETE_PROJECT_API_URL
+    JUDGMENT_EVAL_DELETE_PROJECT_API_URL,
+    JUDGMENT_PROJECT_DELETE_API_URL
 )
 from judgeval.common.exceptions import JudgmentAPIError
 from pydantic import BaseModel
@@ -156,7 +156,7 @@ class JudgmentClient:
         metadata: Optional[Dict[str, Any]] = None,
         project_name: str = "",
         eval_run_name: str = "",
-        log_results: bool = False,
+        log_results: bool = True,
         use_judgment: bool = True,
         rules: Optional[List[Rule]] = None
     ) -> List[ScoringResult]:
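Note the flipped default: evaluation runs are now logged to the Judgment platform unless the caller opts out. A minimal usage sketch, assuming the parameter list above belongs to JudgmentClient.run_evaluation and that examples, scorers, and model are its remaining keyword arguments (the Example constructor fields and the top-level scorer re-export are also assumptions):

from judgeval.judgment_client import JudgmentClient   # module path taken from the file list
from judgeval.data import Example
from judgeval.scorers import AnswerRelevancyScorer    # top-level re-export assumed

client = JudgmentClient()  # assumed to pick up credentials from the environment
results = client.run_evaluation(        # method name assumed; not shown in the hunk above
    examples=[Example(input="What is 2 + 2?", actual_output="4")],  # constructor fields assumed
    scorers=[AnswerRelevancyScorer(threshold=0.5)],
    model="gpt-4o",                     # placeholder model name
    project_name="demo_project",
    eval_run_name="demo_run",
    # log_results now defaults to True; pass log_results=False to skip logging.
)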
@@ -282,11 +282,11 @@ class JudgmentClient:
         """
         return self.eval_dataset_client.pull_all_user_dataset_stats()

-    def edit_dataset(self, alias: str, examples: List[Example], ground_truths: List[GroundTruthExample]) -> bool:
+    def edit_dataset(self, alias: str, examples: List[Example]) -> bool:
         """
-        Edits the dataset on Judgment platform by adding new examples and ground truths
+        Edits the dataset on Judgment platform by adding new examples
         """
-        return self.eval_dataset_client.edit_dataset(alias, examples, ground_truths)
+        return self.eval_dataset_client.edit_dataset(alias, examples)

     # Maybe add option where you can pass in the EvaluationRun object and it will pull the eval results from the backend
     def pull_eval(self, project_name: str, eval_run_name: str) -> List[Dict[str, Union[str, List[ScoringResult]]]]:
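The dataset edit path drops the separate ground-truth list; examples are now the only payload. A sketch of the updated call against the signature shown above (client construction and the Example constructor fields are assumptions):

from judgeval.judgment_client import JudgmentClient   # module path taken from the file list
from judgeval.data import Example

client = JudgmentClient()  # assumed to pick up credentials from the environment
new_examples = [
    Example(input="Summarize the ticket", actual_output="Customer reports a login failure"),  # fields assumed
]
# ground_truths is no longer accepted; examples are the only content passed through.
client.edit_dataset(alias="support_tickets", examples=new_examples)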
@@ -362,7 +362,6 @@ class JudgmentClient:
         response = requests.delete(JUDGMENT_EVAL_DELETE_PROJECT_API_URL,
                                    json={
                                        "project_name": project_name,
-                                       "judgment_api_key": self.judgment_api_key,
                                    },
                                    headers={
                                        "Content-Type": "application/json",
@@ -372,6 +371,23 @@ class JudgmentClient:
         if response.status_code != requests.codes.ok:
             raise ValueError(f"Error deleting eval results: {response.json()}")
         return response.json()
+
+    def delete_project(self, project_name: str) -> bool:
+        """
+        Deletes a project from the server. Which also deletes all evaluations and traces associated with the project.
+        """
+        response = requests.delete(JUDGMENT_PROJECT_DELETE_API_URL,
+                                   json={
+                                       "project_name": project_name,
+                                   },
+                                   headers={
+                                       "Content-Type": "application/json",
+                                       "Authorization": f"Bearer {self.judgment_api_key}",
+                                       "X-Organization-Id": self.organization_id
+                                   })
+        if response.status_code != requests.codes.ok:
+            raise ValueError(f"Error deleting project: {response.json()}")
+        return response.json()

     def _validate_api_key(self):
         """
judgeval/run_evaluation.py

@@ -1,12 +1,17 @@
 import asyncio
 import requests
-from typing import List, Dict
+import time
+import sys
+import itertools
+import threading
+from typing import List, Dict, Any
 from datetime import datetime
 from rich import print as rprint

 from judgeval.data import (
     ScorerData,
-    ScoringResult
+    ScoringResult,
+    Example
 )
 from judgeval.scorers import (
     JudgevalScorer,
@@ -14,7 +19,6 @@ from judgeval.scorers import (
     ClassifierScorer
 )
 from judgeval.scorers.score import a_execute_scoring
-
 from judgeval.constants import (
     ROOT_API,
     JUDGMENT_EVAL_API_URL,
@@ -185,7 +189,7 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_k
         raise JudgmentAPIError(f"Failed to check if eval run name exists: {str(e)}")


-def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run: EvaluationRun) -> None:
+def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run: EvaluationRun) -> str:
     """
     Logs evaluation results to the Judgment API database.

@@ -220,7 +224,9 @@ def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run:
             raise JudgmentAPIError(error_message)

         if "ui_results_url" in res.json():
-            rprint(f"\n🔍 You can view your evaluation results here: [rgb(106,0,255)]{res.json()['ui_results_url']}[/]\n")
+            url = res.json()['ui_results_url']
+            pretty_str = f"\n🔍 You can view your evaluation results here: [rgb(106,0,255)][link={url}]View Results[/link]\n"
+            return pretty_str

     except requests.exceptions.RequestException as e:
         error(f"Request failed while saving evaluation results to DB: {str(e)}")
@@ -229,6 +235,51 @@ def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run:
         error(f"Failed to save evaluation results to DB: {str(e)}")
         raise ValueError(f"Failed to save evaluation results to DB: {str(e)}")

+def run_with_spinner(message: str, func, *args, **kwargs) -> Any:
+    """Run a function with a spinner in the terminal."""
+    spinner = itertools.cycle(['|', '/', '-', '\\'])
+
+    def display_spinner():
+        while not stop_spinner_event.is_set():
+            sys.stdout.write(f'\r{message}{next(spinner)}')
+            sys.stdout.flush()
+            time.sleep(0.1)
+
+    stop_spinner_event = threading.Event()
+    spinner_thread = threading.Thread(target=display_spinner)
+    spinner_thread.start()
+
+    try:
+        result = func(*args, **kwargs)
+    except Exception as e:
+        error(f"An error occurred: {str(e)}")
+        stop_spinner_event.set()
+        spinner_thread.join()
+        raise e
+    finally:
+        stop_spinner_event.set()
+        spinner_thread.join()
+
+    sys.stdout.write('\r' + ' ' * (len(message) + 1) + '\r')
+    sys.stdout.flush()
+
+    return result
+
+def check_examples(examples: List[Example], scorers: List[APIJudgmentScorer]) -> None:
+    """
+    Checks if the example contains the necessary parameters for the scorer.
+    """
+    for scorer in scorers:
+        if isinstance(scorer, APIJudgmentScorer):
+            for example in examples:
+                missing_params = []
+                for param in scorer.required_params:
+                    if getattr(example, param.value) is None:
+                        missing_params.append(f"'{param.value}'")
+                if missing_params:
+                    # We do this because we want to inform users that an example is missing parameters for a scorer
+                    # Example ID (usually random UUID) does not provide any helpful information for the user but printing the entire example is overdoing it
+                    print(f"WARNING: Example {example.example_id} is missing the following parameters: {missing_params} for scorer {scorer.score_type.value}")


 def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[ScoringResult]:
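Both new helpers are plain module-level functions, so they can be exercised on their own. A minimal sketch of run_with_spinner, with slow_task standing in for a long call such as execute_api_eval (the import path follows the file list above):

import time
from judgeval.run_evaluation import run_with_spinner  # module path taken from the file list

def slow_task(n: int) -> int:
    # Stand-in for a long-running call; the spinner animates on stdout while this sleeps.
    time.sleep(2)
    return n * 2

result = run_with_spinner("Running Evaluation: ", slow_task, 21)
print(result)  # 42, printed after the spinner line has been cleared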
@@ -253,7 +304,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
     Returns:
         List[ScoringResult]: The results of the evaluation. Each result is a dictionary containing the fields of a `ScoringResult` object.
     """
-
+    
     # Call endpoint to check to see if eval run name exists (if we DON'T want to override and DO want to log results)
     if not override and evaluation_run.log_results:
         check_eval_run_name_exists(
@@ -306,6 +357,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor

     # Execute evaluation using Judgment API
     if judgment_scorers:
+        check_examples(evaluation_run.examples, evaluation_run.scorers)
         info("Starting API evaluation")
         debug(f"Creating API evaluation run with {len(judgment_scorers)} scorers")
         try: # execute an EvaluationRun with just JudgmentScorers
@@ -323,7 +375,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
                 rules=evaluation_run.rules
             )
             debug("Sending request to Judgment API")
-            response_data: List[Dict] = execute_api_eval(api_evaluation_run) # Dicts are `ScoringResult` objs
+            response_data: List[Dict] = run_with_spinner("Running Evaluation: ", execute_api_eval, api_evaluation_run)
             info(f"Received {len(response_data['results'])} results from API")
         except JudgmentAPIError as e:
             error(f"An error occurred while executing the Judgment API request: {str(e)}")
@@ -352,6 +404,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
             api_results.append(ScoringResult(**filtered_result))
     # Run local evals
     if local_scorers: # List[JudgevalScorer]
+        # We should be removing local scorers soon
         info("Starting local evaluation")
         for example in evaluation_run.examples:
             with example_logging_context(example.timestamp, example.example_id):
@@ -389,7 +442,8 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
     # )

     if evaluation_run.log_results:
-        log_evaluation_results(merged_results, evaluation_run)
+        pretty_str = run_with_spinner("Logging Results: ", log_evaluation_results, merged_results, evaluation_run)
+        rprint(pretty_str)

     for i, result in enumerate(merged_results):
         if not result.scorers_data: # none of the scorers could be executed on this example
judgeval/scorers/api_scorer.py

@@ -5,8 +5,9 @@ Scores `Example`s using ready-made Judgment evaluators.
 """

 from pydantic import BaseModel, field_validator
+from typing import List
 from judgeval.common.logger import debug, info, warning, error
-
+from judgeval.data import ExampleParams
 from judgeval.constants import APIScorer, UNBOUNDED_SCORERS


@@ -20,6 +21,7 @@ class APIJudgmentScorer(BaseModel):
     """
     score_type: APIScorer
     threshold: float
+    required_params: List[ExampleParams] = [] # List of the required parameters on examples for the scorer

     @field_validator('threshold')
     def validate_threshold(cls, v, info):
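required_params is the field that check_examples in run_evaluation.py reads to warn about under-specified examples before an API run. A sketch of that interaction (the Example constructor fields are an assumption; the enum members and imports are the ones used elsewhere in this diff):

from judgeval.constants import APIScorer
from judgeval.data import Example, ExampleParams
from judgeval.scorers.api_scorer import APIJudgmentScorer
from judgeval.run_evaluation import check_examples

# A bare API scorer declaring which Example fields it needs.
scorer = APIJudgmentScorer(
    score_type=APIScorer.FAITHFULNESS,
    threshold=0.7,
    required_params=[
        ExampleParams.INPUT,
        ExampleParams.ACTUAL_OUTPUT,
        ExampleParams.RETRIEVAL_CONTEXT,
    ],
)

# retrieval_context is omitted, so check_examples prints a WARNING naming the
# example id, the missing parameter, and the scorer's score_type.
example = Example(input="What is the capital of France?", actual_output="Paris")  # fields assumed
check_examples([example], [scorer])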
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py

@@ -8,11 +8,19 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams

 class AnswerCorrectnessScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.ANSWER_CORRECTNESS)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.ANSWER_CORRECTNESS,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.EXPECTED_OUTPUT,
+            ]
+        )

     @property
     def __name__(self):
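For callers, the practical effect is that an Example scored with AnswerCorrectnessScorer should carry all three declared fields, otherwise the new check_examples pass warns before the run. A sketch (Example constructor fields assumed; the scorer import path follows the file list):

from judgeval.data import Example
from judgeval.scorers.judgeval_scorers.api_scorers.answer_correctness import AnswerCorrectnessScorer

scorer = AnswerCorrectnessScorer(threshold=0.8)
example = Example(
    input="Who wrote The Old Man and the Sea?",
    actual_output="Ernest Hemingway wrote it.",
    expected_output="Ernest Hemingway",  # required by this scorer alongside input and actual_output
)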
judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py

@@ -8,11 +8,18 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams

 class AnswerRelevancyScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.ANSWER_RELEVANCY)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.ANSWER_RELEVANCY,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+            ]
+        )

     @property
     def __name__(self):
judgeval/scorers/judgeval_scorers/api_scorers/comparison.py

@@ -9,12 +9,20 @@ TODO add link to docs page for this scorer
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 from typing import Optional, Dict
-
+from judgeval.data import ExampleParams
 class ComparisonScorer(APIJudgmentScorer):
     kwargs: Optional[Dict] = None

     def __init__(self, threshold: float, criteria: str, description: str):
-        super().__init__(threshold=threshold, score_type=APIScorer.COMPARISON)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.COMPARISON,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.EXPECTED_OUTPUT,
+            ]
+        )
         self.kwargs = {"criteria": criteria, "description": description}

     @property
judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py

@@ -8,11 +8,20 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams

 class ContextualPrecisionScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.CONTEXTUAL_PRECISION)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.CONTEXTUAL_PRECISION,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.RETRIEVAL_CONTEXT,
+                ExampleParams.EXPECTED_OUTPUT,
+            ]
+        )

     @property
     def __name__(self):
judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py

@@ -8,12 +8,21 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
+from judgeval.data import ExampleParams


 class ContextualRecallScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.CONTEXTUAL_RECALL)
-
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.CONTEXTUAL_RECALL,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.EXPECTED_OUTPUT,
+                ExampleParams.RETRIEVAL_CONTEXT,
+            ]
+        )
     @property
     def __name__(self):
         return "Contextual Recall"
judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py

@@ -8,15 +8,22 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams

 class ContextualRelevancyScorer(APIJudgmentScorer):
     """
     Scorer that checks if the output of a model is relevant to the retrieval context
     """
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.CONTEXTUAL_RELEVANCY)
-
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.CONTEXTUAL_RELEVANCY,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.RETRIEVAL_CONTEXT,
+            ]
+        )
     @property
     def __name__(self):
         return "Contextual Relevancy"
judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py

@@ -8,13 +8,21 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-from typing import Optional, Dict
+from typing import Optional, Dict, List
+from judgeval.data import ExampleParams

 class ExecutionOrderScorer(APIJudgmentScorer):
     kwargs: Optional[Dict] = None

     def __init__(self, threshold: float, should_exact_match: bool = False, should_consider_ordering: bool = False):
-        super().__init__(threshold=threshold, score_type=APIScorer.EXECUTION_ORDER)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.EXECUTION_ORDER,
+            required_params=[
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.EXPECTED_OUTPUT,
+            ]
+        )
         self.kwargs = {"should_exact_match": should_exact_match, "should_consider_ordering": should_consider_ordering}

     @property
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py

@@ -8,11 +8,19 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams

 class FaithfulnessScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.FAITHFULNESS)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.FAITHFULNESS,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.RETRIEVAL_CONTEXT,
+            ]
+        )

     @property
     def __name__(self):
judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py

@@ -8,11 +8,19 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams

 class GroundednessScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.GROUNDEDNESS)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.GROUNDEDNESS,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.RETRIEVAL_CONTEXT,
+            ]
+        )

     @property
     def __name__(self):
judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py

@@ -8,11 +8,19 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams

 class HallucinationScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.HALLUCINATION)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.HALLUCINATION,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.CONTEXT,
+            ]
+        )

     @property
     def __name__(self):
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py

@@ -8,11 +8,18 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams

 class InstructionAdherenceScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.INSTRUCTION_ADHERENCE)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.INSTRUCTION_ADHERENCE,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+            ]
+        )

     @property
     def __name__(self):
judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py

@@ -11,13 +11,20 @@ from pydantic import BaseModel, Field
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams

 class JSONCorrectnessScorer(APIJudgmentScorer):
     json_schema: BaseModel = Field(None, exclude=True)

     def __init__(self, threshold: float, json_schema: BaseModel):
-        super().__init__(threshold=threshold, score_type=APIScorer.JSON_CORRECTNESS)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.JSON_CORRECTNESS,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+            ]
+        )
         object.__setattr__(self, 'json_schema', json_schema)

     def to_dict(self):
judgeval/scorers/judgeval_scorers/api_scorers/summarization.py

@@ -7,12 +7,19 @@ TODO add link to docs page for this scorer

 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
-from judgeval.constants import APIScorer
-
+from judgeval.constants import APIScorer
+from judgeval.data import ExampleParams

 class SummarizationScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.SUMMARIZATION)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.SUMMARIZATION,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+            ]
+        )

     @property
     def __name__(self):
{judgeval-0.0.20.dist-info → judgeval-0.0.22.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.0.20
+Version: 0.0.22
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -12,9 +12,15 @@ Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.11
 Requires-Dist: anthropic
 Requires-Dist: fastapi
+Requires-Dist: langchain
+Requires-Dist: langchain-anthropic
+Requires-Dist: langchain-core
+Requires-Dist: langchain-huggingface
+Requires-Dist: langchain-openai
 Requires-Dist: litellm
 Requires-Dist: nest-asyncio
 Requires-Dist: openai
+Requires-Dist: openpyxl
 Requires-Dist: pandas
 Requires-Dist: pika
 Requires-Dist: python-dotenv==1.0.1
@@ -23,8 +29,6 @@ Requires-Dist: supabase
 Requires-Dist: together
 Requires-Dist: uvicorn
 Provides-Extra: dev
-Requires-Dist: langfuse==2.50.3; extra == 'dev'
-Requires-Dist: patronus; extra == 'dev'
 Requires-Dist: pytest-asyncio>=0.25.0; extra == 'dev'
 Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
 Requires-Dist: pytest>=8.3.4; extra == 'dev'