judgeval 0.0.20__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
judgeval/common/tracer.py CHANGED
@@ -10,6 +10,7 @@ import os
 import time
 import uuid
 import warnings
+from contextvars import ContextVar
 from contextlib import contextmanager
 from collections import defaultdict
 from dataclasses import dataclass, field
@@ -37,6 +38,7 @@ from judgeval.constants import (
     RABBITMQ_PORT,
     RABBITMQ_QUEUE,
     JUDGMENT_TRACES_DELETE_API_URL,
+    JUDGMENT_PROJECT_DELETE_API_URL,
     JUDGMENT_TRACES_ADD_TO_EVAL_QUEUE_API_URL
 )
 from judgeval.judgment_client import JudgmentClient
@@ -54,7 +56,7 @@ from langchain_core.utils.function_calling import convert_to_openai_tool
 from langchain_core.callbacks import CallbackManager, BaseCallbackHandler
 from langchain_core.agents import AgentAction, AgentFinish
 from langchain_core.outputs import LLMResult
-
+from langchain_core.tracers.context import register_configure_hook
 from langchain_core.messages.ai import AIMessage
 from langchain_core.messages.tool import ToolMessage
 from langchain_core.messages.base import BaseMessage
@@ -251,7 +253,8 @@ class TraceManagerClient:
             raise ValueError(f"Failed to save trace data: {response.text}")
 
         if not empty_save and "ui_results_url" in response.json():
-            rprint(f"\n🔍 You can view your trace data here: [rgb(106,0,255)]{response.json()['ui_results_url']}[/]\n")
+            pretty_str = f"\n🔍 You can view your trace data here: [rgb(106,0,255)][link={response.json()['ui_results_url']}]View Trace[/link]\n"
+            rprint(pretty_str)
 
     def delete_trace(self, trace_id: str):
         """
@@ -294,6 +297,27 @@ class TraceManagerClient:
             raise ValueError(f"Failed to delete trace: {response.text}")
 
         return response.json()
+
+    def delete_project(self, project_name: str):
+        """
+        Deletes a project from the server, along with all evaluations and traces associated with it.
+        """
+        response = requests.delete(
+            JUDGMENT_PROJECT_DELETE_API_URL,
+            json={
+                "project_name": project_name,
+            },
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
+            }
+        )
+
+        if response.status_code != HTTPStatus.OK:
+            raise ValueError(f"Failed to delete project: {response.text}")
+
+        return response.json()
 
 
 class TraceClient:
@@ -1152,3 +1176,18 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
             'args': str(messages),
             'kwargs': kwargs
         })
+
+judgeval_callback_handler_var: ContextVar[Optional[JudgevalCallbackHandler]] = ContextVar(
+    "judgeval_callback_handler", default=None
+)
+
+def set_global_handler(handler: JudgevalCallbackHandler):
+    judgeval_callback_handler_var.set(handler)
+
+def clear_global_handler():
+    judgeval_callback_handler_var.set(None)
+
+register_configure_hook(
+    context_var=judgeval_callback_handler_var,
+    inheritable=True,
+)
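
Taken together, the ContextVar plus `register_configure_hook` let a handler registered once apply to every LangChain invocation in the current context. A minimal usage sketch; the handler's constructor arguments and the `chain` runnable are assumptions, not shown in this diff:

    from judgeval.common.tracer import set_global_handler, clear_global_handler

    handler = ...  # a JudgevalCallbackHandler built per your tracer setup (constructor not shown here)
    set_global_handler(handler)
    try:
        chain.invoke({"input": "..."})  # `chain` is any LangChain runnable; the hook injects the handler
    finally:
        clear_global_handler()  # stop tracing for subsequent runs
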
judgeval/constants.py CHANGED
@@ -48,6 +48,7 @@ JUDGMENT_EVAL_LOG_API_URL = f"{ROOT_API}/log_eval_results/"
 JUDGMENT_EVAL_FETCH_API_URL = f"{ROOT_API}/fetch_eval_results/"
 JUDGMENT_EVAL_DELETE_API_URL = f"{ROOT_API}/delete_eval_results_by_project_and_run_name/"
 JUDGMENT_EVAL_DELETE_PROJECT_API_URL = f"{ROOT_API}/delete_eval_results_by_project/"
+JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete/"
 JUDGMENT_TRACES_FETCH_API_URL = f"{ROOT_API}/traces/fetch/"
 JUDGMENT_TRACES_SAVE_API_URL = f"{ROOT_API}/traces/save/"
 JUDGMENT_TRACES_DELETE_API_URL = f"{ROOT_API}/traces/delete/"
judgeval/judgment_client.py CHANGED
@@ -27,7 +27,8 @@ from judgeval.judges import JudgevalJudge
 from judgeval.constants import (
     JUDGMENT_EVAL_FETCH_API_URL,
     JUDGMENT_EVAL_DELETE_API_URL,
-    JUDGMENT_EVAL_DELETE_PROJECT_API_URL
+    JUDGMENT_EVAL_DELETE_PROJECT_API_URL,
+    JUDGMENT_PROJECT_DELETE_API_URL
 )
 from judgeval.common.exceptions import JudgmentAPIError
 from pydantic import BaseModel
@@ -156,7 +157,7 @@ class JudgmentClient:
         metadata: Optional[Dict[str, Any]] = None,
         project_name: str = "",
         eval_run_name: str = "",
-        log_results: bool = False,
+        log_results: bool = True,
         use_judgment: bool = True,
         rules: Optional[List[Rule]] = None
     ) -> List[ScoringResult]:
@@ -362,7 +363,6 @@ class JudgmentClient:
         response = requests.delete(JUDGMENT_EVAL_DELETE_PROJECT_API_URL,
                                    json={
                                        "project_name": project_name,
-                                       "judgment_api_key": self.judgment_api_key,
                                    },
                                    headers={
                                        "Content-Type": "application/json",
@@ -372,6 +372,23 @@
         if response.status_code != requests.codes.ok:
             raise ValueError(f"Error deleting eval results: {response.json()}")
         return response.json()
+
+    def delete_project(self, project_name: str) -> bool:
+        """
+        Deletes a project from the server, along with all evaluations and traces associated with it.
+        """
+        response = requests.delete(JUDGMENT_PROJECT_DELETE_API_URL,
+                                   json={
+                                       "project_name": project_name,
+                                   },
+                                   headers={
+                                       "Content-Type": "application/json",
+                                       "Authorization": f"Bearer {self.judgment_api_key}",
+                                       "X-Organization-Id": self.organization_id
+                                   })
+        if response.status_code != requests.codes.ok:
+            raise ValueError(f"Error deleting project: {response.json()}")
+        return response.json()
 
     def _validate_api_key(self):
         """
judgeval/run_evaluation.py CHANGED
@@ -1,12 +1,17 @@
 import asyncio
 import requests
-from typing import List, Dict
+import time
+import sys
+import itertools
+import threading
+from typing import List, Dict, Any
 from datetime import datetime
 from rich import print as rprint
 
 from judgeval.data import (
     ScorerData,
-    ScoringResult
+    ScoringResult,
+    Example
 )
 from judgeval.scorers import (
     JudgevalScorer,
@@ -14,7 +19,6 @@ from judgeval.scorers import (
     ClassifierScorer
 )
 from judgeval.scorers.score import a_execute_scoring
-
 from judgeval.constants import (
     ROOT_API,
     JUDGMENT_EVAL_API_URL,
@@ -185,7 +189,7 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_k
         raise JudgmentAPIError(f"Failed to check if eval run name exists: {str(e)}")
 
 
-def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run: EvaluationRun) -> None:
+def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run: EvaluationRun) -> str:
     """
     Logs evaluation results to the Judgment API database.
 
@@ -220,7 +224,9 @@ def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run:
             raise JudgmentAPIError(error_message)
 
         if "ui_results_url" in res.json():
-            rprint(f"\n🔍 You can view your evaluation results here: [rgb(106,0,255)]{res.json()['ui_results_url']}[/]\n")
+            url = res.json()['ui_results_url']
+            pretty_str = f"\n🔍 You can view your evaluation results here: [rgb(106,0,255)][link={url}]View Results[/link]\n"
+            return pretty_str
 
     except requests.exceptions.RequestException as e:
         error(f"Request failed while saving evaluation results to DB: {str(e)}")
@@ -229,6 +235,51 @@
         error(f"Failed to save evaluation results to DB: {str(e)}")
         raise ValueError(f"Failed to save evaluation results to DB: {str(e)}")
 
+def run_with_spinner(message: str, func, *args, **kwargs) -> Any:
+    """Run a function with a spinner in the terminal."""
+    spinner = itertools.cycle(['|', '/', '-', '\\'])
+
+    def display_spinner():
+        while not stop_spinner_event.is_set():
+            sys.stdout.write(f'\r{message}{next(spinner)}')
+            sys.stdout.flush()
+            time.sleep(0.1)
+
+    stop_spinner_event = threading.Event()
+    spinner_thread = threading.Thread(target=display_spinner)
+    spinner_thread.start()
+
+    try:
+        result = func(*args, **kwargs)
+    except Exception as e:
+        error(f"An error occurred: {str(e)}")
+        stop_spinner_event.set()
+        spinner_thread.join()
+        raise e
+    finally:
+        stop_spinner_event.set()
+        spinner_thread.join()
+
+    sys.stdout.write('\r' + ' ' * (len(message) + 1) + '\r')
+    sys.stdout.flush()
+
+    return result
+
+def check_examples(examples: List[Example], scorers: List[APIJudgmentScorer]) -> None:
+    """
+    Checks that each example contains the parameters required by its scorers.
+    """
+    for scorer in scorers:
+        if isinstance(scorer, APIJudgmentScorer):
+            for example in examples:
+                missing_params = []
+                for param in scorer.required_params:
+                    if getattr(example, param.value) is None:
+                        missing_params.append(f"'{param.value}'")
+                if missing_params:
+                    # Warn the user that the example is missing parameters for this scorer;
+                    # the example ID (usually a random UUID) isn't informative on its own, but printing the entire example would be excessive.
+                    print(f"WARNING: Example {example.example_id} is missing the following parameters: {missing_params} for scorer {scorer.score_type.value}")
 
 
 def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[ScoringResult]:
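
A minimal sketch of the new spinner helper in isolation, importing it from this module; `slow_call` is a stand-in for any blocking function:

    import time
    from judgeval.run_evaluation import run_with_spinner

    def slow_call(a, b):
        time.sleep(2)  # stand-in for a blocking API request
        return a + b

    total = run_with_spinner("Working: ", slow_call, 2, 3)  # spinner animates until slow_call returns
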
@@ -253,7 +304,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
     Returns:
         List[ScoringResult]: The results of the evaluation. Each result is a dictionary containing the fields of a `ScoringResult` object.
     """
-
+
     # Call endpoint to check to see if eval run name exists (if we DON'T want to override and DO want to log results)
     if not override and evaluation_run.log_results:
         check_eval_run_name_exists(
@@ -306,6 +357,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
 
     # Execute evaluation using Judgment API
     if judgment_scorers:
+        check_examples(evaluation_run.examples, evaluation_run.scorers)
         info("Starting API evaluation")
         debug(f"Creating API evaluation run with {len(judgment_scorers)} scorers")
         try:  # execute an EvaluationRun with just JudgmentScorers
@@ -323,7 +375,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
             rules=evaluation_run.rules
         )
         debug("Sending request to Judgment API")
-        response_data: List[Dict] = execute_api_eval(api_evaluation_run)  # Dicts are `ScoringResult` objs
+        response_data: List[Dict] = run_with_spinner("Running Evaluation: ", execute_api_eval, api_evaluation_run)
         info(f"Received {len(response_data['results'])} results from API")
     except JudgmentAPIError as e:
         error(f"An error occurred while executing the Judgment API request: {str(e)}")
@@ -352,6 +404,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
             api_results.append(ScoringResult(**filtered_result))
     # Run local evals
     if local_scorers:  # List[JudgevalScorer]
+        # We should be removing local scorers soon
         info("Starting local evaluation")
         for example in evaluation_run.examples:
             with example_logging_context(example.timestamp, example.example_id):
@@ -389,7 +442,8 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
     # )
 
     if evaluation_run.log_results:
-        log_evaluation_results(merged_results, evaluation_run)
+        pretty_str = run_with_spinner("Logging Results: ", log_evaluation_results, merged_results, evaluation_run)
+        rprint(pretty_str)
 
     for i, result in enumerate(merged_results):
         if not result.scorers_data:  # none of the scorers could be executed on this example
judgeval/scorers/api_scorer.py CHANGED
@@ -5,8 +5,9 @@ Scores `Example`s using ready-made Judgment evaluators.
 """
 
 from pydantic import BaseModel, field_validator
+from typing import List
 from judgeval.common.logger import debug, info, warning, error
-
+from judgeval.data import ExampleParams
 from judgeval.constants import APIScorer, UNBOUNDED_SCORERS
 
 
@@ -20,6 +21,7 @@ class APIJudgmentScorer(BaseModel):
     """
     score_type: APIScorer
     threshold: float
+    required_params: List[ExampleParams] = []  # List of the required parameters on examples for the scorer
 
    @field_validator('threshold')
    def validate_threshold(cls, v, info):
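
With the new field, a scorer declares which Example attributes it needs, and check_examples() (added in run_evaluation.py above) can warn before the API call. A sketch of constructing the base model directly, using enum members that appear elsewhere in this diff:

    from judgeval.scorers.api_scorer import APIJudgmentScorer
    from judgeval.constants import APIScorer
    from judgeval.data import ExampleParams

    scorer = APIJudgmentScorer(
        score_type=APIScorer.FAITHFULNESS,
        threshold=0.7,
        required_params=[ExampleParams.INPUT, ExampleParams.ACTUAL_OUTPUT],
    )
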
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py CHANGED
@@ -8,11 +8,19 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams
 
 class AnswerCorrectnessScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.ANSWER_CORRECTNESS)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.ANSWER_CORRECTNESS,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.EXPECTED_OUTPUT,
+            ]
+        )
 
     @property
     def __name__(self):
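
A sketch of how the declared parameters surface to users; the import path and the Example field values are assumptions based on the package layout. An example missing expected_output would now trigger the check_examples() warning for this scorer:

    from judgeval.data import Example
    from judgeval.scorers import AnswerCorrectnessScorer  # import path assumed from the package layout

    scorer = AnswerCorrectnessScorer(threshold=0.8)
    example = Example(
        input="What is the capital of France?",   # placeholder values
        actual_output="Paris",
        # expected_output omitted -> check_examples() warns it is missing for this scorer
    )
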
judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py CHANGED
@@ -8,11 +8,18 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams
 
 class AnswerRelevancyScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.ANSWER_RELEVANCY)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.ANSWER_RELEVANCY,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+            ]
+        )
 
     @property
     def __name__(self):
judgeval/scorers/judgeval_scorers/api_scorers/comparison.py CHANGED
@@ -9,12 +9,20 @@ TODO add link to docs page for this scorer
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
 from typing import Optional, Dict
-
+from judgeval.data import ExampleParams
 class ComparisonScorer(APIJudgmentScorer):
     kwargs: Optional[Dict] = None
 
     def __init__(self, threshold: float, criteria: str, description: str):
-        super().__init__(threshold=threshold, score_type=APIScorer.COMPARISON)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.COMPARISON,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.EXPECTED_OUTPUT,
+            ]
+        )
         self.kwargs = {"criteria": criteria, "description": description}
 
     @property
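
ComparisonScorer keeps its extra criteria and description arguments alongside the new required_params. A usage sketch; the deep import path and the criteria text are assumptions:

    from judgeval.scorers.judgeval_scorers.api_scorers.comparison import ComparisonScorer  # path assumed

    scorer = ComparisonScorer(
        threshold=0.5,                                  # placeholder threshold
        criteria="tone",                                # placeholder criteria
        description="Prefer the friendlier response",   # placeholder description
    )
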
judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py CHANGED
@@ -8,11 +8,20 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams
 
 class ContextualPrecisionScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.CONTEXTUAL_PRECISION)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.CONTEXTUAL_PRECISION,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.RETRIEVAL_CONTEXT,
+                ExampleParams.EXPECTED_OUTPUT,
+            ]
+        )
 
     @property
     def __name__(self):
judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py CHANGED
@@ -8,12 +8,21 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
+from judgeval.data import ExampleParams
 
 
 class ContextualRecallScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.CONTEXTUAL_RECALL)
-
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.CONTEXTUAL_RECALL,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.EXPECTED_OUTPUT,
+                ExampleParams.RETRIEVAL_CONTEXT,
+            ]
+        )
     @property
     def __name__(self):
         return "Contextual Recall"
judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py CHANGED
@@ -8,15 +8,22 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams
 
 class ContextualRelevancyScorer(APIJudgmentScorer):
     """
     Scorer that checks if the output of a model is relevant to the retrieval context
     """
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.CONTEXTUAL_RELEVANCY)
-
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.CONTEXTUAL_RELEVANCY,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.RETRIEVAL_CONTEXT,
+            ]
+        )
     @property
     def __name__(self):
         return "Contextual Relevancy"
judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py CHANGED
@@ -8,13 +8,21 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-from typing import Optional, Dict
+from typing import Optional, Dict, List
+from judgeval.data import ExampleParams
 
 class ExecutionOrderScorer(APIJudgmentScorer):
     kwargs: Optional[Dict] = None
 
     def __init__(self, threshold: float, should_exact_match: bool = False, should_consider_ordering: bool = False):
-        super().__init__(threshold=threshold, score_type=APIScorer.EXECUTION_ORDER)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.EXECUTION_ORDER,
+            required_params=[
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.EXPECTED_OUTPUT,
+            ]
+        )
         self.kwargs = {"should_exact_match": should_exact_match, "should_consider_ordering": should_consider_ordering}
 
     @property
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py CHANGED
@@ -8,11 +8,19 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams
 
 class FaithfulnessScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.FAITHFULNESS)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.FAITHFULNESS,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.RETRIEVAL_CONTEXT,
+            ]
+        )
 
     @property
     def __name__(self):
judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py CHANGED
@@ -8,11 +8,19 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams
 
 class GroundednessScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.GROUNDEDNESS)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.GROUNDEDNESS,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.RETRIEVAL_CONTEXT,
+            ]
+        )
 
     @property
     def __name__(self):
judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py CHANGED
@@ -8,11 +8,19 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams
 
 class HallucinationScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.HALLUCINATION)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.HALLUCINATION,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+                ExampleParams.CONTEXT,
+            ]
+        )
 
     @property
     def __name__(self):
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py CHANGED
@@ -8,11 +8,18 @@ TODO add link to docs page for this scorer
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams
 
 class InstructionAdherenceScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.INSTRUCTION_ADHERENCE)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.INSTRUCTION_ADHERENCE,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+            ]
+        )
 
     @property
     def __name__(self):
judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py CHANGED
@@ -11,13 +11,20 @@ from pydantic import BaseModel, Field
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
 from judgeval.constants import APIScorer
-
+from judgeval.data import ExampleParams
 
 class JSONCorrectnessScorer(APIJudgmentScorer):
     json_schema: BaseModel = Field(None, exclude=True)
 
     def __init__(self, threshold: float, json_schema: BaseModel):
-        super().__init__(threshold=threshold, score_type=APIScorer.JSON_CORRECTNESS)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.JSON_CORRECTNESS,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+            ]
+        )
         object.__setattr__(self, 'json_schema', json_schema)
 
     def to_dict(self):
judgeval/scorers/judgeval_scorers/api_scorers/summarization.py CHANGED
@@ -7,12 +7,19 @@ TODO add link to docs page for this scorer
 
 # Internal imports
 from judgeval.scorers.api_scorer import APIJudgmentScorer
-from judgeval.constants import APIScorer
-
+from judgeval.constants import APIScorer
+from judgeval.data import ExampleParams
 
 class SummarizationScorer(APIJudgmentScorer):
     def __init__(self, threshold: float):
-        super().__init__(threshold=threshold, score_type=APIScorer.SUMMARIZATION)
+        super().__init__(
+            threshold=threshold,
+            score_type=APIScorer.SUMMARIZATION,
+            required_params=[
+                ExampleParams.INPUT,
+                ExampleParams.ACTUAL_OUTPUT,
+            ]
+        )
 
     @property
     def __name__(self):
{judgeval-0.0.20.dist-info → judgeval-0.0.21.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.0.20
+Version: 0.0.21
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -12,9 +12,15 @@ Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.11
 Requires-Dist: anthropic
 Requires-Dist: fastapi
+Requires-Dist: langchain
+Requires-Dist: langchain-anthropic
+Requires-Dist: langchain-core
+Requires-Dist: langchain-huggingface
+Requires-Dist: langchain-openai
 Requires-Dist: litellm
 Requires-Dist: nest-asyncio
 Requires-Dist: openai
+Requires-Dist: openpyxl
 Requires-Dist: pandas
 Requires-Dist: pika
 Requires-Dist: python-dotenv==1.0.1
@@ -23,8 +29,6 @@ Requires-Dist: supabase
 Requires-Dist: together
 Requires-Dist: uvicorn
 Provides-Extra: dev
-Requires-Dist: langfuse==2.50.3; extra == 'dev'
-Requires-Dist: patronus; extra == 'dev'
 Requires-Dist: pytest-asyncio>=0.25.0; extra == 'dev'
 Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
 Requires-Dist: pytest>=8.3.4; extra == 'dev'
{judgeval-0.0.20.dist-info → judgeval-0.0.21.dist-info}/RECORD RENAMED
@@ -1,14 +1,14 @@
 judgeval/__init__.py,sha256=dtXxsCmI4eEsZdGSUMy8P_pA0bc2-OSGAgb2C__yJoA,252
 judgeval/clients.py,sha256=6VQmEqmfCngUdS2MuPBIpHvtDFqOENm8-_BmMvjLyRQ,944
-judgeval/constants.py,sha256=i8JIDUyo38Vt0R1n0GRA4FaakkBC5F2o4hQa0ncSF2E,5008
+judgeval/constants.py,sha256=VhJppAECTUDQwzC_FpzJw2wPlkYoogsadHxaJIY_J8U,5073
 judgeval/evaluation_run.py,sha256=RgJD60lJsunNQzObjo7iXnAzXWgubCLOAAuuamAAuoI,6354
-judgeval/judgment_client.py,sha256=evlvcrYO9pF-oCgcvlGE59iODN0C6GJtn7bySFU_88k,23384
+judgeval/judgment_client.py,sha256=5lqp9X67qPzBUu7kQYETslsc3L5JjxrDVgVLslF07A0,24173
 judgeval/rules.py,sha256=ebsiDEBVAnYTQxwVNvh_RpmKeWBnjQXgHs8KofTjcAs,15526
-judgeval/run_evaluation.py,sha256=yLW24kFcw0xzXHvnDclYqtujTww6SDwvut6HM1x7SXk,21505
+judgeval/run_evaluation.py,sha256=YOzkyeWl-r3vaz0jB5nM-1VULi7ALmJ9_f58ENqexXk,23827
 judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
 judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
 judgeval/common/logger.py,sha256=KO75wWXCxhUHUMvLaTU31ZzOk6tkZBa7heQ7y0f-zFE,6062
-judgeval/common/tracer.py,sha256=FYrAuav6OiiawHLQ2e154MLvCBMdh-z_ucU2h7XK08M,45295
+judgeval/common/tracer.py,sha256=WFjFNf3NZ2BN8UAu2MG0F3Om9LgJNma3m_GrxyXgJqE,46655
 judgeval/common/utils.py,sha256=LUQV5JfDr6wj7xHAJoNq-gofNZ6mjXbeKrGKzBME1KM,33533
 judgeval/data/__init__.py,sha256=QykVE22Qf-I2f1g-jC9-iQyLNXgDmX1-vHbCgZg8Ra8,558
 judgeval/data/api_example.py,sha256=dzkrQ0xno08y6qNfqL2djXbapUyc2B2aQ5iANn0o4CY,3667
@@ -27,7 +27,7 @@ judgeval/judges/mixture_of_judges.py,sha256=IJoi4Twk8ze1CJWVEp69k6TSqTCTGrmVYQ0q
 judgeval/judges/together_judge.py,sha256=l00hhPerAZXg3oYBd8cyMtWsOTNt_0FIqoxhKJKQe3k,2302
 judgeval/judges/utils.py,sha256=9lvUxziGV86ISvVFxYBWc09TWFyAQgUTyPf_a9mD5Rs,2686
 judgeval/scorers/__init__.py,sha256=gkeKJvjXhswCnkEyjijrVvGVM3Om86egrZ-PUOGvNvI,1158
-judgeval/scorers/api_scorer.py,sha256=wGqTQCbUE7uE-PzaKcCmexAqutdTunjFR0zVA6bUxdE,2518
+judgeval/scorers/api_scorer.py,sha256=NQ_CrrUPhSUk1k2Q8rKpCG_TU2FT32sFEqvb-Yi54B0,2688
 judgeval/scorers/base_scorer.py,sha256=xdUlY3CnLdCQ1Z5iUeY22Bim5v-OQruZmaVF_4Y1mC0,2183
 judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
 judgeval/scorers/judgeval_scorer.py,sha256=oIkfoGXA09wL_vcK1DRibzQSA-MFNa-hmw1IhGBErf8,6592
@@ -36,19 +36,19 @@ judgeval/scorers/score.py,sha256=GALVmeApP1Cyih2vY93zRaU6RShtW4jJDG47Pm6yfnw,186
 judgeval/scorers/utils.py,sha256=iHQVTlIANbmCTXz9kTeSdOytgUZ_T74Re61ajqsk_WQ,6827
 judgeval/scorers/judgeval_scorers/__init__.py,sha256=xFRb62sp4JmBUSeuAB_pC_7kEGp-lGdqCRIu9--Bbdg,5992
 judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=mZ6b_5Dl04k3PaG24ICBajB_j43ody1II1OJhO1DkXo,1648
-judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=690G5askjE8dcbKPGvCF6JxAEM9QJUqb-3K-D6lI6oM,463
-judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=CqvvjV7AZqPlXh-PZaPKYPILHr15u4bIYiKBFjlk5i0,457
-judgeval/scorers/judgeval_scorers/api_scorers/comparison.py,sha256=6Q1qbsANOoZ3PM8n_gtZLIMbTBB9879L3acRelNJ6Uk,1001
-judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py,sha256=2zBrm_EEc143bmPA4HVcf8XtQeuc_BexczGx-SHlwRY,473
-judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py,sha256=NyojBWy_lRYx8diREulSK8s9dfYdZav4eZjg3TwUm0M,461
-judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py,sha256=wROMWOliCnB39ftX9TdeZmG9y0vrnxIGVby65tLOQRU,574
-judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=qxnvEDeKRlyzxX3EX53sW4oXxAM8Fj_q6ibdTxJNTAc,1076
-judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=gNf_i5c0jjpz2zCGhe7TtDMLKxc1PdOExJMFB5X7hSg,442
-judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py,sha256=esO76hEp0NzeBUdoSICPLdx5AeA5zWSt_2zpcSgvGis,442
-judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=ffYwH3CexPkKgo1rCALMivypROQjG5WWEsKXEFZxe2k,446
-judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=t1lWYOF0Pxvw5-NrI1Dt9FojaOncOCRlZc4a2SA20h4,477
-judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py,sha256=CAZBQKwNSqpqAoOgStYfr-yP1Brug_6VRimRIQY-zdg,894
-judgeval/scorers/judgeval_scorers/api_scorers/summarization.py,sha256=-E3oxYbI0D_0q-_fGWh2jQHW9O4Pu7I7xvLWsHU6cn8,450
+judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=Fnd9CVIOZ73sWEWymsU5eBrrZqPFjMZ0BKpeW-PDyTg,711
+judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=oETeN9K0HSIRdL2SDqn82Vskpwh5SlKnZvs5VDm2OBU,658
+judgeval/scorers/judgeval_scorers/api_scorers/comparison.py,sha256=kuzf9OWvpY38yYSwlBgneLkUZwJNM4FQqvbS66keA90,1249
+judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py,sha256=tpSuzFAaW8X9xqA0aLLKwh7qmBK0Pc_bJZMIe_q412U,770
+judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py,sha256=pFVhk4pLtQ-FnNlbI-dFF-SIh69Jza7erHqiPkFWoBo,758
+judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py,sha256=RQ6DZwEhChfecd89Ey-T7ke--7qTaXZlRsNxwH8gaME,823
+judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=Pb3CiNF2Ca826B92wJCVAi_68lJjLhqqCKwQKaflSUg,1294
+judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=-BwOapqjryYNKNydtdkUiKIij76dY0O1jBmdc6dKazQ,692
+judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py,sha256=ntEEeTANEOsGlcbiTAF_3r6BeSJEaVDns8po8T0L6Vg,692
+judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=k5gDOki-8KXrZXydvdSqDt3NZqQ28hXoOCHQf6jNxr4,686
+judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=XnSGEkQfwVqaqnHEGMCsxNiHVzrsrej48uDbLoWc8CQ,678
+judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py,sha256=mMKEuR87_yanEuZJ5YSGFMHDD_oLVZ6-rQuciFaDOMA,1095
+judgeval/scorers/judgeval_scorers/api_scorers/summarization.py,sha256=QmWB8bVbDYHY5FcF0rYZE_3c2XXgMLRmR6aXJWfdMC4,655
 judgeval/scorers/judgeval_scorers/classifiers/__init__.py,sha256=Qt81W5ZCwMvBAne0LfQDb8xvg5iOG1vEYP7WizgwAZo,67
 judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py,sha256=8iTzMvou1Dr8pybul6lZHKjc9Ye2-0_racRGYkhEdTY,74
 judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256=ly72Z7s_c8NID6-nQnuW8qEGEW2MqdvpJ-5WfXzbAQg,2579
@@ -88,7 +88,7 @@ judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py
 judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py,sha256=Qk7lwHgRPYeGoxTOyclAh1VfGItfvHJ6l1t7Nk3SWFM,20927
 judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
 judgeval/utils/alerts.py,sha256=RgW5R9Dn3Jtim0OyAYDbNzjoX2s6SA4Mw16GyyaikjI,1424
-judgeval-0.0.20.dist-info/METADATA,sha256=cz7uKUuHAc1rdANc8IJ5klQhlmrqOu_K1y6wwEIAdFU,1283
-judgeval-0.0.20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-judgeval-0.0.20.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
-judgeval-0.0.20.dist-info/RECORD,,
+judgeval-0.0.21.dist-info/METADATA,sha256=jQW4w6jGNaHvPWTcqX3ZGr_SKeCpNl7DsNr-cwrYHsA,1378
+judgeval-0.0.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.0.21.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.0.21.dist-info/RECORD,,