judgeval 0.3.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,192 @@
1
+ """Local evaluation queue for batching custom scorer evaluations.
2
+
3
+ This module provides a simple in-memory queue for EvaluationRun objects that contain
4
+ only local (BaseScorer) scorers. Useful for batching evaluations and processing them
5
+ either synchronously or in a background thread.
6
+ """
7
+
8
+ import queue
9
+ import threading
10
+ from typing import Callable, List, Optional
11
+ import time
12
+
13
+ from judgeval.common.logger import judgeval_logger
14
+ from judgeval.constants import MAX_CONCURRENT_EVALUATIONS
15
+ from judgeval.data import ScoringResult
16
+ from judgeval.evaluation_run import EvaluationRun
17
+ from judgeval.utils.async_utils import safe_run_async
18
+ from judgeval.scorers import BaseScorer
19
+ from judgeval.scorers.score import a_execute_scoring
20
+
21
+
22
class LocalEvaluationQueue:
    """Lightweight in-memory queue for local evaluation runs.

    Only supports EvaluationRuns with local scorers (BaseScorer instances).
    API scorers (APIScorerConfig) are not supported as they have their own queue.

    Runs are stored in a ``queue.Queue``; ``None`` is used as a sentinel that
    tells consumers to stop. Every ``get`` is paired with exactly one
    ``task_done`` so that ``wait_for_completion`` reflects the real amount of
    outstanding work.
    """

    def __init__(
        self, max_concurrent: int = MAX_CONCURRENT_EVALUATIONS, num_workers: int = 4
    ):
        """Create an empty queue.

        Args:
            max_concurrent: Total concurrency budget; it is divided between
                the workers when a run is scored.
            num_workers: Number of background threads started by
                ``start_workers``.

        Raises:
            ValueError: If ``num_workers`` is not a positive integer.
        """
        if num_workers <= 0:
            raise ValueError("num_workers must be a positive integer.")
        self._queue: queue.Queue[Optional[EvaluationRun]] = queue.Queue()
        self._max_concurrent = max_concurrent
        self._num_workers = num_workers  # Number of worker threads
        self._worker_threads: List[threading.Thread] = []
        self._shutdown_event = threading.Event()

    def enqueue(self, evaluation_run: EvaluationRun) -> None:
        """Add evaluation run to the queue."""
        self._queue.put(evaluation_run)

    def _process_run(self, evaluation_run: EvaluationRun) -> List[ScoringResult]:
        """Execute evaluation run locally and return results.

        Raises:
            ValueError: If the run contains no ``BaseScorer`` instances.
        """
        local_scorers = [s for s in evaluation_run.scorers if isinstance(s, BaseScorer)]

        if not local_scorers:
            raise ValueError(
                "LocalEvaluationQueue only supports runs with local scorers (BaseScorer). "
                "Found only APIScorerConfig instances."
            )

        # Integer division yields 0 when max_concurrent < num_workers; clamp to
        # 1 so each run can always make progress (a zero concurrency limit
        # would presumably stall the scorer - TODO confirm against
        # a_execute_scoring).
        per_worker_limit = max(1, self._max_concurrent // self._num_workers)

        return safe_run_async(
            a_execute_scoring(
                evaluation_run.examples,
                local_scorers,
                model=evaluation_run.model,
                throttle_value=0,
                max_concurrent=per_worker_limit,
                show_progress=False,
            )
        )

    def run_all(
        self,
        callback: Optional[Callable[[EvaluationRun, List[ScoringResult]], None]] = None,
    ) -> None:
        """Process all queued runs synchronously.

        Processing stops when the queue is empty or a shutdown sentinel is
        found (the sentinel is put back for any background workers). An
        exception raised by a run or by ``callback`` propagates to the caller.

        Args:
            callback: Optional function called after each run with (run, results).
        """
        while True:
            try:
                # Non-blocking: a concurrent consumer may empty the queue
                # between an emptiness check and a blocking get().
                run = self._queue.get_nowait()
            except queue.Empty:
                return

            try:
                if run is None:  # Sentinel for worker shutdown
                    self._queue.put(None)
                    return
                results = self._process_run(run)
                if callback:
                    callback(run, results)
            finally:
                # Always balance the get() above, including for the sentinel
                # and for failed runs; otherwise join() would never return.
                self._queue.task_done()

    def start_workers(
        self,
        callback: Optional[Callable[[EvaluationRun, List[ScoringResult]], None]] = None,
    ) -> List[threading.Thread]:
        """Start multiple background threads to process runs in parallel.

        Args:
            callback: Optional function called after each run with (run, results).

        Returns:
            List of started worker threads.
        """
        # Bind the current event so these workers keep honouring the shutdown
        # request that targets them even after stop_workers() installs a fresh
        # event for the next batch of workers.
        shutdown_event = self._shutdown_event

        def _worker(worker_id: int) -> None:
            while not shutdown_event.is_set():
                try:
                    # Use timeout so workers can check shutdown event periodically
                    run = self._queue.get(timeout=1.0)
                except queue.Empty:
                    # Timeout - check shutdown event and continue
                    continue

                if run is None:  # Sentinel to stop worker
                    # Put sentinel back for other workers
                    self._queue.put(None)
                    self._queue.task_done()
                    break

                try:
                    results = self._process_run(run)
                    if callback:
                        callback(run, results)
                except Exception as exc:
                    judgeval_logger.error(
                        f"Worker {worker_id} error processing {run.eval_name}: {exc}"
                    )
                    # Continue processing other runs instead of shutting down all workers
                finally:
                    self._queue.task_done()

        # Start worker threads
        for i in range(self._num_workers):
            thread = threading.Thread(target=_worker, args=(i,), daemon=True)
            thread.start()
            self._worker_threads.append(thread)

        return self._worker_threads

    def start_worker(
        self,
        callback: Optional[Callable[[EvaluationRun, List[ScoringResult]], None]] = None,
    ) -> Optional[threading.Thread]:
        """Start background processing (kept for backward compatibility).

        This delegates to ``start_workers``, so ``num_workers`` threads are
        started; only the first tracked thread is returned.

        Args:
            callback: Optional function called after each run with (run, results).

        Returns:
            The first tracked worker thread, or None if no threads were started.
        """
        threads = self.start_workers(callback)
        return threads[0] if threads else None

    def wait_for_completion(self, timeout: Optional[float] = None) -> bool:
        """Wait for all queued tasks to complete.

        Args:
            timeout: Maximum time to wait in seconds. None means wait indefinitely.

        Returns:
            True if all tasks completed, False if timeout occurred.
        """
        try:
            if timeout is None:
                self._queue.join()
                return True

            # Monotonic clock: the deadline must not move when the system
            # wall clock is adjusted.
            start_time = time.monotonic()
            while not self._queue.empty() or self._queue.unfinished_tasks > 0:
                if time.monotonic() - start_time > timeout:
                    return False
                time.sleep(0.1)
            return True
        except Exception:
            # Defensive best-effort: report "not completed" rather than raise.
            return False

    def _discard_sentinels(self) -> None:
        """Remove leftover shutdown sentinels, keeping real runs queued in order."""
        # One full rotation of the queue: each real run is re-queued at the
        # back (before its old slot is marked done, so the unfinished-task
        # count never dips), while sentinels are dropped.
        for _ in range(self._queue.qsize()):
            try:
                item = self._queue.get_nowait()
            except queue.Empty:
                break
            if item is not None:
                self._queue.put(item)
            self._queue.task_done()

    def stop_workers(self) -> None:
        """Signal all background workers to stop after current tasks complete.

        After the workers have been joined (up to 5 seconds each), leftover
        sentinels are removed so the queue can be reused by ``run_all``,
        ``wait_for_completion`` or a later ``start_workers`` call.
        """
        if not self._worker_threads:
            return

        # Signal shutdown
        self._shutdown_event.set()

        # Send sentinel to wake up any blocking workers
        for _ in range(self._num_workers):
            self._queue.put(None)

        # Wait for all workers to finish with timeout
        for thread in self._worker_threads:
            if thread.is_alive():
                thread.join(timeout=5.0)
                if thread.is_alive():
                    judgeval_logger.warning(
                        f"Worker thread {thread.name} did not shut down gracefully"
                    )

        self._worker_threads.clear()

        # Workers re-queue the sentinel they consume, so sentinels are always
        # left behind; drop them or they would stop future workers at once and
        # keep join() waiting forever.
        self._discard_sentinels()

        # Install a fresh event instead of clearing the old one: a worker that
        # outlived the join timeout still sees its own event set and exits
        # once its current run finishes.
        self._shutdown_event = threading.Event()
@@ -1,10 +1,12 @@
1
+ from __future__ import annotations
2
+
1
3
  import asyncio
2
4
  import concurrent.futures
3
5
  import time
4
6
  import orjson
5
7
  import sys
6
8
  import threading
7
- from typing import List, Dict, Union, Optional, Callable, Tuple, Any
9
+ from typing import List, Dict, Union, Optional, Callable, Tuple, Any, TYPE_CHECKING
8
10
  from rich import print as rprint
9
11
 
10
12
  from judgeval.data import ScorerData, ScoringResult, Example, Trace
@@ -17,10 +19,13 @@ from judgeval.constants import (
17
19
  from judgeval.common.exceptions import JudgmentAPIError
18
20
  from judgeval.common.api.api import JudgmentAPIException
19
21
  from judgeval.common.logger import judgeval_logger
20
- from judgeval.evaluation_run import EvaluationRun
21
- from judgeval.data.trace_run import TraceRun
22
- from judgeval.common.tracer import Tracer
23
- from judgeval.integrations.langgraph import JudgevalCallbackHandler
22
+
23
+
24
+ if TYPE_CHECKING:
25
+ from judgeval.common.tracer import Tracer
26
+ from judgeval.data.trace_run import TraceRun
27
+ from judgeval.evaluation_run import EvaluationRun
28
+ from judgeval.integrations.langgraph import JudgevalCallbackHandler
24
29
 
25
30
 
26
31
  def safe_run_async(coro):
@@ -282,7 +287,7 @@ def run_trace_eval(
282
287
  judgment_api_key: str,
283
288
  override: bool = False,
284
289
  function: Optional[Callable] = None,
285
- tracer: Optional[Union[Tracer, JudgevalCallbackHandler]] = None,
290
+ tracer: Optional[Union[Tracer, "JudgevalCallbackHandler"]] = None,
286
291
  examples: Optional[List[Example]] = None,
287
292
  ) -> List[ScoringResult]:
288
293
  # Call endpoint to check to see if eval run name exists (if we DON'T want to override and DO want to log results)
@@ -1,27 +1,29 @@
1
1
  from judgeval.scorers.api_scorer import APIScorerConfig
2
2
  from judgeval.constants import APIScorerType
3
- from typing import Mapping, Dict, Any
3
+ from typing import Dict, Any, Optional
4
4
  from judgeval.common.api import JudgmentApiClient, JudgmentAPIException
5
5
  import os
6
6
  from judgeval.common.exceptions import JudgmentAPIError
7
+ from copy import copy
8
+ from judgeval.common.logger import judgeval_logger
7
9
 
8
10
 
9
11
  def push_prompt_scorer(
10
12
  name: str,
11
13
  prompt: str,
12
- options: Mapping[str, float],
14
+ options: Optional[Dict[str, float]] = None,
13
15
  judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
14
16
  organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
15
17
  ) -> str:
16
18
  client = JudgmentApiClient(judgment_api_key, organization_id)
17
19
  try:
18
- r = client.save_scorer(name, prompt, dict(options))
20
+ r = client.save_scorer(name, prompt, options)
19
21
  except JudgmentAPIException as e:
20
22
  if e.status_code == 500:
21
23
  raise JudgmentAPIError(
22
24
  f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.error_detail}"
23
25
  )
24
- raise JudgmentAPIError(f"Failed to save classifier scorer: {e.error_detail}")
26
+ raise JudgmentAPIError(f"Failed to save prompt scorer: {e.error_detail}")
25
27
  return r["name"]
26
28
 
27
29
 
@@ -32,7 +34,7 @@ def fetch_prompt_scorer(
32
34
  ):
33
35
  client = JudgmentApiClient(judgment_api_key, organization_id)
34
36
  try:
35
- scorer_config = client.fetch_scorer(name)
37
+ scorer_config = client.fetch_scorer(name)["scorer"]
36
38
  scorer_config.pop("created_at")
37
39
  scorer_config.pop("updated_at")
38
40
  return scorer_config
@@ -42,7 +44,7 @@ def fetch_prompt_scorer(
42
44
  f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.error_detail}"
43
45
  )
44
46
  raise JudgmentAPIError(
45
- f"Failed to fetch classifier scorer '{name}': {e.error_detail}"
47
+ f"Failed to fetch prompt scorer '{name}': {e.error_detail}"
46
48
  )
47
49
 
48
50
 
@@ -72,7 +74,7 @@ class PromptScorer(APIScorerConfig):
72
74
  """
73
75
 
74
76
  prompt: str
75
- options: Mapping[str, float]
77
+ options: Optional[Dict[str, float]] = None
76
78
  score_type: APIScorerType = APIScorerType.PROMPT_SCORER
77
79
  judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or ""
78
80
  organization_id: str = os.getenv("JUDGMENT_ORG_ID") or ""
@@ -88,7 +90,7 @@ class PromptScorer(APIScorerConfig):
88
90
  return cls(
89
91
  name=name,
90
92
  prompt=scorer_config["prompt"],
91
- options=scorer_config["options"],
93
+ options=scorer_config.get("options"),
92
94
  judgment_api_key=judgment_api_key,
93
95
  organization_id=organization_id,
94
96
  )
@@ -98,12 +100,13 @@ class PromptScorer(APIScorerConfig):
98
100
  cls,
99
101
  name: str,
100
102
  prompt: str,
101
- options: Mapping[str, float],
103
+ options: Optional[Dict[str, float]] = None,
102
104
  judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or "",
103
105
  organization_id: str = os.getenv("JUDGMENT_ORG_ID") or "",
104
106
  ):
105
107
  if not scorer_exists(name, judgment_api_key, organization_id):
106
108
  push_prompt_scorer(name, prompt, options, judgment_api_key, organization_id)
109
+ judgeval_logger.info(f"Successfully created PromptScorer: {name}")
107
110
  return cls(
108
111
  name=name,
109
112
  prompt=prompt,
@@ -117,13 +120,6 @@ class PromptScorer(APIScorerConfig):
117
120
  )
118
121
 
119
122
  # Setter functions. Each setter function pushes the scorer to the DB.
120
- def set_name(self, name: str):
121
- """
122
- Updates the name of the scorer.
123
- """
124
- self.name = name
125
- self.push_prompt_scorer()
126
-
127
123
  def set_threshold(self, threshold: float):
128
124
  """
129
125
  Updates the threshold of the scorer.
@@ -140,8 +136,9 @@ class PromptScorer(APIScorerConfig):
140
136
  """
141
137
  self.prompt = prompt
142
138
  self.push_prompt_scorer()
139
+ judgeval_logger.info(f"Successfully updated prompt for {self.name}")
143
140
 
144
- def set_options(self, options: Mapping[str, float]):
141
+ def set_options(self, options: Dict[str, float]):
145
142
  """
146
143
  Updates the options with the new options.
147
144
 
@@ -150,6 +147,7 @@ class PromptScorer(APIScorerConfig):
150
147
  """
151
148
  self.options = options
152
149
  self.push_prompt_scorer()
150
+ judgeval_logger.info(f"Successfully updated options for {self.name}")
153
151
 
154
152
  def append_to_prompt(self, prompt_addition: str):
155
153
  """
@@ -157,6 +155,7 @@ class PromptScorer(APIScorerConfig):
157
155
  """
158
156
  self.prompt += prompt_addition
159
157
  self.push_prompt_scorer()
158
+ judgeval_logger.info(f"Successfully appended to prompt for {self.name}")
160
159
 
161
160
  # Getters
162
161
  def get_prompt(self) -> str | None:
@@ -165,11 +164,11 @@ class PromptScorer(APIScorerConfig):
165
164
  """
166
165
  return self.prompt
167
166
 
168
- def get_options(self) -> Mapping[str, float] | None:
167
+ def get_options(self) -> Dict[str, float] | None:
169
168
  """
170
169
  Returns the options of the scorer.
171
170
  """
172
- return self.options
171
+ return copy(self.options) if self.options is not None else None
173
172
 
174
173
  def get_name(self) -> str | None:
175
174
  """
judgeval/scorers/score.py CHANGED
@@ -17,6 +17,7 @@ from judgeval.scorers import BaseScorer
17
17
  from judgeval.scorers.utils import clone_scorers
18
18
  from judgeval.common.logger import judgeval_logger
19
19
  from judgeval.judges import JudgevalJudge
20
+ from judgeval.constants import DEFAULT_GPT_MODEL
20
21
 
21
22
 
22
23
  async def safe_a_score_example(
@@ -48,16 +49,18 @@ async def safe_a_score_example(
48
49
  judgeval_logger.error(f"Error during scoring: {str(e)}")
49
50
  scorer.error = str(e)
50
51
  scorer.success = False
52
+ scorer.score = 0
51
53
  return
52
54
 
53
55
 
54
56
  async def a_execute_scoring(
55
57
  examples: List[Example],
56
58
  scorers: List[BaseScorer],
57
- model: Optional[Union[str, List[str], JudgevalJudge]] = "gpt-4.1",
59
+ model: Optional[Union[str, List[str], JudgevalJudge]] = DEFAULT_GPT_MODEL,
58
60
  ignore_errors: bool = False,
59
61
  throttle_value: int = 0,
60
62
  max_concurrent: int = 100,
63
+ show_progress: bool = True,
61
64
  ) -> List[ScoringResult]:
62
65
  """
63
66
  Executes evaluations of `Example`s asynchronously using one or more `BaseScorer`s.
@@ -70,8 +73,7 @@ async def a_execute_scoring(
70
73
  ignore_errors (bool): Whether to ignore errors during evaluation.
71
74
  throttle_value (int): The amount of time to wait between starting each task.
72
75
  max_concurrent (int): The maximum number of concurrent tasks.
73
-
74
- _use_bar_indicator (bool): Whether to use a progress bar indicator.
76
+ show_progress (bool): Whether to show the progress bar indicator.
75
77
 
76
78
  Returns:
77
79
  List[ScoringResult]: A list of `ScoringResult` objects containing the evaluation results.
@@ -100,16 +102,37 @@ async def a_execute_scoring(
100
102
  tasks = []
101
103
  cloned_scorers: List[BaseScorer]
102
104
 
103
- with tqdm_asyncio(
104
- desc=f"Evaluating {len(examples)} example(s) in parallel",
105
- unit="Example",
106
- total=len(examples),
107
- bar_format="{desc}: |{bar}|{percentage:3.0f}% ({n_fmt}/{total_fmt}) [Time Taken: {elapsed}, {rate_fmt}{postfix}]",
108
- ) as pbar:
105
+ if show_progress:
106
+ with tqdm_asyncio(
107
+ desc=f"Evaluating {len(examples)} example(s) in parallel",
108
+ unit="Example",
109
+ total=len(examples),
110
+ bar_format="{desc}: |{bar}|{percentage:3.0f}% ({n_fmt}/{total_fmt}) [Time Taken: {elapsed}, {rate_fmt}{postfix}]",
111
+ ) as pbar:
112
+ for i, ex in enumerate(examples):
113
+ if isinstance(ex, Example):
114
+ if len(scorers) == 0:
115
+ pbar.update(1)
116
+ continue
117
+
118
+ cloned_scorers = clone_scorers(scorers)
119
+ task = execute_with_semaphore(
120
+ func=a_eval_examples_helper,
121
+ scorers=cloned_scorers,
122
+ example=ex,
123
+ scoring_results=scoring_results,
124
+ score_index=i,
125
+ ignore_errors=ignore_errors,
126
+ pbar=pbar,
127
+ )
128
+ tasks.append(asyncio.create_task(task))
129
+
130
+ await asyncio.sleep(throttle_value)
131
+ await asyncio.gather(*tasks)
132
+ else:
109
133
  for i, ex in enumerate(examples):
110
134
  if isinstance(ex, Example):
111
135
  if len(scorers) == 0:
112
- pbar.update(1)
113
136
  continue
114
137
 
115
138
  cloned_scorers = clone_scorers(scorers)
@@ -120,7 +143,7 @@ async def a_execute_scoring(
120
143
  scoring_results=scoring_results,
121
144
  score_index=i,
122
145
  ignore_errors=ignore_errors,
123
- pbar=pbar,
146
+ pbar=None,
124
147
  )
125
148
  tasks.append(asyncio.create_task(task))
126
149
 
@@ -0,0 +1,36 @@
1
+ """Async utilities for judgeval."""
2
+
3
+ import asyncio
4
+ import concurrent.futures
5
+ from typing import Awaitable, TypeVar
6
+
7
+
8
# Type variable standing for whatever the awaited coroutine returns
T = TypeVar("T")


def safe_run_async(coro: Awaitable[T]) -> T:  # type: ignore[type-var]
    """Run *coro* to completion from synchronous code and return its result.

    Two situations are handled:

    * The calling thread has no running event loop: the coroutine is handed
      straight to ``asyncio.run``.
    * The calling thread already has a running loop: ``asyncio.run`` is
      executed on a thread from a temporary pool instead, so that event loops
      are never nested (which would raise ``RuntimeError``). The caller
      blocks until that thread has finished.

    Args:
        coro: The coroutine to execute.

    Returns:
        The result returned by *coro*.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # get_running_loop() raises when this thread has no active loop,
        # which means asyncio.run can be used directly.
        return asyncio.run(coro)
    else:
        with concurrent.futures.ThreadPoolExecutor() as pool:
            pending = pool.submit(asyncio.run, coro)
            return pending.result()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.3.2
3
+ Version: 0.5.0
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -10,27 +10,24 @@ License-File: LICENSE.md
10
10
  Classifier: Operating System :: OS Independent
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Requires-Python: >=3.11
13
- Requires-Dist: anthropic
14
13
  Requires-Dist: boto3
15
- Requires-Dist: datamodel-code-generator>=0.31.1
16
- Requires-Dist: google-genai
17
- Requires-Dist: groq>=0.30.0
18
14
  Requires-Dist: langchain-anthropic
19
15
  Requires-Dist: langchain-core
20
16
  Requires-Dist: langchain-huggingface
21
17
  Requires-Dist: langchain-openai
22
18
  Requires-Dist: litellm>=1.61.15
23
- Requires-Dist: matplotlib>=3.10.3
24
- Requires-Dist: nest-asyncio
25
- Requires-Dist: openai
19
+ Requires-Dist: nest-asyncio>=1.6.0
26
20
  Requires-Dist: opentelemetry-api>=1.34.1
27
21
  Requires-Dist: opentelemetry-sdk>=1.34.1
28
22
  Requires-Dist: orjson>=3.9.0
29
- Requires-Dist: pandas
30
- Requires-Dist: python-dotenv==1.0.1
31
- Requires-Dist: python-slugify>=8.0.4
23
+ Requires-Dist: python-dotenv
32
24
  Requires-Dist: requests
33
- Requires-Dist: together
25
+ Requires-Dist: rich
26
+ Provides-Extra: langchain
27
+ Requires-Dist: langchain-anthropic; extra == 'langchain'
28
+ Requires-Dist: langchain-core; extra == 'langchain'
29
+ Requires-Dist: langchain-huggingface; extra == 'langchain'
30
+ Requires-Dist: langchain-openai; extra == 'langchain'
34
31
  Description-Content-Type: text/markdown
35
32
 
36
33
  <div align="center">
@@ -1,53 +1,56 @@
1
- judgeval/__init__.py,sha256=HM1M8hmqRum6G554QKkXhB4DF4f5eh_xtYo0Kf-t3kw,332
2
- judgeval/clients.py,sha256=JnB8n90GyXiYaGmSEYaA67mdJSnr3SIrzArao7NGebw,980
3
- judgeval/constants.py,sha256=hWed25HwGUJy-tePbtoUZ0_Zg0X_MkAH84KiH-OHHFI,4150
4
- judgeval/dataset.py,sha256=rjV54XNTslNNtf-Uu2ndDIh602ZwSCFhPg2NuckDJ-w,6081
5
- judgeval/evaluation_run.py,sha256=edNpO444Fwt2ykWsflIzlYdDJUlUfbpXHHQSKfFS4y0,2876
6
- judgeval/judgment_client.py,sha256=vPoxbmxAlhbG5rXXqxWjMbyEqOI044BaQanr1fev2CE,11723
1
+ judgeval/__init__.py,sha256=5Lm1JMYFREJGN_8X-Wpruu_ovwGLJ08gCzNAt-u-pQE,419
2
+ judgeval/clients.py,sha256=HHul68PV1om0dxsVZZu90TtCiy5zaqAwph16jXTQzQo,989
3
+ judgeval/constants.py,sha256=UNoTLHgbpZHRInPM2ZaI3m0XokPkee5ILlg20reqhzo,4180
4
+ judgeval/dataset.py,sha256=vOrDKam2I-K1WcVF5IBkQruCDvXTc8PRaFm4-dV0lXs,6220
5
+ judgeval/evaluation_run.py,sha256=FJpnc1sGncmAOAnEUO0n2vNXjlycljGqBdV99qPT5og,3087
6
+ judgeval/judgment_client.py,sha256=tGhENRb2YVIe2WUlcssC8DuEijeUC7Ajj_rh_Dh7bzA,11878
7
+ judgeval/local_eval_queue.py,sha256=fAI0_OlvCr-WOCQWw18C4JIRJHKYzlyGzsGUm8LcsYE,7076
7
8
  judgeval/rules.py,sha256=CoQjqmP8daEXewMkplmA-7urubDtweOr5O6z8klVwLI,20031
8
- judgeval/run_evaluation.py,sha256=7J6FHhWhB-IDPMSOcWkrjTpSNm2v3s_KBq8Np3y2pys,27652
9
+ judgeval/run_evaluation.py,sha256=4kcaw3R_akhxqutGFGTaBS2pqD-3d0ET7zMDL1_7HK4,27741
9
10
  judgeval/version_check.py,sha256=FoLEtpCjDw2HuDQdpw5yT29UtwumSc6ZZN6AV_c9Mnw,1057
10
11
  judgeval/common/__init__.py,sha256=KH-QJyWtQ60R6yFIBDYS3WGRiNpEu1guynpxivZvpBQ,309
11
12
  judgeval/common/exceptions.py,sha256=OkgDznu2wpBQZMXiZarLJYNk1HIcC8qYW7VypDC3Ook,556
12
13
  judgeval/common/logger.py,sha256=514eFLYWS_UL8VY-zAR2ePUlpQe4rbYlleLASFllLE4,1511
13
14
  judgeval/common/utils.py,sha256=oxGDRVWOICKWeyGgsoc36_yAyHSYF4XtH842Mkznwis,34739
14
15
  judgeval/common/api/__init__.py,sha256=-E7lpZz1fG8puR_aYUMfPmQ-Vyhd0bgzoaU5EhIuFjQ,114
15
- judgeval/common/api/api.py,sha256=wty02HYANeOYlM8fHOLc33ux5bu9Ieq7iRqCr-UP0ng,14157
16
- judgeval/common/api/constants.py,sha256=vAW94pbyTS6rv1TKpt7z6xxMJvTaAxFiy1D4kzuLHeg,4567
16
+ judgeval/common/api/api.py,sha256=uuLH6veC0LewfZ1IFiiUi5_OV7zTa7xTIK9LRlLoufc,13743
17
+ judgeval/common/api/constants.py,sha256=DXej0m8HEhb871SdiR8t_o4fzeMoQjHYqb_X0Plj8wY,4577
18
+ judgeval/common/api/json_encoder.py,sha256=XsScZe9hZP56yuxQ-3Ox6K8DcbjWxc2Yq7FcLF9qkUE,5852
17
19
  judgeval/common/storage/__init__.py,sha256=a-PI7OL-ydyzugGUKmJKRBASnK-Q-gs82L9K9rSyJP8,90
18
20
  judgeval/common/storage/s3_storage.py,sha256=0-bNKheqJJyBZ92KGrzQtd1zocIRWBlfn_58L4a-Ay0,3719
19
21
  judgeval/common/tracer/__init__.py,sha256=tJCJsmVmrL89Phv88gNCJ-j0ITPez6lh8vhMAAlLNSc,795
20
22
  judgeval/common/tracer/constants.py,sha256=yu5y8gMe5yb1AaBkPtAH-BNwIaAR3NwYCRoSf45wp5U,621
21
- judgeval/common/tracer/core.py,sha256=blHEh61CE5kZLYCgyRF4kU6dVzi_Ko6DrnBpw2-jByI,73973
23
+ judgeval/common/tracer/core.py,sha256=rI7P0CaarP5FLQZmOGWpOJkjdf6WUgSds6i_QF04J3M,85071
22
24
  judgeval/common/tracer/otel_exporter.py,sha256=kZLlOQ6afQE4dmb9H1wgU4P3H5PG1D_zKyvnpWcT5Ak,3899
23
25
  judgeval/common/tracer/otel_span_processor.py,sha256=W7SM62KnxJ48vC9WllIHRKaLlvxkCwqYoT4KqZLfGNs,6497
26
+ judgeval/common/tracer/providers.py,sha256=3c3YOtKuoBjlTL0rc2HAGnUpppqvsyzrN5H6EKCqEi0,2733
24
27
  judgeval/common/tracer/span_processor.py,sha256=eFjTgSWSkM6BWE94CrvgafDg_WkxLsFL_MafwBG-p9M,1145
25
- judgeval/common/tracer/span_transformer.py,sha256=nCnwRC52OKfYRFnsOwGdPaqb_U17yn5S_9jfhv1GaLM,7803
28
+ judgeval/common/tracer/span_transformer.py,sha256=mUmfUYjEekUEOXAZMmH0WEF94ge05EBi5ftSc-T91zQ,7314
26
29
  judgeval/common/tracer/trace_manager.py,sha256=ltiXcWC-68DRc8uSa28qHiWRSIBf6NpYOPkZYooR8tg,3086
27
30
  judgeval/data/__init__.py,sha256=1QagDcSQtfnJ632t9Dnq8d7XjAqhmY4mInOWt8qH9tM,455
28
31
  judgeval/data/example.py,sha256=kRskIgsjwcvv2Y8jaPwV-PND7zlmMbFsvRVQ_b7SZY0,914
29
- judgeval/data/judgment_types.py,sha256=KE1HrFLfSxiu1zutaiZ7B7La9PGXIAsoWpo_5iy645c,8336
32
+ judgeval/data/judgment_types.py,sha256=1DTpCnIdDM93Rozu9Dr812Q5K3lZfawMcWbPG2ofbxM,8407
30
33
  judgeval/data/result.py,sha256=OtSnBUrdQpjyAqxXRLTW3wC9v9lOm_GqzL14ccRQxrg,2124
31
34
  judgeval/data/scorer_data.py,sha256=5QBHtvOIWOq0Rn9_uPJzAMRYMlWxMB-rXnG_6kV4Z4Y,2955
32
35
  judgeval/data/tool.py,sha256=iWQSdy5uNbIeACu3gQy1DC2oGYxRVYNfkkczWdQMAiA,99
33
- judgeval/data/trace.py,sha256=tDOuYFPUssQInjsmwyxcXq-W3IB29Vq340VzqafuKJc,6942
34
- judgeval/data/trace_run.py,sha256=c6pRSv09Vj016hxM49I3kMftCwWg8hhkfT_1kBXluSI,1600
36
+ judgeval/data/trace.py,sha256=LG-IZksynC1VgfUBuBfIIfR1DT9Bn-sY4vIj6Rc9K6Q,2791
37
+ judgeval/data/trace_run.py,sha256=ZCAzktgOSUPD0p1XQj8qGcF-DdsdQFNZM2dtY0aKGbE,1657
35
38
  judgeval/data/scripts/fix_default_factory.py,sha256=lvp2JwYZqz-XpD9LZNa3mANZVP-jJSZoNzolI6JWERM,591
36
39
  judgeval/data/scripts/openapi_transform.py,sha256=Sm04JClzyP1ga8KA3gkIdsae8Hlx-XU7-x0gHCQYOhg,3877
37
40
  judgeval/integrations/langgraph.py,sha256=kJXLsgBY7DgsUTZyVQ47deDgHm887brFHfyIbuyerGw,29986
38
41
  judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
39
42
  judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY,986
40
- judgeval/judges/litellm_judge.py,sha256=yt6QvwKMmxZcrUtjbn3EiO5aVg7CHM2YZkBCSQLS8jk,2509
41
- judgeval/judges/mixture_of_judges.py,sha256=cecQ8mRmz2-dDoZl2MGsrhZICkpIvRovGPK3su0kc8s,14889
43
+ judgeval/judges/litellm_judge.py,sha256=K9yCGOmozt7sYO0u8CHWyZNi8mXnSR3pPkP8yVsvuRc,2561
44
+ judgeval/judges/mixture_of_judges.py,sha256=iTNjTX4Le1nCwGRm9qfMCv1lQjgqoIw3OE0teiLubwo,14946
42
45
  judgeval/judges/together_judge.py,sha256=5FADUhs6-FN1ZVV_1D3-8_gu9mPbZiG0PYTpme41SfM,2336
43
- judgeval/judges/utils.py,sha256=0CF9qtIUQUL3-W-qTGpmTjZbkUUBAM6TslDsrCHnTBU,2725
46
+ judgeval/judges/utils.py,sha256=_t6oYN9q63wyP7D4jI8X0bNmvVw7OfaE7uMTYDVS14E,2782
44
47
  judgeval/scorers/__init__.py,sha256=4H_cinTQ4EogZv59YEV-3U9EOTLppNwgAPTi1-jI9Fw,746
45
48
  judgeval/scorers/agent_scorer.py,sha256=TjwD_YglSywr3EowEojiCyg5qDgCRa5LRGc5nFdmIBc,703
46
49
  judgeval/scorers/api_scorer.py,sha256=xlhqkeMUBFxl8daSXOTWOYwZjBAz7o6b4sVD5f8cIHw,2523
47
50
  judgeval/scorers/base_scorer.py,sha256=eDfQk8N8TQfM1ayJDWr0NTdSQxcbk9-VZHd0Igb9EbI,2878
48
51
  judgeval/scorers/example_scorer.py,sha256=2n45y3LMV1Q-ARyXLHqvVWETlnY1DqS7OLzPu9IBGz8,716
49
52
  judgeval/scorers/exceptions.py,sha256=ACDHK5-TWiF3NTk-wycaedpbrdobm-CvvC1JA_iP-Mk,179
50
- judgeval/scorers/score.py,sha256=2-M_AmOjIQR2c0qvuB4WIIQD-7zSNdzsWC8ttqltw2g,6601
53
+ judgeval/scorers/score.py,sha256=SWyoqOOvyLpLy39tLyb_Q94sdh9r_IuDv6YNREw52lg,7546
51
54
  judgeval/scorers/utils.py,sha256=HQOYTJtNnsi_aPfMssePAaBbXpAv7LXgwUlWlDFuN2g,3965
52
55
  judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
56
  judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=GX4KkwPR2p-c0Y5mZingJa8EUfjAbMGhrmRBDBunOGw,1484
@@ -58,14 +61,15 @@ judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=NABO_iBd
58
61
  judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ps51bTgQsD9xGYsk1v9bx0WxQMqywSllCE9_xlJkLd8,531
59
62
  judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=SnFLvU4FGsMeUVUp0SGHSy_6wgfwr_vHPGnZx5YJl_Q,691
60
63
  judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=aQzu-TiGqG74JDQ927evv5yGmnZw2AOolyHvlIhiUbI,683
61
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=TS3uZ6YQfMs2yGCwzlz-yxZ3Rid79MGxEQESZkSX_Vo,7038
64
+ judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=nx73DeoVkSqJTP1hYxMsJobG9HVWgMDN5-xFOXt_8Ts,7348
62
65
  judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py,sha256=Mcp1CjMNyOax9UkvoRdSyUYdO2Os1-Nko43y89m2Luo,594
63
66
  judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py,sha256=Z2FLGBC7m_CLx-CMgXVuTvYvN0vY5yOcWA0ImBkeBfY,787
64
67
  judgeval/tracer/__init__.py,sha256=wkuXtOGDCrwgPPXlh_sSJmvGuWaAMHyNzk1TzB5f9aI,148
65
68
  judgeval/utils/alerts.py,sha256=3w_AjQrgfmOZvfqCridW8WAnHVxHHXokX9jNzVFyGjA,3297
69
+ judgeval/utils/async_utils.py,sha256=uNx1SopEc0quSjc8GBQqyba0SmCMAzv2NKIq6xYwttc,989
66
70
  judgeval/utils/file_utils.py,sha256=PWHRs8dUr8iDwpglSSk4Yjd7C6ZhDzUaO-jV3m7riHM,1987
67
71
  judgeval/utils/requests.py,sha256=K3gUKrwL6TvwYKVYO5OeLWdUHn9NiUPmnIXhZEiEaHU,1534
68
- judgeval-0.3.2.dist-info/METADATA,sha256=3VDQzNPY0wKnaIpzmrP2OLsk6eEGJZH_dSl4QcJRVwE,10348
69
- judgeval-0.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
70
- judgeval-0.3.2.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
71
- judgeval-0.3.2.dist-info/RECORD,,
72
+ judgeval-0.5.0.dist-info/METADATA,sha256=wwnunL-UcNKbB7D5t-UnOM_x3DVghU2BBPAVxa0tNfo,10348
73
+ judgeval-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
74
+ judgeval-0.5.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
75
+ judgeval-0.5.0.dist-info/RECORD,,