judgeval 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/__init__.py +2 -0
- judgeval/clients.py +2 -1
- judgeval/common/api/api.py +2 -16
- judgeval/common/api/json_encoder.py +242 -0
- judgeval/common/tracer/core.py +498 -215
- judgeval/common/tracer/providers.py +119 -0
- judgeval/common/tracer/span_transformer.py +14 -25
- judgeval/constants.py +1 -0
- judgeval/data/judgment_types.py +2 -1
- judgeval/data/trace.py +5 -122
- judgeval/data/trace_run.py +2 -1
- judgeval/dataset.py +2 -0
- judgeval/evaluation_run.py +6 -2
- judgeval/judges/litellm_judge.py +2 -1
- judgeval/judges/mixture_of_judges.py +2 -1
- judgeval/judges/utils.py +2 -1
- judgeval/judgment_client.py +11 -6
- judgeval/local_eval_queue.py +192 -0
- judgeval/run_evaluation.py +11 -6
- judgeval/scorers/score.py +33 -11
- judgeval/utils/async_utils.py +36 -0
- {judgeval-0.4.0.dist-info → judgeval-0.5.0.dist-info}/METADATA +9 -12
- {judgeval-0.4.0.dist-info → judgeval-0.5.0.dist-info}/RECORD +25 -21
- {judgeval-0.4.0.dist-info → judgeval-0.5.0.dist-info}/WHEEL +0 -0
- {judgeval-0.4.0.dist-info → judgeval-0.5.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,192 @@
|
|
1
|
+
"""Local evaluation queue for batching custom scorer evaluations.
|
2
|
+
|
3
|
+
This module provides a simple in-memory queue for EvaluationRun objects that contain
|
4
|
+
only local (BaseScorer) scorers. Useful for batching evaluations and processing them
|
5
|
+
either synchronously or in a background thread.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import queue
|
9
|
+
import threading
|
10
|
+
from typing import Callable, List, Optional
|
11
|
+
import time
|
12
|
+
|
13
|
+
from judgeval.common.logger import judgeval_logger
|
14
|
+
from judgeval.constants import MAX_CONCURRENT_EVALUATIONS
|
15
|
+
from judgeval.data import ScoringResult
|
16
|
+
from judgeval.evaluation_run import EvaluationRun
|
17
|
+
from judgeval.utils.async_utils import safe_run_async
|
18
|
+
from judgeval.scorers import BaseScorer
|
19
|
+
from judgeval.scorers.score import a_execute_scoring
|
20
|
+
|
21
|
+
|
22
|
+
class LocalEvaluationQueue:
    """Lightweight in-memory queue for local evaluation runs.

    Only supports EvaluationRuns with local scorers (BaseScorer instances).
    API scorers (APIScorerConfig) are not supported as they have their own queue.

    Queued runs are either drained synchronously with ``run_all`` or consumed
    by daemon threads started with ``start_workers``. ``None`` is used
    internally as a sentinel that tells a worker to stop.
    """

    def __init__(
        self, max_concurrent: int = MAX_CONCURRENT_EVALUATIONS, num_workers: int = 4
    ):
        """Create an empty queue.

        Args:
            max_concurrent: Total concurrency budget, split evenly between
                the worker threads when a run is scored.
            num_workers: Number of threads started by ``start_workers``.

        Raises:
            ValueError: If ``num_workers`` is not a positive integer.
        """
        if num_workers <= 0:
            raise ValueError("num_workers must be a positive integer.")
        self._queue: queue.Queue[Optional[EvaluationRun]] = queue.Queue()
        self._max_concurrent = max_concurrent
        self._num_workers = num_workers  # Number of worker threads
        self._worker_threads: List[threading.Thread] = []
        self._shutdown_event = threading.Event()

    def enqueue(self, evaluation_run: EvaluationRun) -> None:
        """Add evaluation run to the queue."""
        self._queue.put(evaluation_run)

    def _process_run(self, evaluation_run: EvaluationRun) -> List[ScoringResult]:
        """Execute evaluation run locally and return results.

        Args:
            evaluation_run: The run whose examples are scored. Scorers that
                are not ``BaseScorer`` instances are ignored.

        Returns:
            Whatever ``a_execute_scoring`` produces for the run's examples.

        Raises:
            ValueError: If the run contains no ``BaseScorer`` instance.
        """
        local_scorers = [s for s in evaluation_run.scorers if isinstance(s, BaseScorer)]

        if not local_scorers:
            raise ValueError(
                "LocalEvaluationQueue only supports runs with local scorers (BaseScorer). "
                "Found only APIScorerConfig instances."
            )

        # Floor division yields 0 whenever max_concurrent < num_workers. A
        # limit of 0 would presumably admit no scoring task at all
        # (a_execute_scoring is defined elsewhere - confirm), so never go
        # below one concurrent task per worker.
        per_worker_limit = max(1, self._max_concurrent // self._num_workers)

        return safe_run_async(
            a_execute_scoring(
                evaluation_run.examples,
                local_scorers,
                model=evaluation_run.model,
                throttle_value=0,
                max_concurrent=per_worker_limit,
                show_progress=False,
            )
        )

    def run_all(
        self,
        callback: Optional[Callable[[EvaluationRun, List[ScoringResult]], None]] = None,
    ) -> None:
        """Process all queued runs synchronously.

        Stops early if a worker-shutdown sentinel is encountered; the
        sentinel is put back so that a worker can still consume it. Errors
        raised while scoring or inside ``callback`` propagate to the caller.

        Args:
            callback: Optional function called after each run with (run, results).
        """
        while True:
            # Non-blocking get: a worker thread may drain the queue between
            # an emptiness check and a blocking get(), which would hang here.
            try:
                run = self._queue.get_nowait()
            except queue.Empty:
                return

            try:
                if run is None:  # Sentinel for worker shutdown
                    self._queue.put(None)
                    return
                results = self._process_run(run)
                if callback:
                    callback(run, results)
            finally:
                # Every successful get() must be matched by task_done(), even
                # for the sentinel or when processing fails; otherwise
                # unfinished_tasks never reaches zero and join() blocks forever.
                self._queue.task_done()

    def start_workers(
        self,
        callback: Optional[Callable[[EvaluationRun, List[ScoringResult]], None]] = None,
    ) -> List[threading.Thread]:
        """Start multiple background threads to process runs in parallel.

        Args:
            callback: Optional function called after each run with (run, results).

        Returns:
            List of started worker threads.
        """
        # Each generation of workers watches the event that was current when
        # it started, so a later stop/start cycle cannot revive old workers.
        stop_event = self._shutdown_event

        def _worker(worker_id: int) -> None:
            while not stop_event.is_set():
                try:
                    # Use timeout so workers can check the stop event periodically
                    run = self._queue.get(timeout=1.0)
                except queue.Empty:
                    continue

                try:
                    if run is None:
                        # stop_workers() queues one sentinel per worker, so it
                        # is consumed here rather than put back.
                        break
                    results = self._process_run(run)
                    if callback:
                        callback(run, results)
                except Exception as exc:
                    # Log and continue with other runs instead of killing the worker
                    judgeval_logger.error(
                        f"Worker {worker_id} error processing {run.eval_name}: {exc}"
                    )
                finally:
                    self._queue.task_done()

        for i in range(self._num_workers):
            thread = threading.Thread(target=_worker, args=(i,), daemon=True)
            thread.start()
            self._worker_threads.append(thread)

        # Return a snapshot so that stop_workers() clearing the internal list
        # does not empty the list handed to the caller.
        return list(self._worker_threads)

    def start_worker(
        self,
        callback: Optional[Callable[[EvaluationRun, List[ScoringResult]], None]] = None,
    ) -> Optional[threading.Thread]:
        """Start background processing (backward compatibility).

        Delegates to ``start_workers``, so ``num_workers`` threads are
        started; only the first one is returned.

        Args:
            callback: Optional function called after each run with (run, results).

        Returns:
            The first started thread, or None if no threads were started.
        """
        threads = self.start_workers(callback)
        return threads[0] if threads else None

    def wait_for_completion(self, timeout: Optional[float] = None) -> bool:
        """Wait for all queued tasks to complete.

        Args:
            timeout: Maximum time to wait in seconds. None means wait indefinitely.

        Returns:
            True if all tasks completed, False if timeout occurred.
        """
        if timeout is None:
            self._queue.join()
            return True

        # Monotonic clock: unaffected by wall-clock adjustments during the wait.
        deadline = time.monotonic() + timeout
        while not self._queue.empty() or self._queue.unfinished_tasks > 0:
            remaining = deadline - time.monotonic()
            if remaining < 0:
                return False
            time.sleep(min(0.1, remaining))
        return True

    def _discard_sentinels(self) -> None:
        """Remove leftover shutdown sentinels while keeping queued runs in order."""
        drained = []
        while True:
            try:
                drained.append(self._queue.get_nowait())
            except queue.Empty:
                break

        # Re-queue real runs *before* acknowledging the drained items so the
        # unfinished-task counter never drops to zero while work is pending.
        for item in drained:
            if item is not None:
                self._queue.put(item)
        for _ in drained:
            self._queue.task_done()

    def stop_workers(self) -> None:
        """Signal all background workers to stop after current tasks complete.

        Runs that are still queued stay in the queue and can be processed by
        a later ``start_workers`` or ``run_all`` call.
        """
        if not self._worker_threads:
            return

        # Signal shutdown
        self._shutdown_event.set()

        # One sentinel per started thread to wake up any blocking workers
        for _ in self._worker_threads:
            self._queue.put(None)

        # Wait for all workers to finish with timeout
        for thread in self._worker_threads:
            if thread.is_alive():
                thread.join(timeout=5.0)
                if thread.is_alive():
                    judgeval_logger.warning(
                        f"Worker thread {thread.name} did not shut down gracefully"
                    )

        # Workers that exited via the event never consumed their sentinel.
        # Leftover sentinels would stop restarted workers immediately and keep
        # wait_for_completion() from ever finishing.
        self._discard_sentinels()

        self._worker_threads.clear()
        # Fresh event for the next generation; the old one stays set so a
        # worker that outlived the join timeout still exits after its run.
        self._shutdown_event = threading.Event()
|
judgeval/run_evaluation.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
1
3
|
import asyncio
|
2
4
|
import concurrent.futures
|
3
5
|
import time
|
4
6
|
import orjson
|
5
7
|
import sys
|
6
8
|
import threading
|
7
|
-
from typing import List, Dict, Union, Optional, Callable, Tuple, Any
|
9
|
+
from typing import List, Dict, Union, Optional, Callable, Tuple, Any, TYPE_CHECKING
|
8
10
|
from rich import print as rprint
|
9
11
|
|
10
12
|
from judgeval.data import ScorerData, ScoringResult, Example, Trace
|
@@ -17,10 +19,13 @@ from judgeval.constants import (
|
|
17
19
|
from judgeval.common.exceptions import JudgmentAPIError
|
18
20
|
from judgeval.common.api.api import JudgmentAPIException
|
19
21
|
from judgeval.common.logger import judgeval_logger
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
from judgeval.
|
22
|
+
|
23
|
+
|
24
|
+
if TYPE_CHECKING:
|
25
|
+
from judgeval.common.tracer import Tracer
|
26
|
+
from judgeval.data.trace_run import TraceRun
|
27
|
+
from judgeval.evaluation_run import EvaluationRun
|
28
|
+
from judgeval.integrations.langgraph import JudgevalCallbackHandler
|
24
29
|
|
25
30
|
|
26
31
|
def safe_run_async(coro):
|
@@ -282,7 +287,7 @@ def run_trace_eval(
|
|
282
287
|
judgment_api_key: str,
|
283
288
|
override: bool = False,
|
284
289
|
function: Optional[Callable] = None,
|
285
|
-
tracer: Optional[Union[Tracer, JudgevalCallbackHandler]] = None,
|
290
|
+
tracer: Optional[Union[Tracer, "JudgevalCallbackHandler"]] = None,
|
286
291
|
examples: Optional[List[Example]] = None,
|
287
292
|
) -> List[ScoringResult]:
|
288
293
|
# Call endpoint to check to see if eval run name exists (if we DON'T want to override and DO want to log results)
|
judgeval/scorers/score.py
CHANGED
@@ -17,6 +17,7 @@ from judgeval.scorers import BaseScorer
|
|
17
17
|
from judgeval.scorers.utils import clone_scorers
|
18
18
|
from judgeval.common.logger import judgeval_logger
|
19
19
|
from judgeval.judges import JudgevalJudge
|
20
|
+
from judgeval.constants import DEFAULT_GPT_MODEL
|
20
21
|
|
21
22
|
|
22
23
|
async def safe_a_score_example(
|
@@ -55,10 +56,11 @@ async def safe_a_score_example(
|
|
55
56
|
async def a_execute_scoring(
|
56
57
|
examples: List[Example],
|
57
58
|
scorers: List[BaseScorer],
|
58
|
-
model: Optional[Union[str, List[str], JudgevalJudge]] =
|
59
|
+
model: Optional[Union[str, List[str], JudgevalJudge]] = DEFAULT_GPT_MODEL,
|
59
60
|
ignore_errors: bool = False,
|
60
61
|
throttle_value: int = 0,
|
61
62
|
max_concurrent: int = 100,
|
63
|
+
show_progress: bool = True,
|
62
64
|
) -> List[ScoringResult]:
|
63
65
|
"""
|
64
66
|
Executes evaluations of `Example`s asynchronously using one or more `BaseScorer`s.
|
@@ -71,8 +73,7 @@ async def a_execute_scoring(
|
|
71
73
|
ignore_errors (bool): Whether to ignore errors during evaluation.
|
72
74
|
throttle_value (int): The amount of time to wait between starting each task.
|
73
75
|
max_concurrent (int): The maximum number of concurrent tasks.
|
74
|
-
|
75
|
-
_use_bar_indicator (bool): Whether to use a progress bar indicator.
|
76
|
+
show_progress (bool): Whether to show the progress bar indicator.
|
76
77
|
|
77
78
|
Returns:
|
78
79
|
List[ScoringResult]: A list of `ScoringResult` objects containing the evaluation results.
|
@@ -101,16 +102,37 @@ async def a_execute_scoring(
|
|
101
102
|
tasks = []
|
102
103
|
cloned_scorers: List[BaseScorer]
|
103
104
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
105
|
+
if show_progress:
|
106
|
+
with tqdm_asyncio(
|
107
|
+
desc=f"Evaluating {len(examples)} example(s) in parallel",
|
108
|
+
unit="Example",
|
109
|
+
total=len(examples),
|
110
|
+
bar_format="{desc}: |{bar}|{percentage:3.0f}% ({n_fmt}/{total_fmt}) [Time Taken: {elapsed}, {rate_fmt}{postfix}]",
|
111
|
+
) as pbar:
|
112
|
+
for i, ex in enumerate(examples):
|
113
|
+
if isinstance(ex, Example):
|
114
|
+
if len(scorers) == 0:
|
115
|
+
pbar.update(1)
|
116
|
+
continue
|
117
|
+
|
118
|
+
cloned_scorers = clone_scorers(scorers)
|
119
|
+
task = execute_with_semaphore(
|
120
|
+
func=a_eval_examples_helper,
|
121
|
+
scorers=cloned_scorers,
|
122
|
+
example=ex,
|
123
|
+
scoring_results=scoring_results,
|
124
|
+
score_index=i,
|
125
|
+
ignore_errors=ignore_errors,
|
126
|
+
pbar=pbar,
|
127
|
+
)
|
128
|
+
tasks.append(asyncio.create_task(task))
|
129
|
+
|
130
|
+
await asyncio.sleep(throttle_value)
|
131
|
+
await asyncio.gather(*tasks)
|
132
|
+
else:
|
110
133
|
for i, ex in enumerate(examples):
|
111
134
|
if isinstance(ex, Example):
|
112
135
|
if len(scorers) == 0:
|
113
|
-
pbar.update(1)
|
114
136
|
continue
|
115
137
|
|
116
138
|
cloned_scorers = clone_scorers(scorers)
|
@@ -121,7 +143,7 @@ async def a_execute_scoring(
|
|
121
143
|
scoring_results=scoring_results,
|
122
144
|
score_index=i,
|
123
145
|
ignore_errors=ignore_errors,
|
124
|
-
pbar=
|
146
|
+
pbar=None,
|
125
147
|
)
|
126
148
|
tasks.append(asyncio.create_task(task))
|
127
149
|
|
@@ -0,0 +1,36 @@
|
|
1
|
+
"""Async utilities for judgeval."""
|
2
|
+
|
3
|
+
import asyncio
|
4
|
+
import concurrent.futures
|
5
|
+
from typing import Awaitable, TypeVar
|
6
|
+
|
7
|
+
|
8
|
+
# Generic type variable for the awaitable's result type
T = TypeVar("T")


def safe_run_async(coro: Awaitable[T]) -> T:  # type: ignore[type-var]
    """Safely execute an awaitable *coro* from synchronous code.

    This helper handles two common situations:

    1. **No running event loop** – Simply delegates to ``asyncio.run``.
    2. **Existing running loop** – Executes the coroutine in a separate
       thread so that we don't attempt to nest event loops (which would raise
       ``RuntimeError``). The calling thread blocks until the result is ready.

    Args:
        coro: The coroutine (or other awaitable) to execute.

    Returns:
        The result returned by *coro*.

    Raises:
        Exception: Whatever *coro* raises is re-raised to the caller.
    """
    if asyncio.iscoroutine(coro):
        runnable = coro
    else:
        # asyncio.run() only accepts coroutine objects, while the signature
        # advertises any awaitable; wrap other awaitables in a coroutine.
        async def _await_it() -> T:
            return await coro

        runnable = _await_it()

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so one can be started here.
        return asyncio.run(runnable)

    # A loop is already running in this thread: run the coroutine on a fresh
    # loop in a single helper thread and wait for its result.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        return executor.submit(asyncio.run, runnable).result()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: judgeval
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.5.0
|
4
4
|
Summary: Judgeval Package
|
5
5
|
Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
|
6
6
|
Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
|
@@ -10,27 +10,24 @@ License-File: LICENSE.md
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
12
12
|
Requires-Python: >=3.11
|
13
|
-
Requires-Dist: anthropic
|
14
13
|
Requires-Dist: boto3
|
15
|
-
Requires-Dist: datamodel-code-generator>=0.31.1
|
16
|
-
Requires-Dist: google-genai
|
17
|
-
Requires-Dist: groq>=0.30.0
|
18
14
|
Requires-Dist: langchain-anthropic
|
19
15
|
Requires-Dist: langchain-core
|
20
16
|
Requires-Dist: langchain-huggingface
|
21
17
|
Requires-Dist: langchain-openai
|
22
18
|
Requires-Dist: litellm>=1.61.15
|
23
|
-
Requires-Dist:
|
24
|
-
Requires-Dist: nest-asyncio
|
25
|
-
Requires-Dist: openai
|
19
|
+
Requires-Dist: nest-asyncio>=1.6.0
|
26
20
|
Requires-Dist: opentelemetry-api>=1.34.1
|
27
21
|
Requires-Dist: opentelemetry-sdk>=1.34.1
|
28
22
|
Requires-Dist: orjson>=3.9.0
|
29
|
-
Requires-Dist:
|
30
|
-
Requires-Dist: python-dotenv==1.0.1
|
31
|
-
Requires-Dist: python-slugify>=8.0.4
|
23
|
+
Requires-Dist: python-dotenv
|
32
24
|
Requires-Dist: requests
|
33
|
-
Requires-Dist:
|
25
|
+
Requires-Dist: rich
|
26
|
+
Provides-Extra: langchain
|
27
|
+
Requires-Dist: langchain-anthropic; extra == 'langchain'
|
28
|
+
Requires-Dist: langchain-core; extra == 'langchain'
|
29
|
+
Requires-Dist: langchain-huggingface; extra == 'langchain'
|
30
|
+
Requires-Dist: langchain-openai; extra == 'langchain'
|
34
31
|
Description-Content-Type: text/markdown
|
35
32
|
|
36
33
|
<div align="center">
|
@@ -1,53 +1,56 @@
|
|
1
|
-
judgeval/__init__.py,sha256=
|
2
|
-
judgeval/clients.py,sha256=
|
3
|
-
judgeval/constants.py,sha256=
|
4
|
-
judgeval/dataset.py,sha256=
|
5
|
-
judgeval/evaluation_run.py,sha256=
|
6
|
-
judgeval/judgment_client.py,sha256=
|
1
|
+
judgeval/__init__.py,sha256=5Lm1JMYFREJGN_8X-Wpruu_ovwGLJ08gCzNAt-u-pQE,419
|
2
|
+
judgeval/clients.py,sha256=HHul68PV1om0dxsVZZu90TtCiy5zaqAwph16jXTQzQo,989
|
3
|
+
judgeval/constants.py,sha256=UNoTLHgbpZHRInPM2ZaI3m0XokPkee5ILlg20reqhzo,4180
|
4
|
+
judgeval/dataset.py,sha256=vOrDKam2I-K1WcVF5IBkQruCDvXTc8PRaFm4-dV0lXs,6220
|
5
|
+
judgeval/evaluation_run.py,sha256=FJpnc1sGncmAOAnEUO0n2vNXjlycljGqBdV99qPT5og,3087
|
6
|
+
judgeval/judgment_client.py,sha256=tGhENRb2YVIe2WUlcssC8DuEijeUC7Ajj_rh_Dh7bzA,11878
|
7
|
+
judgeval/local_eval_queue.py,sha256=fAI0_OlvCr-WOCQWw18C4JIRJHKYzlyGzsGUm8LcsYE,7076
|
7
8
|
judgeval/rules.py,sha256=CoQjqmP8daEXewMkplmA-7urubDtweOr5O6z8klVwLI,20031
|
8
|
-
judgeval/run_evaluation.py,sha256=
|
9
|
+
judgeval/run_evaluation.py,sha256=4kcaw3R_akhxqutGFGTaBS2pqD-3d0ET7zMDL1_7HK4,27741
|
9
10
|
judgeval/version_check.py,sha256=FoLEtpCjDw2HuDQdpw5yT29UtwumSc6ZZN6AV_c9Mnw,1057
|
10
11
|
judgeval/common/__init__.py,sha256=KH-QJyWtQ60R6yFIBDYS3WGRiNpEu1guynpxivZvpBQ,309
|
11
12
|
judgeval/common/exceptions.py,sha256=OkgDznu2wpBQZMXiZarLJYNk1HIcC8qYW7VypDC3Ook,556
|
12
13
|
judgeval/common/logger.py,sha256=514eFLYWS_UL8VY-zAR2ePUlpQe4rbYlleLASFllLE4,1511
|
13
14
|
judgeval/common/utils.py,sha256=oxGDRVWOICKWeyGgsoc36_yAyHSYF4XtH842Mkznwis,34739
|
14
15
|
judgeval/common/api/__init__.py,sha256=-E7lpZz1fG8puR_aYUMfPmQ-Vyhd0bgzoaU5EhIuFjQ,114
|
15
|
-
judgeval/common/api/api.py,sha256=
|
16
|
+
judgeval/common/api/api.py,sha256=uuLH6veC0LewfZ1IFiiUi5_OV7zTa7xTIK9LRlLoufc,13743
|
16
17
|
judgeval/common/api/constants.py,sha256=DXej0m8HEhb871SdiR8t_o4fzeMoQjHYqb_X0Plj8wY,4577
|
18
|
+
judgeval/common/api/json_encoder.py,sha256=XsScZe9hZP56yuxQ-3Ox6K8DcbjWxc2Yq7FcLF9qkUE,5852
|
17
19
|
judgeval/common/storage/__init__.py,sha256=a-PI7OL-ydyzugGUKmJKRBASnK-Q-gs82L9K9rSyJP8,90
|
18
20
|
judgeval/common/storage/s3_storage.py,sha256=0-bNKheqJJyBZ92KGrzQtd1zocIRWBlfn_58L4a-Ay0,3719
|
19
21
|
judgeval/common/tracer/__init__.py,sha256=tJCJsmVmrL89Phv88gNCJ-j0ITPez6lh8vhMAAlLNSc,795
|
20
22
|
judgeval/common/tracer/constants.py,sha256=yu5y8gMe5yb1AaBkPtAH-BNwIaAR3NwYCRoSf45wp5U,621
|
21
|
-
judgeval/common/tracer/core.py,sha256=
|
23
|
+
judgeval/common/tracer/core.py,sha256=rI7P0CaarP5FLQZmOGWpOJkjdf6WUgSds6i_QF04J3M,85071
|
22
24
|
judgeval/common/tracer/otel_exporter.py,sha256=kZLlOQ6afQE4dmb9H1wgU4P3H5PG1D_zKyvnpWcT5Ak,3899
|
23
25
|
judgeval/common/tracer/otel_span_processor.py,sha256=W7SM62KnxJ48vC9WllIHRKaLlvxkCwqYoT4KqZLfGNs,6497
|
26
|
+
judgeval/common/tracer/providers.py,sha256=3c3YOtKuoBjlTL0rc2HAGnUpppqvsyzrN5H6EKCqEi0,2733
|
24
27
|
judgeval/common/tracer/span_processor.py,sha256=eFjTgSWSkM6BWE94CrvgafDg_WkxLsFL_MafwBG-p9M,1145
|
25
|
-
judgeval/common/tracer/span_transformer.py,sha256=
|
28
|
+
judgeval/common/tracer/span_transformer.py,sha256=mUmfUYjEekUEOXAZMmH0WEF94ge05EBi5ftSc-T91zQ,7314
|
26
29
|
judgeval/common/tracer/trace_manager.py,sha256=ltiXcWC-68DRc8uSa28qHiWRSIBf6NpYOPkZYooR8tg,3086
|
27
30
|
judgeval/data/__init__.py,sha256=1QagDcSQtfnJ632t9Dnq8d7XjAqhmY4mInOWt8qH9tM,455
|
28
31
|
judgeval/data/example.py,sha256=kRskIgsjwcvv2Y8jaPwV-PND7zlmMbFsvRVQ_b7SZY0,914
|
29
|
-
judgeval/data/judgment_types.py,sha256=
|
32
|
+
judgeval/data/judgment_types.py,sha256=1DTpCnIdDM93Rozu9Dr812Q5K3lZfawMcWbPG2ofbxM,8407
|
30
33
|
judgeval/data/result.py,sha256=OtSnBUrdQpjyAqxXRLTW3wC9v9lOm_GqzL14ccRQxrg,2124
|
31
34
|
judgeval/data/scorer_data.py,sha256=5QBHtvOIWOq0Rn9_uPJzAMRYMlWxMB-rXnG_6kV4Z4Y,2955
|
32
35
|
judgeval/data/tool.py,sha256=iWQSdy5uNbIeACu3gQy1DC2oGYxRVYNfkkczWdQMAiA,99
|
33
|
-
judgeval/data/trace.py,sha256=
|
34
|
-
judgeval/data/trace_run.py,sha256=
|
36
|
+
judgeval/data/trace.py,sha256=LG-IZksynC1VgfUBuBfIIfR1DT9Bn-sY4vIj6Rc9K6Q,2791
|
37
|
+
judgeval/data/trace_run.py,sha256=ZCAzktgOSUPD0p1XQj8qGcF-DdsdQFNZM2dtY0aKGbE,1657
|
35
38
|
judgeval/data/scripts/fix_default_factory.py,sha256=lvp2JwYZqz-XpD9LZNa3mANZVP-jJSZoNzolI6JWERM,591
|
36
39
|
judgeval/data/scripts/openapi_transform.py,sha256=Sm04JClzyP1ga8KA3gkIdsae8Hlx-XU7-x0gHCQYOhg,3877
|
37
40
|
judgeval/integrations/langgraph.py,sha256=kJXLsgBY7DgsUTZyVQ47deDgHm887brFHfyIbuyerGw,29986
|
38
41
|
judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
|
39
42
|
judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY,986
|
40
|
-
judgeval/judges/litellm_judge.py,sha256=
|
41
|
-
judgeval/judges/mixture_of_judges.py,sha256=
|
43
|
+
judgeval/judges/litellm_judge.py,sha256=K9yCGOmozt7sYO0u8CHWyZNi8mXnSR3pPkP8yVsvuRc,2561
|
44
|
+
judgeval/judges/mixture_of_judges.py,sha256=iTNjTX4Le1nCwGRm9qfMCv1lQjgqoIw3OE0teiLubwo,14946
|
42
45
|
judgeval/judges/together_judge.py,sha256=5FADUhs6-FN1ZVV_1D3-8_gu9mPbZiG0PYTpme41SfM,2336
|
43
|
-
judgeval/judges/utils.py,sha256=
|
46
|
+
judgeval/judges/utils.py,sha256=_t6oYN9q63wyP7D4jI8X0bNmvVw7OfaE7uMTYDVS14E,2782
|
44
47
|
judgeval/scorers/__init__.py,sha256=4H_cinTQ4EogZv59YEV-3U9EOTLppNwgAPTi1-jI9Fw,746
|
45
48
|
judgeval/scorers/agent_scorer.py,sha256=TjwD_YglSywr3EowEojiCyg5qDgCRa5LRGc5nFdmIBc,703
|
46
49
|
judgeval/scorers/api_scorer.py,sha256=xlhqkeMUBFxl8daSXOTWOYwZjBAz7o6b4sVD5f8cIHw,2523
|
47
50
|
judgeval/scorers/base_scorer.py,sha256=eDfQk8N8TQfM1ayJDWr0NTdSQxcbk9-VZHd0Igb9EbI,2878
|
48
51
|
judgeval/scorers/example_scorer.py,sha256=2n45y3LMV1Q-ARyXLHqvVWETlnY1DqS7OLzPu9IBGz8,716
|
49
52
|
judgeval/scorers/exceptions.py,sha256=ACDHK5-TWiF3NTk-wycaedpbrdobm-CvvC1JA_iP-Mk,179
|
50
|
-
judgeval/scorers/score.py,sha256=
|
53
|
+
judgeval/scorers/score.py,sha256=SWyoqOOvyLpLy39tLyb_Q94sdh9r_IuDv6YNREw52lg,7546
|
51
54
|
judgeval/scorers/utils.py,sha256=HQOYTJtNnsi_aPfMssePAaBbXpAv7LXgwUlWlDFuN2g,3965
|
52
55
|
judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
53
56
|
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=GX4KkwPR2p-c0Y5mZingJa8EUfjAbMGhrmRBDBunOGw,1484
|
@@ -63,9 +66,10 @@ judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py,sha256=Mcp1CjMN
|
|
63
66
|
judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py,sha256=Z2FLGBC7m_CLx-CMgXVuTvYvN0vY5yOcWA0ImBkeBfY,787
|
64
67
|
judgeval/tracer/__init__.py,sha256=wkuXtOGDCrwgPPXlh_sSJmvGuWaAMHyNzk1TzB5f9aI,148
|
65
68
|
judgeval/utils/alerts.py,sha256=3w_AjQrgfmOZvfqCridW8WAnHVxHHXokX9jNzVFyGjA,3297
|
69
|
+
judgeval/utils/async_utils.py,sha256=uNx1SopEc0quSjc8GBQqyba0SmCMAzv2NKIq6xYwttc,989
|
66
70
|
judgeval/utils/file_utils.py,sha256=PWHRs8dUr8iDwpglSSk4Yjd7C6ZhDzUaO-jV3m7riHM,1987
|
67
71
|
judgeval/utils/requests.py,sha256=K3gUKrwL6TvwYKVYO5OeLWdUHn9NiUPmnIXhZEiEaHU,1534
|
68
|
-
judgeval-0.
|
69
|
-
judgeval-0.
|
70
|
-
judgeval-0.
|
71
|
-
judgeval-0.
|
72
|
+
judgeval-0.5.0.dist-info/METADATA,sha256=wwnunL-UcNKbB7D5t-UnOM_x3DVghU2BBPAVxa0tNfo,10348
|
73
|
+
judgeval-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
74
|
+
judgeval-0.5.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
|
75
|
+
judgeval-0.5.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|